// Copyright 2012 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEquality;
import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEqualityRuleManager;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeBuilder;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNode;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNodeFactory;
import de.ugoe.cs.autoquest.usageprofiles.SymbolComparator;
import de.ugoe.cs.autoquest.usageprofiles.Trie;
import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
/**
*
* TODO comment
*
*
* @author Patrick Harms
*/
class DefaultTaskSequenceDetectionRule implements TemporalRelationshipRule {
/**
*
* the task tree node factory to be used for creating substructures for the temporal
* relationships identified during rule
*
*/
private ITaskTreeNodeFactory taskTreeNodeFactory;
/**
*
* the task tree builder to be used for creating substructures for the temporal relationships
* identified during rule application
*
*/
private ITaskTreeBuilder taskTreeBuilder;
/**
*
* the node comparator to be used for comparing task tree nodes
*
*/
private SymbolComparator nodeComparator;
/**
*
* instantiates the rule and initializes it with a node equality rule manager and the minimal
* node equality identified sublist must have to consider them as iterated.
*
*/
DefaultTaskSequenceDetectionRule(NodeEqualityRuleManager nodeEqualityRuleManager,
NodeEquality minimalNodeEquality,
ITaskTreeNodeFactory taskTreeNodeFactory,
ITaskTreeBuilder taskTreeBuilder)
{
this.taskTreeNodeFactory = taskTreeNodeFactory;
this.taskTreeBuilder = taskTreeBuilder;
this.nodeComparator =
new TaskTreeNodeComparator(nodeEqualityRuleManager, minimalNodeEquality);
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "DefaultTaskSequenceDetectionRule";
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.tasktree.temporalrelation.TemporalRelationshipRule#apply(TaskTreeNode,
* boolean)
*/
@Override
public RuleApplicationResult apply(ITaskTreeNode parent, boolean finalize) {
if (!(parent instanceof ISequence)) {
return null;
}
if (!finalize) {
// the rule is always feasible as tasks may occur at any time
RuleApplicationResult result = new RuleApplicationResult();
result.setRuleApplicationStatus(RuleApplicationStatus.RULE_APPLICATION_FEASIBLE);
return result;
}
RuleApplicationData appData = new RuleApplicationData(parent);
appData.getStopWatch().start("whole rule application");
do {
getSequencesOccuringMostOften(appData);
if ((appData.getLastFoundSequences().size() > 0) &&
(appData.getLastFoundSequences().getOccurrenceCount() > 1))
{
System.out.println("found " + appData.getLastFoundSequences().size() +
" tasks occurring " +
appData.getLastFoundSequences().getOccurrenceCount() + " times");
for (List task : appData.getLastFoundSequences()) {
// only tasks occurring more often than once are of interest
appData.stopWatch.start("creating task sequences");
String taskId = "task " + RuleUtils.getNewId();
System.out.println(taskId + ": " + task);
appData.resetReplacementCounter();
createSequenceForTaskOccurrences(taskId, task, parent, appData);
if (appData.getReplacementCounter() <
appData.getLastFoundSequences().getOccurrenceCount())
{
System.out.println(taskId + ": replaced task only " +
appData.getReplacementCounter() +
" times instead of expected " +
appData.getLastFoundSequences().getOccurrenceCount());
}
appData.stopWatch.stop("creating task sequences");
}
}
}
while (appData.getLastFoundSequences().getOccurrenceCount() > 2);
appData.getStopWatch().stop("whole rule application");
appData.getStopWatch().dumpStatistics(System.out);
if (appData.getResult().getNewlyCreatedParentNodes().size() > 0) {
appData.getResult().setRuleApplicationStatus
(RuleApplicationStatus.RULE_APPLICATION_FINISHED);
}
System.out.println(appData.getResult().getNewlyCreatedParentNodes().size());
for (ITaskTreeNode node : appData.getResult().getNewlyCreatedParentNodes()) {
for (ITaskTreeNode child : node.getChildren()) {
System.out.println(child);
}
System.out.println();
}
return appData.getResult();
}
/**
*
* TODO: comment
*
*
* @param i
* @return
*/
private void getSequencesOccuringMostOften(RuleApplicationData appData) {
System.out.println("determining most prominent tasks with a maximum of " +
(appData.getLastFoundSequences().getOccurrenceCount() - 1) +
" occurrences");
Sequences sequences;
boolean createNewTrie = (appData.getLastTrie() == null) ||
appData.getReplacementCounter() > 0; // tree has changed
do {
if (createNewTrie) {
createNewTrie(appData);
}
/*PathDumper dumper = new PathDumper();
trie.process(dumper);
dumper.dump();*/
appData.getStopWatch().start("determining tasks");
MaxCountAndLongestSequenceFinder finder = new MaxCountAndLongestSequenceFinder
(appData.getLastFoundSequences().getOccurrenceCount() - 1);
appData.getLastTrie().process(finder);
sequences = finder.getFoundSequences();
createNewTrie = false;
for (List task : sequences) {
if (task.size() >= appData.getTrainedSequenceLength()) {
// Trie must be recreated with a longer sequence length to be sure that
// the found sequences occurring most often are found in their whole length
appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
createNewTrie = true;
break;
}
}
appData.getStopWatch().stop("determining tasks");
}
while (createNewTrie);
appData.setLastFoundSequences(sequences);
}
/**
*
* TODO: comment
*
*
* @param parent
* @return
*/
private void createNewTrie(RuleApplicationData appData) {
System.out.println("training trie with a maximum sequence length of " +
appData.getTrainedSequenceLength());
appData.getStopWatch().start("training trie");
appData.setLastTrie(new Trie(nodeComparator));
trainTrie(appData.getTree(), appData);
appData.getStopWatch().stop("training trie");
}
/**
*
* TODO: comment
*
*
* @param trie
* @param parent
*/
private void trainTrie(ITaskTreeNode parent, RuleApplicationData appData) {
// prevent training of already replaces sequences as those shall not be replaced anymore
if (!appData.getResult().getNewlyCreatedParentNodes().contains(parent)) {
List children = parent.getChildren();
if ((children != null) && (children.size() > 0)) {
/*System.out.println();
for (int i = 0; i < children.size(); i++) {
System.out.println(children.get(i));
}*/
appData.getLastTrie().train(children, appData.getTrainedSequenceLength());
for (ITaskTreeNode child : children) {
trainTrie(child, appData);
}
}
}
}
/**
*
* TODO: comment
*
*
* @param task
* @param parent
* @param treeBuilder
* @param nodeFactory
* @param result
*/
private void createSequenceForTaskOccurrences(String taskId,
List task,
ITaskTreeNode parent,
RuleApplicationData appData)
{
List children = parent.getChildren();
if ((children == null) || (children.size() == 0)) {
return;
}
// first traverse the children
for (ITaskTreeNode child : children) {
createSequenceForTaskOccurrences(taskId, task, child, appData);
}
// now check the children themselves for an occurrence of the task
int index = -1;
do {
index = getSubListIndex(children, task, ++index);
if (index > -1) {
if (task.size() < children.size()) {
ISequence sequence = RuleUtils.createNewSubSequenceInRange
(parent, index, index + task.size() - 1, taskId,
taskTreeNodeFactory, taskTreeBuilder);
appData.getResult().addNewlyCreatedParentNode(sequence);
appData.incrementReplacementCounter();
children = parent.getChildren();
}
else {
// the whole list of children is an occurrence of this task. Just set the
// description of the parent and break up
String description = parent.getDescription();
if ((description != null) && (description.length() > 0)) {
description += "; " + taskId;
}
else {
description = taskId;
}
taskTreeBuilder.setDescription(parent, description);
break;
}
}
}
while (index > -1);
}
/**
*
* TODO: comment
*
*
* @param trie
* @param object
* @return
*/
private int getSubListIndex(List list,
List subList,
int startIndex)
{
boolean matchFound;
int result = -1;
for (int i = startIndex; i <= list.size() - subList.size(); i++) {
matchFound = true;
for (int j = 0; j < subList.size(); j++) {
if (!nodeComparator.equals(list.get(i + j), subList.get(j))) {
matchFound = false;
break;
}
else if (!nodeComparator.equals(subList.get(j), list.get(i + j))){
throw new RuntimeException("comparator not commutative");
}
}
if (matchFound) {
result = i;
break;
}
}
return result;
}
/**
*
* TODO comment
*
*
* @author Patrick Harms
*/
private class MaxCountAndLongestSequenceFinder implements TrieProcessor {
/**
*
*/
private int maxCount;
/**
*
*/
private int currentCount;
/**
*
*/
private List> foundSequences = new LinkedList>();
/**
*
* TODO: comment
*
*
* @param maxCount
*/
public MaxCountAndLongestSequenceFinder(int maxCount) {
super();
this.maxCount = maxCount;
this.currentCount = 0;
}
/* (non-Javadoc)
* @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
*/
@Override
public TrieProcessor.Result process(List sequence, int count) {
if (sequence.size() < 2) {
// ignore single nodes
return TrieProcessor.Result.CONTINUE;
}
if (count < 2) {
// ignore singular occurrences
return TrieProcessor.Result.SKIP_NODE;
}
if (count > this.maxCount) {
// ignore sequences that occur too often
return TrieProcessor.Result.CONTINUE;
}
if (this.currentCount > count) {
// ignore this children of this node, as they may have only smaller counts than
// the already found sequences
return TrieProcessor.Result.SKIP_NODE;
}
if (this.currentCount < count) {
// the provided sequence occurs more often that all sequences found so far.
// clear all found sequences and use the new count as the one searched for
foundSequences.clear();
this.currentCount = count;
}
if (this.currentCount == count) {
// the sequence is of interest. Sort it into the other found sequences so that
// the longest come first
boolean added = false;
for (int i = 0; i < foundSequences.size(); i++) {
if (foundSequences.get(i).size() < sequence.size()) {
// defensive copy
foundSequences.add(i, new LinkedList(sequence)); // defensive copy
added = true;
break;
}
}
if (!added) {
foundSequences.add(new LinkedList(sequence)); // defensive copy
}
}
return TrieProcessor.Result.CONTINUE;
}
/**
*
* TODO: comment
*
*
* @return
*/
public Sequences getFoundSequences() {
removePermutationsOfShorterTasks();
return new Sequences(currentCount, foundSequences);
}
/**
*
* TODO: comment
*
*
*/
private void removePermutationsOfShorterTasks() {
// now iterate the sorted list and for each task remove all other tasks that are shorter
// (i.e. come later in the sorted list) and that represent a subpart of the task
for (int i = 0; i < foundSequences.size(); i++) {
for (int j = i + 1; j < foundSequences.size();) {
if (foundSequences.get(j).size() < foundSequences.get(i).size()) {
// found a task that is a potential subtask. Check for this and remove the
// subtask if needed
List longTask = foundSequences.get(i);
List shortTask = foundSequences.get(j);
if (getSubListIndex(longTask, shortTask, 0) > -1) {
foundSequences.remove(j);
}
else {
j++;
}
}
else {
j++;
}
}
}
}
}
/**
*
*/
private class RuleApplicationData {
/**
*
*/
private T tree;
/**
*
*/
private RuleApplicationResult result = new RuleApplicationResult();
/**
*
*/
private Sequences lastFoundSequences = new Sequences(Integer.MAX_VALUE, null);
/**
*
*/
private Trie lastTrie;
/**
* default and minimum trained sequence length is 3
*/
private int trainedSequenceLength = 3;
/**
*
*/
private int replacementCounter;
/**
*
*/
private StopWatch stopWatch = new StopWatch();
/**
*
*/
private RuleApplicationData(T tree) {
this.tree = tree;
}
/**
* @return the lastFoundSequences
*/
private Sequences getLastFoundSequences() {
return lastFoundSequences;
}
/**
* @param lastFoundSequences the lastFoundSequences to set
*/
private void setLastFoundSequences(Sequences lastFoundSequences) {
this.lastFoundSequences = lastFoundSequences;
}
/**
* @return the lastTrie
*/
private Trie getLastTrie() {
return lastTrie;
}
/**
* @return the trainedSequenceLength
*/
private int getTrainedSequenceLength() {
return trainedSequenceLength;
}
/**
* @param trainedSequenceLength the trainedSequenceLength to set
*/
private void setTrainedSequenceLength(int trainedSequenceLength) {
this.trainedSequenceLength = trainedSequenceLength;
}
/**
* @param lastTrie the lastTrie to set
*/
private void setLastTrie(Trie lastTrie) {
this.lastTrie = lastTrie;
}
/**
* @return the tree
*/
private T getTree() {
return tree;
}
/**
* @return the result
*/
private RuleApplicationResult getResult() {
return result;
}
/**
* @return the stopWatch
*/
private StopWatch getStopWatch() {
return stopWatch;
}
/**
*
*/
private void resetReplacementCounter() {
replacementCounter = 0;
}
/**
*
*/
private void incrementReplacementCounter() {
replacementCounter++;
}
/**
*
*/
private int getReplacementCounter() {
return replacementCounter;
}
}
/**
*
* TODO comment
*
*
* @author Patrick Harms
*/
private class Sequences implements Iterable> {
/**
*
*/
private int occurrenceCount;
/**
*
*/
private List> sequences;
/**
*
* TODO: comment
*
*
* @param occurrenceCount
* @param sequences
*/
private Sequences(int occurrenceCount, List> sequences) {
super();
this.occurrenceCount = occurrenceCount;
this.sequences = sequences;
}
/**
*
* TODO: comment
*
*
* @return
*/
private int getOccurrenceCount() {
return occurrenceCount;
}
/**
*
* TODO: comment
*
*
* @return
*/
private int size() {
return this.sequences.size();
}
/**
*
*/
/* (non-Javadoc)
* @see java.lang.Iterable#iterator()
*/
@Override
public Iterator> iterator() {
return this.sequences.iterator();
}
}
}