// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEquality; import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEqualityRuleManager; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeBuilder; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNode; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNodeFactory; import de.ugoe.cs.autoquest.usageprofiles.SymbolComparator; import de.ugoe.cs.autoquest.usageprofiles.Trie; import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor; /** *

* TODO comment *

* * @author Patrick Harms */ class DefaultTaskSequenceDetectionRule implements TemporalRelationshipRule { /** *

* the task tree node factory to be used for creating substructures for the temporal * relationships identified during rule *

*/ private ITaskTreeNodeFactory taskTreeNodeFactory; /** *

* the task tree builder to be used for creating substructures for the temporal relationships * identified during rule application *

*/ private ITaskTreeBuilder taskTreeBuilder; /** *

* the node comparator to be used for comparing task tree nodes *

*/ private SymbolComparator nodeComparator; /** *

* instantiates the rule and initializes it with a node equality rule manager and the minimal * node equality identified sublist must have to consider them as iterated. *

*/ DefaultTaskSequenceDetectionRule(NodeEqualityRuleManager nodeEqualityRuleManager, NodeEquality minimalNodeEquality, ITaskTreeNodeFactory taskTreeNodeFactory, ITaskTreeBuilder taskTreeBuilder) { this.taskTreeNodeFactory = taskTreeNodeFactory; this.taskTreeBuilder = taskTreeBuilder; this.nodeComparator = new TaskTreeNodeComparator(nodeEqualityRuleManager, minimalNodeEquality); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "DefaultTaskSequenceDetectionRule"; } /* * (non-Javadoc) * * @see de.ugoe.cs.tasktree.temporalrelation.TemporalRelationshipRule#apply(TaskTreeNode, * boolean) */ @Override public RuleApplicationResult apply(ITaskTreeNode parent, boolean finalize) { if (!(parent instanceof ISequence)) { return null; } if (!finalize) { // the rule is always feasible as tasks may occur at any time RuleApplicationResult result = new RuleApplicationResult(); result.setRuleApplicationStatus(RuleApplicationStatus.RULE_APPLICATION_FEASIBLE); return result; } RuleApplicationData appData = new RuleApplicationData(parent); appData.getStopWatch().start("whole rule application"); do { getSequencesOccuringMostOften(appData); if ((appData.getLastFoundSequences().size() > 0) && (appData.getLastFoundSequences().getOccurrenceCount() > 1)) { System.out.println("found " + appData.getLastFoundSequences().size() + " tasks occurring " + appData.getLastFoundSequences().getOccurrenceCount() + " times"); for (List task : appData.getLastFoundSequences()) { // only tasks occurring more often than once are of interest appData.stopWatch.start("creating task sequences"); String taskId = "task " + RuleUtils.getNewId(); System.out.println(taskId + ": " + task); appData.resetReplacementCounter(); createSequenceForTaskOccurrences(taskId, task, parent, appData); if (appData.getReplacementCounter() < appData.getLastFoundSequences().getOccurrenceCount()) { System.out.println(taskId + ": replaced task only " + appData.getReplacementCounter() + " times instead of expected " + appData.getLastFoundSequences().getOccurrenceCount()); } appData.stopWatch.stop("creating task sequences"); } } } while (appData.getLastFoundSequences().getOccurrenceCount() > 2); appData.getStopWatch().stop("whole rule application"); appData.getStopWatch().dumpStatistics(System.out); if (appData.getResult().getNewlyCreatedParentNodes().size() > 0) { appData.getResult().setRuleApplicationStatus (RuleApplicationStatus.RULE_APPLICATION_FINISHED); } System.out.println(appData.getResult().getNewlyCreatedParentNodes().size()); for (ITaskTreeNode node : appData.getResult().getNewlyCreatedParentNodes()) { for (ITaskTreeNode child : node.getChildren()) { System.out.println(child); } System.out.println(); } return appData.getResult(); } /** *

* TODO: comment *

* * @param i * @return */ private void getSequencesOccuringMostOften(RuleApplicationData appData) { System.out.println("determining most prominent tasks with a maximum of " + (appData.getLastFoundSequences().getOccurrenceCount() - 1) + " occurrences"); Sequences sequences; boolean createNewTrie = (appData.getLastTrie() == null) || appData.getReplacementCounter() > 0; // tree has changed do { if (createNewTrie) { createNewTrie(appData); } /*PathDumper dumper = new PathDumper(); trie.process(dumper); dumper.dump();*/ appData.getStopWatch().start("determining tasks"); MaxCountAndLongestSequenceFinder finder = new MaxCountAndLongestSequenceFinder (appData.getLastFoundSequences().getOccurrenceCount() - 1); appData.getLastTrie().process(finder); sequences = finder.getFoundSequences(); createNewTrie = false; for (List task : sequences) { if (task.size() >= appData.getTrainedSequenceLength()) { // Trie must be recreated with a longer sequence length to be sure that // the found sequences occurring most often are found in their whole length appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1); createNewTrie = true; break; } } appData.getStopWatch().stop("determining tasks"); } while (createNewTrie); appData.setLastFoundSequences(sequences); } /** *

* TODO: comment *

* * @param parent * @return */ private void createNewTrie(RuleApplicationData appData) { System.out.println("training trie with a maximum sequence length of " + appData.getTrainedSequenceLength()); appData.getStopWatch().start("training trie"); appData.setLastTrie(new Trie(nodeComparator)); trainTrie(appData.getTree(), appData); appData.getStopWatch().stop("training trie"); } /** *

* TODO: comment *

* * @param trie * @param parent */ private void trainTrie(ITaskTreeNode parent, RuleApplicationData appData) { // prevent training of already replaces sequences as those shall not be replaced anymore if (!appData.getResult().getNewlyCreatedParentNodes().contains(parent)) { List children = parent.getChildren(); if ((children != null) && (children.size() > 0)) { /*System.out.println(); for (int i = 0; i < children.size(); i++) { System.out.println(children.get(i)); }*/ appData.getLastTrie().train(children, appData.getTrainedSequenceLength()); for (ITaskTreeNode child : children) { trainTrie(child, appData); } } } } /** *

* TODO: comment *

* * @param task * @param parent * @param treeBuilder * @param nodeFactory * @param result */ private void createSequenceForTaskOccurrences(String taskId, List task, ITaskTreeNode parent, RuleApplicationData appData) { List children = parent.getChildren(); if ((children == null) || (children.size() == 0)) { return; } // first traverse the children for (ITaskTreeNode child : children) { createSequenceForTaskOccurrences(taskId, task, child, appData); } // now check the children themselves for an occurrence of the task int index = -1; do { index = getSubListIndex(children, task, ++index); if (index > -1) { if (task.size() < children.size()) { ISequence sequence = RuleUtils.createNewSubSequenceInRange (parent, index, index + task.size() - 1, taskId, taskTreeNodeFactory, taskTreeBuilder); appData.getResult().addNewlyCreatedParentNode(sequence); appData.incrementReplacementCounter(); children = parent.getChildren(); } else { // the whole list of children is an occurrence of this task. Just set the // description of the parent and break up String description = parent.getDescription(); if ((description != null) && (description.length() > 0)) { description += "; " + taskId; } else { description = taskId; } taskTreeBuilder.setDescription(parent, description); break; } } } while (index > -1); } /** *

* TODO: comment *

* * @param trie * @param object * @return */ private int getSubListIndex(List list, List subList, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subList.size(); i++) { matchFound = true; for (int j = 0; j < subList.size(); j++) { if (!nodeComparator.equals(list.get(i + j), subList.get(j))) { matchFound = false; break; } else if (!nodeComparator.equals(subList.get(j), list.get(i + j))){ throw new RuntimeException("comparator not commutative"); } } if (matchFound) { result = i; break; } } return result; } /** *

* TODO comment *

* * @author Patrick Harms */ private class MaxCountAndLongestSequenceFinder implements TrieProcessor { /** * */ private int maxCount; /** * */ private int currentCount; /** * */ private List> foundSequences = new LinkedList>(); /** *

* TODO: comment *

* * @param maxCount */ public MaxCountAndLongestSequenceFinder(int maxCount) { super(); this.maxCount = maxCount; this.currentCount = 0; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) */ @Override public TrieProcessor.Result process(List sequence, int count) { if (sequence.size() < 2) { // ignore single nodes return TrieProcessor.Result.CONTINUE; } if (count < 2) { // ignore singular occurrences return TrieProcessor.Result.SKIP_NODE; } if (count > this.maxCount) { // ignore sequences that occur too often return TrieProcessor.Result.CONTINUE; } if (this.currentCount > count) { // ignore this children of this node, as they may have only smaller counts than // the already found sequences return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount < count) { // the provided sequence occurs more often that all sequences found so far. // clear all found sequences and use the new count as the one searched for foundSequences.clear(); this.currentCount = count; } if (this.currentCount == count) { // the sequence is of interest. Sort it into the other found sequences so that // the longest come first boolean added = false; for (int i = 0; i < foundSequences.size(); i++) { if (foundSequences.get(i).size() < sequence.size()) { // defensive copy foundSequences.add(i, new LinkedList(sequence)); // defensive copy added = true; break; } } if (!added) { foundSequences.add(new LinkedList(sequence)); // defensive copy } } return TrieProcessor.Result.CONTINUE; } /** *

* TODO: comment *

* * @return */ public Sequences getFoundSequences() { removePermutationsOfShorterTasks(); return new Sequences(currentCount, foundSequences); } /** *

* TODO: comment *

* */ private void removePermutationsOfShorterTasks() { // now iterate the sorted list and for each task remove all other tasks that are shorter // (i.e. come later in the sorted list) and that represent a subpart of the task for (int i = 0; i < foundSequences.size(); i++) { for (int j = i + 1; j < foundSequences.size();) { if (foundSequences.get(j).size() < foundSequences.get(i).size()) { // found a task that is a potential subtask. Check for this and remove the // subtask if needed List longTask = foundSequences.get(i); List shortTask = foundSequences.get(j); if (getSubListIndex(longTask, shortTask, 0) > -1) { foundSequences.remove(j); } else { j++; } } else { j++; } } } } } /** * */ private class RuleApplicationData { /** * */ private T tree; /** * */ private RuleApplicationResult result = new RuleApplicationResult(); /** * */ private Sequences lastFoundSequences = new Sequences(Integer.MAX_VALUE, null); /** * */ private Trie lastTrie; /** * default and minimum trained sequence length is 3 */ private int trainedSequenceLength = 3; /** * */ private int replacementCounter; /** * */ private StopWatch stopWatch = new StopWatch(); /** * */ private RuleApplicationData(T tree) { this.tree = tree; } /** * @return the lastFoundSequences */ private Sequences getLastFoundSequences() { return lastFoundSequences; } /** * @param lastFoundSequences the lastFoundSequences to set */ private void setLastFoundSequences(Sequences lastFoundSequences) { this.lastFoundSequences = lastFoundSequences; } /** * @return the lastTrie */ private Trie getLastTrie() { return lastTrie; } /** * @return the trainedSequenceLength */ private int getTrainedSequenceLength() { return trainedSequenceLength; } /** * @param trainedSequenceLength the trainedSequenceLength to set */ private void setTrainedSequenceLength(int trainedSequenceLength) { this.trainedSequenceLength = trainedSequenceLength; } /** * @param lastTrie the lastTrie to set */ private void setLastTrie(Trie lastTrie) { this.lastTrie = lastTrie; } /** * @return the tree */ private T getTree() { return tree; } /** * @return the result */ private RuleApplicationResult getResult() { return result; } /** * @return the stopWatch */ private StopWatch getStopWatch() { return stopWatch; } /** * */ private void resetReplacementCounter() { replacementCounter = 0; } /** * */ private void incrementReplacementCounter() { replacementCounter++; } /** * */ private int getReplacementCounter() { return replacementCounter; } } /** *

* TODO comment *

* * @author Patrick Harms */ private class Sequences implements Iterable> { /** * */ private int occurrenceCount; /** * */ private List> sequences; /** *

* TODO: comment *

* * @param occurrenceCount * @param sequences */ private Sequences(int occurrenceCount, List> sequences) { super(); this.occurrenceCount = occurrenceCount; this.sequences = sequences; } /** *

* TODO: comment *

* * @return */ private int getOccurrenceCount() { return occurrenceCount; } /** *

* TODO: comment *

* * @return */ private int size() { return this.sequences.size(); } /** * */ /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public Iterator> iterator() { return this.sequences.iterator(); } } }