// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEquality; import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEqualityRuleManager; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeBuilder; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNode; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNodeFactory; import de.ugoe.cs.autoquest.usageprofiles.Trie; import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor; import de.ugoe.cs.util.StopWatch; import de.ugoe.cs.util.console.Console; /** *

* TODO comment *

* * @author Patrick Harms */ class SequenceForTaskDetectionRule implements TemporalRelationshipRule { /** *

* the task tree node factory to be used for creating substructures for the temporal * relationships identified during rule *

*/ private ITaskTreeNodeFactory taskTreeNodeFactory; /** *

* the task tree builder to be used for creating substructures for the temporal relationships * identified during rule application *

*/ private ITaskTreeBuilder taskTreeBuilder; /** *

* the node comparator to be used for comparing task tree nodes *

*/ private TaskTreeNodeComparator nodeComparator; /** *

* instantiates the rule and initializes it with a node equality rule manager and the minimal * node equality identified sublist must have to consider them as iterated. *

*/ SequenceForTaskDetectionRule(NodeEqualityRuleManager nodeEqualityRuleManager, NodeEquality minimalNodeEquality, ITaskTreeNodeFactory taskTreeNodeFactory, ITaskTreeBuilder taskTreeBuilder) { this.taskTreeNodeFactory = taskTreeNodeFactory; this.taskTreeBuilder = taskTreeBuilder; this.nodeComparator = new TaskTreeNodeComparator(nodeEqualityRuleManager, minimalNodeEquality); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "SequenceForTaskDetectionRule"; } /* * (non-Javadoc) * * @see de.ugoe.cs.tasktree.temporalrelation.TemporalRelationshipRule#apply(TaskTreeNode, * boolean) */ @Override public RuleApplicationResult apply(ITaskTreeNode parent, boolean finalize) { if (!(parent instanceof ISequence)) { return null; } if (!finalize) { // the rule is always feasible as tasks may occur at any time RuleApplicationResult result = new RuleApplicationResult(); result.setRuleApplicationStatus(RuleApplicationStatus.FEASIBLE); return result; } List children = parent.getChildren(); List sessions = new LinkedList(); for (ITaskTreeNode child : children) { if (child instanceof ISequence) { sessions.add((ISequence) child); } else { Console.println("provided parent is no parent of sessions"); return null; } } RuleApplicationData appData = new RuleApplicationData(sessions); boolean finished = false; // this is the real rule application. Loop while something is replaced. do { System.out.println(); appData.getStopWatch().start("whole loop"); detectAndReplaceIterations(appData); appData.getStopWatch().start("task replacement"); detectAndReplaceTasks(appData); appData.getStopWatch().stop("task replacement"); appData.getStopWatch().stop("whole loop"); //((TaskTreeNodeComparator) nodeComparator).getStopWatch().dumpStatistics(System.out); //((TaskTreeNodeComparator) nodeComparator).getStopWatch().reset(); appData.getStopWatch().dumpStatistics(System.out); appData.getStopWatch().reset(); finished = (appData.getReplacementCounter() == 0); } while (!finished); System.out.println("created " + appData.getResult().getNewlyCreatedParentNodes().size() + " new parent nodes\n"); if (appData.getResult().getNewlyCreatedParentNodes().size() > 0) { appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED); } return appData.getResult(); } /** * @param appData */ private void detectAndReplaceIterations(RuleApplicationData appData) { System.out.print("detecting iterations"); appData.getStopWatch().start("detecting iterations"); List sessions = appData.getSessions(); int foundIterations = 0; for (ISequence session : sessions) { foundIterations += detectAndReplaceIterations(session, appData); } appData.getStopWatch().stop("detecting iterations"); System.out.println(" --> found " + foundIterations); } /** * @param appData */ private int detectAndReplaceIterations(ISequence session, RuleApplicationData appData) { int count = 0; TemporalRelationshipRule rule = new SimpleIterationDetectionRule (nodeComparator, taskTreeNodeFactory, taskTreeBuilder); RuleApplicationResult result = rule.apply(session, true); if ((result != null) && (result.getNewlyCreatedParentNodes() != null)) { for (ITaskTreeNode newParent : result.getNewlyCreatedParentNodes()) { appData.getResult().addNewlyCreatedParentNode(newParent); if (newParent instanceof IIteration) { count++; } } } return count; } /** * @param appData */ private void detectAndReplaceTasks(RuleApplicationData appData) { System.out.println("detecting and replacing tasks"); appData.getStopWatch().start("detecting tasks"); getSequencesOccuringMostOften(appData); appData.getStopWatch().stop("detecting tasks"); appData.getStopWatch().start("replacing tasks"); replaceSequencesOccurringMostOften(appData); appData.getStopWatch().stop("replacing tasks"); System.out.println("detected and replaced " + appData.getLastFoundTasks().size() + " tasks"); } /** * @return */ private void getSequencesOccuringMostOften(RuleApplicationData appData) { System.out.println("determining most prominent tasks"); Tasks tasks; boolean createNewTrie = (appData.getLastTrie() == null) || appData.getReplacementCounter() > 0; // tree has changed do { if (createNewTrie) { createNewTrie(appData); } MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder(); appData.getLastTrie().process(finder); tasks = finder.getFoundTasks(); createNewTrie = false; for (List task : tasks) { if (task.size() >= appData.getTrainedSequenceLength()) { // Trie must be recreated with a longer sequence length to be sure that // the found sequences occurring most often are found in their whole length appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1); createNewTrie = true; break; } } } while (createNewTrie); appData.setLastFoundTasks(tasks); System.out.println("found " + appData.getLastFoundTasks().size() + " tasks occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times"); } /** * @param parent * @return */ private void createNewTrie(RuleApplicationData appData) { System.out.println("training trie with a maximum sequence length of " + appData.getTrainedSequenceLength()); appData.getStopWatch().start("training trie"); appData.setLastTrie(new Trie(nodeComparator)); List sessions = appData.getSessions(); for (ISequence session : sessions) { trainTrie(session, appData); } appData.getStopWatch().stop("training trie"); } /** * @param trie * @param parent */ private void trainTrie(ISequence session, RuleApplicationData appData) { List children = session.getChildren(); if ((children != null) && (children.size() > 0)) { appData.getLastTrie().train(children, appData.getTrainedSequenceLength()); } } /** * @param appData */ private void replaceSequencesOccurringMostOften(RuleApplicationData appData) { appData.resetReplacementCounter(); if ((appData.getLastFoundTasks().size() > 0) && (appData.getLastFoundTasks().getOccurrenceCount() > 1)) { System.out.println("replacing tasks occurrences with merged variants of all versions"); for (List task : appData.getLastFoundTasks()) { String taskId = "task " + RuleUtils.getNewId(); System.out.println("replacing " + taskId + ": " + task); appData.clearTaskOccurrences(); determineVariantsOfTaskOccurrences(task, appData.getSessions(), appData); appData.getStopWatch().start("merging task nodes"); ITaskTreeNode taskReplacement = mergeVariantsOfTaskOccurrence(taskId, appData); appData.getStopWatch().stop("merging task nodes"); appData.resetReplacementCounter(); replaceTaskOccurrences(task, taskReplacement, appData.getSessions(), appData); if (appData.getReplacementCounter() > 0) { appData.getResult().addNewlyCreatedParentNode(taskReplacement); } if (appData.getReplacementCounter() < appData.getLastFoundTasks().getOccurrenceCount()) { System.out.println(taskId + ": replaced task only " + appData.getReplacementCounter() + " times instead of expected " + appData.getLastFoundTasks().getOccurrenceCount()); } } } } /** * @param tree */ private void determineVariantsOfTaskOccurrences(List task, List sessions, RuleApplicationData appData) { for (ISequence session : sessions) { int index = -1; List children = session.getChildren(); do { index = getSubListIndex(children, task, ++index); if (index > -1) { ISequence taskOccurrence = RuleUtils.getSubSequenceInRange (session, index, index + task.size() - 1, null, taskTreeNodeFactory, taskTreeBuilder); appData.addTaskOccurrence(taskOccurrence); // let the index point to the last element the belongs the identified occurrence index += task.size() - 1; } } while (index > -1); } } /** * @param appData * @return */ private ITaskTreeNode mergeVariantsOfTaskOccurrence(String taskId, RuleApplicationData appData) { return mergeVariantsOfTasks(taskId, appData.getTaskOccurrences()); } /** * @param appData * @return */ private ITaskTreeNode mergeVariantsOfTasks(String description, List variants) { // merge but preserve lexically distinct variants TaskTreeNodeMerger merger = new TaskTreeNodeMerger (taskTreeNodeFactory, taskTreeBuilder, nodeComparator); merger.mergeTaskNodes(variants); if (variants.size() == 1) { ITaskTreeNode replacement = variants.get(0); taskTreeBuilder.setDescription(replacement, description); return replacement; } else { ISelection selection = taskTreeNodeFactory.createNewSelection(); taskTreeBuilder.setDescription(selection, "variants of task " + description); for (ITaskTreeNode variant : variants) { taskTreeBuilder.addChild(selection, variant); } return selection; } } /** * @param task * @param parent * @param treeBuilder * @param nodeFactory * @param result */ private void replaceTaskOccurrences(List task, ITaskTreeNode replacement, List sessions, RuleApplicationData appData) { // now check the children themselves for an occurrence of the task for (int i = 0; i < sessions.size(); i++) { ISequence session = sessions.get(i); int index = -1; List children = session.getChildren(); do { index = getSubListIndex(children, task, ++index); if (index > -1) { if ((!(replacement instanceof ISequence)) || (task.size() < children.size())) { for (int j = index; j < index + task.size(); j++) { taskTreeBuilder.removeChild(session, index); } taskTreeBuilder.addChild(session, index, replacement); appData.incrementReplacementCounter(); children = session.getChildren(); } else { // the whole list of children is an occurrence of this task. So ask the // caller of the method to replace the whole node sessions.set(i, (ISequence) replacement); appData.incrementReplacementCounter(); break; } } } while (index > -1); } } /** * @param trie * @param object * @return */ private int getSubListIndex(List list, List subList, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subList.size(); i++) { matchFound = true; for (int j = 0; j < subList.size(); j++) { if (!nodeComparator.equals(list.get(i + j), subList.get(j))) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } /** * @author Patrick Harms */ private class MaxCountAndLongestTasksFinder implements TrieProcessor { /** * */ private int currentCount; /** * */ private List> foundTasks = new LinkedList>(); /** * */ public MaxCountAndLongestTasksFinder() { super(); this.currentCount = 0; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) */ @Override public TrieProcessor.Result process(List task, int count) { if (task.size() < 2) { // ignore single nodes return TrieProcessor.Result.CONTINUE; } if (count < 2) { // ignore singular occurrences return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount > count) { // ignore this children of this node, as they may have only smaller counts than // the already found tasks return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount < count) { // the provided task occurs more often that all tasks found so far. // clear all found tasks and use the new count as the one searched for foundTasks.clear(); this.currentCount = count; } if (this.currentCount == count) { // the task is of interest. Sort it into the other found tasks so that // the longest come first boolean added = false; for (int i = 0; i < foundTasks.size(); i++) { if (foundTasks.get(i).size() < task.size()) { // defensive copy foundTasks.add(i, new LinkedList(task)); // defensive copy added = true; break; } } if (!added) { foundTasks.add(new LinkedList(task)); // defensive copy } } return TrieProcessor.Result.CONTINUE; } /** * @return */ public Tasks getFoundTasks() { removePermutationsOfShorterTasks(); return new Tasks(currentCount, foundTasks); } /** * */ private void removePermutationsOfShorterTasks() { // now iterate the sorted list and for each task remove all other tasks that are shorter // (i.e. come later in the sorted list) and that represent a subpart of the task for (int i = 0; i < foundTasks.size(); i++) { for (int j = i + 1; j < foundTasks.size();) { if (foundTasks.get(j).size() < foundTasks.get(i).size()) { // found a task that is a potential subtask. Check for this and remove the // subtask if needed List longTask = foundTasks.get(i); List shortTask = foundTasks.get(j); if (getSubListIndex(longTask, shortTask, 0) > -1) { foundTasks.remove(j); } else { j++; } } else { j++; } } } } } /** * */ private static class RuleApplicationData { /** * */ private List sessions; /** * */ private Trie lastTrie; /** * default and minimum trained sequence length is 3 */ private int trainedSequenceLength = 3; /** * */ private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null); /** * */ private List taskOccurrences = new LinkedList(); /** * */ private int replacementCounter; /** * */ private RuleApplicationResult result = new RuleApplicationResult(); /** * */ private StopWatch stopWatch = new StopWatch(); /** * */ private RuleApplicationData(List sessions) { this.sessions = sessions; } /** * @return the tree */ private List getSessions() { return sessions; } /** * @param lastTrie the lastTrie to set */ private void setLastTrie(Trie lastTrie) { this.lastTrie = lastTrie; } /** * @return the lastTrie */ private Trie getLastTrie() { return lastTrie; } /** * @param trainedSequenceLength the trainedSequenceLength to set */ private void setTrainedSequenceLength(int trainedSequenceLength) { this.trainedSequenceLength = trainedSequenceLength; } /** * @return the trainedSequenceLength */ private int getTrainedSequenceLength() { return trainedSequenceLength; } /** * @param lastFoundSequences the lastFoundSequences to set */ private void setLastFoundTasks(Tasks lastFoundSequences) { this.lastFoundTasks = lastFoundSequences; } /** * @return the lastFoundSequences */ private Tasks getLastFoundTasks() { return lastFoundTasks; } /** * @return the taskOccurrences */ private void clearTaskOccurrences() { taskOccurrences.clear(); } /** * @return the taskOccurrences */ private void addTaskOccurrence(ITaskTreeNode taskOccurrence) { taskOccurrences.add(taskOccurrence); } /** * @return the taskOccurrences */ private List getTaskOccurrences() { return taskOccurrences; } /** * */ private void resetReplacementCounter() { replacementCounter = 0; } /** * */ private void incrementReplacementCounter() { replacementCounter++; } /** * */ private int getReplacementCounter() { return replacementCounter; } /** * @return the result */ private RuleApplicationResult getResult() { return result; } /** * @return the stopWatch */ private StopWatch getStopWatch() { return stopWatch; } } /** * @author Patrick Harms */ private static class Tasks implements Iterable> { /** * */ private int occurrenceCount; /** * */ private List> sequences; /** * @param occurrenceCount * @param sequences */ private Tasks(int occurrenceCount, List> sequences) { super(); this.occurrenceCount = occurrenceCount; this.sequences = sequences; } /** * @return */ private int getOccurrenceCount() { return occurrenceCount; } /** * @return */ private int size() { return this.sequences.size(); } /** * */ /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public Iterator> iterator() { return this.sequences.iterator(); } } }