// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality; import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEqualityRuleManager; import de.ugoe.cs.autoquest.tasktrees.treeifc.IEventTask; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList; import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession; import de.ugoe.cs.autoquest.usageprofiles.SymbolComparator; import de.ugoe.cs.autoquest.usageprofiles.Trie; import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor; import de.ugoe.cs.util.StopWatch; /** *

* TODO comment *

* * @author Patrick Harms */ class SequenceForTaskDetectionRule implements ISessionScopeRule { /** *

* the task factory to be used for creating substructures for the temporal * relationships identified during rule *

*/ private ITaskFactory taskFactory; /** *

* the task builder to be used for creating substructures for the temporal relationships * identified during rule application *

*/ private ITaskBuilder taskBuilder; /** *

* the task comparator to be used for comparing tasks *

*/ private TaskComparator taskComparator; /** *

* instantiates the rule and initializes it with a task equality rule manager and the minimal * task equality identified sublist must have to consider them as iterated. *

*/ SequenceForTaskDetectionRule(TaskEqualityRuleManager taskEqualityRuleManager, TaskEquality minimalTaskEquality, ITaskFactory taskFactory, ITaskBuilder taskBuilder) { this.taskFactory = taskFactory; this.taskBuilder = taskBuilder; this.taskComparator = new TaskComparator(taskEqualityRuleManager, minimalTaskEquality); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "SequenceForTaskDetectionRule"; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List) */ @Override public RuleApplicationResult apply(List sessions) { RuleApplicationData appData = new RuleApplicationData(sessions); // this is the real rule application. Loop while something is replaced. do { System.out.println(); appData.getStopWatch().start("whole loop"); detectAndReplaceIterations(appData); appData.getStopWatch().start("task replacement"); detectAndReplaceTasks(appData); appData.getStopWatch().stop("task replacement"); appData.getStopWatch().stop("whole loop"); //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out); //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset(); appData.getStopWatch().dumpStatistics(System.out); appData.getStopWatch().reset(); } while (appData.detectedAndReplacedTasks()); System.out.println ("created " + appData.getResult().getNewlyCreatedTasks().size() + " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() + " appropriate instances\n"); if ((appData.getResult().getNewlyCreatedTasks().size() > 0) || (appData.getResult().getNewlyCreatedTaskInstances().size() > 0)) { appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED); } return appData.getResult(); } /** * @param appData */ private void detectAndReplaceIterations(RuleApplicationData appData) { System.out.print("detecting iterations"); appData.getStopWatch().start("detecting iterations"); List sessions = appData.getSessions(); int iteratedTasks = 0; ITask iteratedTask = null; do { iteratedTask = searchIteratedTask(sessions); if (iteratedTask != null) { replaceIterationsOf(iteratedTask, sessions, appData); iteratedTasks++; } } while (iteratedTask != null); appData.getStopWatch().stop("detecting iterations"); System.out.println(" --> found " + iteratedTasks + " iterated tasks"); } /** * */ private ITask searchIteratedTask(List sessions) { for (IUserSession session : sessions) { for (int i = 0; i < (session.size() - 1); i++) { if (taskComparator.equals(session.get(i), session.get(i + 1))) { return session.get(i).getTask(); } } } return null; } /** * */ /* private ITask searchIteratedTask(List sessions) { int minNoOfRepetitions = 2; int minNoOfIterationOccurrences = 1; Map iterationsCounter = new HashMap(); for (IUserSession session : sessions) { for (int i = 0; i < (session.size() - minNoOfRepetitions + 1); i++) { ITask task = session.get(i).getTask(); // check if the task is iterated boolean isIterated = true; for (int j = i + 1; j < i + minNoOfRepetitions; j++) { if (!taskComparator.equals(task, session.get(j).getTask())) { isIterated = false; break; } } if (isIterated) { Integer currentCounter = null; for (Map.Entry entry : iterationsCounter.entrySet()) { if (taskComparator.equals(task, entry.getKey())) { currentCounter = entry.getValue(); break; } } if (currentCounter == null) { currentCounter = 1; iterationsCounter.put(task, currentCounter); } else { currentCounter++; iterationsCounter.put(task, currentCounter); } if (currentCounter >= minNoOfIterationOccurrences) { return task; } } } } return null; }*/ /** * */ private void replaceIterationsOf(ITask iteratedTask, List sessions, RuleApplicationData appData) { IIteration iteration = taskFactory.createNewIteration(); ITaskInstance iterationInstance = null; List iterationInstances = new LinkedList(); for (IUserSession session : sessions) { int index = 0; while (index < session.size()) { if (taskComparator.equals(iteratedTask, session.get(index).getTask())) { if (iterationInstance == null) { iterationInstance = taskFactory.createNewTaskInstance(iteration); iterationInstances.add(iterationInstance); taskBuilder.addTaskInstance(session, index, iterationInstance); index++; } taskBuilder.addChild(iterationInstance, session.get(index)); taskBuilder.removeTaskInstance(session, index); } else { if (iterationInstance != null) { iterationInstance = null; } index++; } } } harmonizeIterationInstancesModel(iteration, iterationInstances); } /** * */ private void harmonizeIterationInstancesModel(IIteration iteration, List iterationInstances) { List iteratedTaskVariants = new LinkedList(); // merge the lexically different variants of iterated task to a unique list for (ITaskInstance iterationInstance : iterationInstances) { for (ITaskInstance executionVariant : iterationInstance) { ITask candidate = executionVariant.getTask(); boolean found = false; for (ITask taskVariant : iteratedTaskVariants) { if (taskComparator.areLexicallyEqual(taskVariant, candidate)) { taskBuilder.setTask(executionVariant, taskVariant); found = true; break; } } if (!found) { iteratedTaskVariants.add(candidate); } } } // if there are more than one lexically different variant of iterated tasks, adapt the // iteration model to be a selection of different variants. In this case also adapt // the generated iteration instances to correctly contain selection instances. If there // is only one variant of an iterated task, simply set this as the marked task of the // iteration. In this case, the instances can be preserved as is if (iteratedTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : iteratedTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.setMarkedTask(iteration, selection); for (ITaskInstance instance : iterationInstances) { for (int i = 0; i < instance.size(); i++) { ITaskInstance selectionInstance = taskFactory.createNewTaskInstance(selection); taskBuilder.addChild(selectionInstance, instance.get(i)); taskBuilder.setTaskInstance(instance, i, selectionInstance); } } } else { taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0)); } } /** * @param appData */ private void detectAndReplaceTasks(RuleApplicationData appData) { System.out.println("detecting and replacing tasks"); appData.getStopWatch().start("detecting tasks"); getSequencesOccuringMostOften(appData); appData.getStopWatch().stop("detecting tasks"); appData.getStopWatch().start("replacing tasks"); replaceSequencesOccurringMostOften(appData); appData.getStopWatch().stop("replacing tasks"); System.out.println("detected and replaced " + appData.getLastFoundTasks().size() + " tasks"); } /** * @return */ private void getSequencesOccuringMostOften(RuleApplicationData appData) { System.out.println("determining most prominent tasks"); Tasks tasks; boolean createNewTrie = (appData.getLastTrie() == null) || appData.detectedAndReplacedTasks(); // tree has changed do { if (createNewTrie) { createNewTrie(appData); } MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder(); appData.getLastTrie().process(finder); tasks = finder.getFoundTasks(); createNewTrie = false; for (List task : tasks) { if (task.size() >= appData.getTrainedSequenceLength()) { // Trie must be recreated with a longer sequence length to be sure that // the found sequences occurring most often are found in their whole length appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1); createNewTrie = true; break; } } } while (createNewTrie); appData.setLastFoundTasks(tasks); System.out.println("found " + appData.getLastFoundTasks().size() + " tasks occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times"); } /** * @param parent * @return */ private void createNewTrie(RuleApplicationData appData) { System.out.println("training trie with a maximum sequence length of " + appData.getTrainedSequenceLength()); appData.getStopWatch().start("training trie"); appData.setLastTrie(new Trie(taskComparator)); List sessions = appData.getSessions(); for (IUserSession session : sessions) { trainTrie(session, appData); } appData.getStopWatch().stop("training trie"); } /** * @param trie * @param parent */ private void trainTrie(IUserSession session, RuleApplicationData appData) { List children = session.getExecutedTasks(); if ((children != null) && (children.size() > 0)) { appData.getLastTrie().train(children, appData.getTrainedSequenceLength()); } } /** * @param appData */ private void replaceSequencesOccurringMostOften(RuleApplicationData appData) { appData.detectedAndReplacedTasks(false); if ((appData.getLastFoundTasks().size() > 0) && (appData.getLastFoundTasks().getOccurrenceCount() > 1)) { System.out.println("replacing tasks occurrences"); for (List task : appData.getLastFoundTasks()) { ISequence sequence = taskFactory.createNewSequence(); System.out.println("replacing " + sequence.getId() + ": " + task); List sequenceInstances = replaceTaskOccurrences(task, appData.getSessions(), sequence); harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size()); appData.detectedAndReplacedTasks(sequenceInstances.size() > 0); if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) { System.out.println(sequence.getId() + ": replaced task only " + sequenceInstances.size() + " times instead of expected " + appData.getLastFoundTasks().getOccurrenceCount()); } } } } /** * */ private void harmonizeSequenceInstancesModel(ISequence sequence, List sequenceInstances, int sequenceLength) { // ensure for each subtask that lexically different variants are preserved for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) { List subTaskVariants = new LinkedList(); for (ITaskInstance sequenceInstance : sequenceInstances) { ITask candidate = sequenceInstance.get(subTaskIndex).getTask(); boolean found = false; for (int i = 0; i < subTaskVariants.size(); i++) { if (taskComparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) { taskBuilder.setTask (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i)); found = true; break; } } if (!found) { subTaskVariants.add(candidate); } } // if there are more than one lexically different variant of the sub task at // the considered position, adapt the sequence model at that position to have // a selection of the different variants. In this case also adapt the // generated sequence instances to correctly contain selection instances. If // there is only one variant of sub tasks at the given position, simply set // this variant as the sub task of the selection. In this case, the instances // can be preserved as is if (subTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : subTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.addChild(sequence, selection); for (ITaskInstance instance : sequenceInstances) { ITaskInstance selectionInstance = taskFactory.createNewTaskInstance(selection); taskBuilder.addChild(selectionInstance, instance.get(subTaskIndex)); taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance); } } else if (subTaskVariants.size() == 1) { taskBuilder.addChild(sequence, subTaskVariants.get(0)); } } } /** * @param tree */ private List replaceTaskOccurrences(List task, List sessions, ISequence temporalTaskModel) { List sequenceInstances = new LinkedList(); for (IUserSession session : sessions) { int index = -1; do { index = getSubListIndex(session, task, ++index); if (index > -1) { sequenceInstances.add (RuleUtils.createNewSubSequenceInRange (session, index, index + task.size() - 1, temporalTaskModel, taskFactory, taskBuilder)); } } while (index > -1); } return sequenceInstances; } /** * @param trie * @param object * @return */ private int getSubListIndex(ITaskInstanceList list, List subList, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subList.size(); i++) { matchFound = true; for (int j = 0; j < subList.size(); j++) { if (!taskComparator.equals(list.get(i + j), subList.get(j))) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } /** * @param trie * @param object * @return */ private int getSubListIndex(List list, List subList, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subList.size(); i++) { matchFound = true; for (int j = 0; j < subList.size(); j++) { if (!taskComparator.equals(list.get(i + j), subList.get(j))) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } /** * @author Patrick Harms */ private class MaxCountAndLongestTasksFinder implements TrieProcessor { /** * */ private int currentCount; /** * */ private List> foundTasks = new LinkedList>(); /** * */ public MaxCountAndLongestTasksFinder() { super(); this.currentCount = 0; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) */ @Override public TrieProcessor.Result process(List foundTask, int count) { if (foundTask.size() < 2) { // ignore single tasks return TrieProcessor.Result.CONTINUE; } if (count < 2) { // ignore singular occurrences return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount > count) { // ignore this children of this task, as they may have only smaller counts than // the already found tasks return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount < count) { // the provided task occurs more often that all tasks found so far. // clear all found tasks and use the new count as the one searched for foundTasks.clear(); this.currentCount = count; } if (this.currentCount == count) { // the task is of interest. Sort it into the other found tasks so that // the longest come first boolean added = false; for (int i = 0; i < foundTasks.size(); i++) { if (foundTasks.get(i).size() < foundTask.size()) { // defensive copy foundTasks.add(i, new LinkedList(foundTask)); // defensive copy added = true; break; } } if (!added) { foundTasks.add(new LinkedList(foundTask)); // defensive copy } } return TrieProcessor.Result.CONTINUE; } /** * @return */ public Tasks getFoundTasks() { removePermutationsOfShorterTasks(); return new Tasks(currentCount, foundTasks); } /** * */ private void removePermutationsOfShorterTasks() { // now iterate the sorted list and for each task remove all other tasks that are shorter // (i.e. come later in the sorted list) and that represent a subpart of the task for (int i = 0; i < foundTasks.size(); i++) { for (int j = i + 1; j < foundTasks.size();) { if (foundTasks.get(j).size() < foundTasks.get(i).size()) { // found a task that is a potential subtask. Check for this and remove the // subtask if needed List longTask = foundTasks.get(i); List shortTask = foundTasks.get(j); if (getSubListIndex(longTask, shortTask, 0) > -1) { foundTasks.remove(j); } else { j++; } } else { j++; } } } } } /** * */ private static class RuleApplicationData { /** * */ private List sessions; /** * */ private Trie lastTrie; /** * default and minimum trained sequence length is 3 */ private int trainedSequenceLength = 3; /** * */ private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null); /** * */ private boolean detectedAndReplacedTasks; /** * */ private RuleApplicationResult result = new RuleApplicationResult(); /** * */ private StopWatch stopWatch = new StopWatch(); /** * */ private RuleApplicationData(List sessions) { this.sessions = sessions; } /** * @return the tree */ private List getSessions() { return sessions; } /** * @param lastTrie the lastTrie to set */ private void setLastTrie(Trie lastTrie) { this.lastTrie = lastTrie; } /** * @return the lastTrie */ private Trie getLastTrie() { return lastTrie; } /** * @param trainedSequenceLength the trainedSequenceLength to set */ private void setTrainedSequenceLength(int trainedSequenceLength) { this.trainedSequenceLength = trainedSequenceLength; } /** * @return the trainedSequenceLength */ private int getTrainedSequenceLength() { return trainedSequenceLength; } /** * @param lastFoundSequences the lastFoundSequences to set */ private void setLastFoundTasks(Tasks lastFoundSequences) { this.lastFoundTasks = lastFoundSequences; } /** * @return the lastFoundSequences */ private Tasks getLastFoundTasks() { return lastFoundTasks; } /** * */ private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) { this.detectedAndReplacedTasks = detectedAndReplacedTasks; } /** * */ private boolean detectedAndReplacedTasks() { return detectedAndReplacedTasks; } /** * @return the result */ private RuleApplicationResult getResult() { return result; } /** * @return the stopWatch */ private StopWatch getStopWatch() { return stopWatch; } } /** * @author Patrick Harms */ private static class Tasks implements Iterable> { /** * */ private int occurrenceCount; /** * */ private List> sequences; /** * @param occurrenceCount * @param sequences */ private Tasks(int occurrenceCount, List> sequences) { super(); this.occurrenceCount = occurrenceCount; this.sequences = sequences; } /** * @return */ private int getOccurrenceCount() { return occurrenceCount; } /** * @return */ private int size() { return this.sequences.size(); } /** * */ /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public Iterator> iterator() { return this.sequences.iterator(); } } /** * */ private static class TaskComparator implements SymbolComparator { /** */ private Comparer comparer; /** */ private Comparer lexicalComparer; /** */ private HashMap equalityBuffer = new HashMap(); /** */ private HashMap lexicalEqualityBuffer; /** * */ public TaskComparator(TaskEqualityRuleManager taskEqualityRuleManager, TaskEquality minimalNodeEquality) { super(); if (minimalNodeEquality == TaskEquality.LEXICALLY_EQUAL) { comparer = new LexicalComparer(taskEqualityRuleManager); } else if (minimalNodeEquality == TaskEquality.SYNTACTICALLY_EQUAL) { comparer = new SyntacticalComparer(taskEqualityRuleManager); } else if (minimalNodeEquality == TaskEquality.SEMANTICALLY_EQUAL) { comparer = new SemanticalComparer(taskEqualityRuleManager); } else { comparer = new DefaultComparer(taskEqualityRuleManager, minimalNodeEquality); } if (minimalNodeEquality == TaskEquality.LEXICALLY_EQUAL) { lexicalComparer = comparer; lexicalEqualityBuffer = equalityBuffer; } else { lexicalComparer = new LexicalComparer(taskEqualityRuleManager); lexicalEqualityBuffer = new HashMap(); } } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.SymbolComparator#equals(java.lang.Object, java.lang.Object) */ @Override public boolean equals(ITaskInstance taskInstance1, ITaskInstance taskInstance2) { return equals(taskInstance1.getTask(), taskInstance2.getTask()); } /** * */ public boolean equals(ITask task1, ITask task2) { Boolean result; if (task1 != task2) { if ((task1 instanceof IEventTask) && (task2 instanceof IEventTask)) { long key = ((long) System.identityHashCode(task1)) << 32; key += System.identityHashCode(task2); result = equalityBuffer.get(key); if (result == null) { result = comparer.compare(task1, task2); equalityBuffer.put(key, result); } } else { result = false; } } else { result = true; } return result; } /** * */ public boolean areLexicallyEqual(ITask task1, ITask task2) { Boolean result; if (task1 != task2) { long key = ((long) System.identityHashCode(task1)) << 32; key += System.identityHashCode(task2); result = lexicalEqualityBuffer.get(key); if (result == null) { result = lexicalComparer.compare(task1, task2); lexicalEqualityBuffer.put(key, result); } } else { result = true; } return result; } } /** * */ private static interface Comparer { /** * */ boolean compare(ITask task1, ITask task2); } /** * */ private static class LexicalComparer implements Comparer { /** *

* the task equality manager needed for comparing tasks with each other *

*/ private TaskEqualityRuleManager taskEqualityRuleManager; /** * */ public LexicalComparer(TaskEqualityRuleManager taskEqualityRuleManager) { this.taskEqualityRuleManager = taskEqualityRuleManager; } /** * */ public boolean compare(ITask task1, ITask task2) { return taskEqualityRuleManager.areLexicallyEqual(task1, task2); } } /** * */ private static class SyntacticalComparer implements Comparer { /** *

* the task equality manager needed for comparing tasks with each other *

*/ private TaskEqualityRuleManager taskEqualityRuleManager; /** * */ public SyntacticalComparer(TaskEqualityRuleManager taskEqualityRuleManager) { this.taskEqualityRuleManager = taskEqualityRuleManager; } /** * */ public boolean compare(ITask task1, ITask task2) { return taskEqualityRuleManager.areSyntacticallyEqual(task1, task2); } } /** * */ private static class SemanticalComparer implements Comparer { /** *

* the task equality manager needed for comparing tasks with each other *

*/ private TaskEqualityRuleManager taskEqualityRuleManager; /** * */ public SemanticalComparer(TaskEqualityRuleManager taskEqualityRuleManager) { this.taskEqualityRuleManager = taskEqualityRuleManager; } /** * */ public boolean compare(ITask task1, ITask task2) { return taskEqualityRuleManager.areSemanticallyEqual(task1, task2); } } /** * */ private static class DefaultComparer implements Comparer { /** *

* the task equality manager needed for comparing tasks with each other *

*/ private TaskEqualityRuleManager taskEqualityRuleManager; /** *

* the minimal task equality two identified sublists need to have to consider them as equal *

*/ private TaskEquality minimalNodeEquality; /** * */ public DefaultComparer(TaskEqualityRuleManager taskEqualityRuleManager, TaskEquality minimalNodeEquality) { this.taskEqualityRuleManager = taskEqualityRuleManager; this.minimalNodeEquality = minimalNodeEquality; } /** * */ public boolean compare(ITask task1, ITask task2) { return taskEqualityRuleManager.areAtLeastEqual(task1, task2, minimalNodeEquality); } } }