// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Set; import java.util.logging.Level; import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality; import de.ugoe.cs.autoquest.tasktrees.temporalrelation.utils.DebuggingHelper; import de.ugoe.cs.autoquest.tasktrees.treeifc.DefaultTaskInstanceTraversingVisitor; import de.ugoe.cs.autoquest.tasktrees.treeifc.IEventTaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional; import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptionalInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList; import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession; import de.ugoe.cs.autoquest.usageprofiles.SymbolMap; import de.ugoe.cs.autoquest.usageprofiles.Trie; import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor; import de.ugoe.cs.util.StopWatch; import de.ugoe.cs.util.console.Console; /** *

* This class implements the major rule for creating task trees based on a set of recorded * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it * searches the sessions for iterations and replaces them accordingly. Then it searches for sub * sequences being the longest and occurring most often. For each found sub sequence, it replaces * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for * iterations and then again for sub sequences until no more replacements are done. *

*

* For determining the longest sequence occurring most often, the implementation uses a * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence * occurring most often whose length is equal to the depth of the trie, it recalculates the trie * with an increased depth. *

* * @author Patrick Harms */ class SequenceForTaskDetectionRule implements ISessionScopeRule { /** *

* the task factory to be used for creating substructures for the temporal * relationships identified during rul application *

*/ private ITaskFactory taskFactory; /** *

* the task builder to be used for creating substructures for the temporal relationships * identified during rule application *

*/ private ITaskBuilder taskBuilder; /** *

* the task handling strategy to be used for comparing tasks for preparation, i.e., before * the tasks are harmonized *

*/ private TaskHandlingStrategy preparationTaskHandlingStrategy; /** *

* the task handling strategy to be used for comparing tasks during iteration detection an trie * generation, i.e., after the tasks are harmonized *

*/ private TaskHandlingStrategy identityTaskHandlingStrategy;; /** *

* the minimum number of event task instances a sequence must cover to still detect it as a * representative task *

*/ private int minimumSequenceCoverage;; /** *

* instantiates the rule and initializes it with a task equality to be considered when * comparing tasks as well as a task factory and builder to be used for creating task * structures. *

* * @param minimalTaskEquality the task equality to be considered when comparing tasks * @param taskFactory the task factory to be used for creating substructures * @param taskBuilder the task builder to be used for creating substructures * @param minimumSequenceCoverage the minimum number of event task instances a sequence must * cover to still detect it as a representative task */ SequenceForTaskDetectionRule(TaskEquality minimalTaskEquality, ITaskFactory taskFactory, ITaskBuilder taskBuilder, int minimumSequenceCoverage) { this.taskFactory = taskFactory; this.taskBuilder = taskBuilder; this.minimumSequenceCoverage = minimumSequenceCoverage; this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality); this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "SequenceForTaskDetectionRule"; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List) */ @Override public RuleApplicationResult apply(List sessions) { RuleApplicationData appData = new RuleApplicationData(sessions); // this is the real rule application. Loop while something is replaced. harmonizeEventTaskInstancesModel(appData); do { System.out.println(); appData.getStopWatch().start("whole loop"); detectAndReplaceIterations(appData); appData.getStopWatch().start("task replacement"); detectAndReplaceTasks(appData); appData.getStopWatch().stop("task replacement"); appData.getStopWatch().stop("whole loop"); //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out); //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset(); appData.getStopWatch().dumpStatistics(System.out); appData.getStopWatch().reset(); } while (appData.detectedAndReplacedTasks()); Console.println ("created " + appData.getResult().getNewlyCreatedTasks().size() + " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() + " appropriate instances\n"); if ((appData.getResult().getNewlyCreatedTasks().size() > 0) || (appData.getResult().getNewlyCreatedTaskInstances().size() > 0)) { appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED); } return appData.getResult(); } /** *

* harmonizes the event task instances by unifying tasks. This is done, as initially the * event tasks being equal with respect to the considered task equality are distinct objects. * The comparison of these distinct objects is more time consuming than comparing the object * references. *

* * @param appData the rule application data combining all data used for applying this rule */ private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) { Console.traceln(Level.INFO, "harmonizing task model of event task instances"); appData.getStopWatch().start("harmonizing event tasks"); SymbolMap uniqueTasks = preparationTaskHandlingStrategy.createSymbolMap(); TaskComparator comparator = preparationTaskHandlingStrategy.getTaskComparator(); int unifiedTasks = 0; ITask task; List sessions = appData.getSessions(); int sessionNo = 0; for (IUserSession session : sessions) { Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session); for (ITaskInstance taskInstance : session) { task = uniqueTasks.getValue(taskInstance.getTask()); if (task == null) { uniqueTasks.addSymbol(taskInstance.getTask(), taskInstance.getTask()); } else { taskBuilder.setTask(taskInstance, task); unifiedTasks++; } } comparator.clearBuffers(); } appData.getStopWatch().stop("harmonizing event tasks"); Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " + uniqueTasks.size() + " different tasks)"); appData.getStopWatch().dumpStatistics(System.out); appData.getStopWatch().reset(); } /** *

* searches for direct iterations of single tasks in all sequences and replaces them with * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of * a task that is iterated somewhen are replaced with iterations to have again an efficient * way for task comparisons. *

* * @param appData the rule application data combining all data used for applying this rule */ private void detectAndReplaceIterations(RuleApplicationData appData) { Console.traceln(Level.FINE, "detecting iterations"); appData.getStopWatch().start("detecting iterations"); List sessions = appData.getSessions(); Set iteratedTasks = searchIteratedTasks(sessions); if (iteratedTasks.size() > 0) { replaceIterationsOf(iteratedTasks, sessions, appData); } appData.getStopWatch().stop("detecting iterations"); Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks"); } /** *

* searches the provided sessions for task iterations. If a task is iterated, it is added * to the returned set. *

* * @param the session to search for iterations in * * @return a set of tasks being iterated somewhere */ private Set searchIteratedTasks(List sessions) { Set iteratedTasks = new HashSet(); for (IUserSession session : sessions) { for (int i = 0; i < (session.size() - 1); i++) { // we prepared the task instances to refer to unique tasks, if they are treated // as equal. Therefore, we just compare the identity of the tasks of the task // instances if (session.get(i).getTask() == session.get(i + 1).getTask()) { iteratedTasks.add(session.get(i).getTask()); } } } return iteratedTasks; } /** *

* replaces all occurrences of all tasks provided in the set with iterations *

* * @param iteratedTasks the tasks to be replaced with iterations * @param sessions the sessions in which the tasks are to be replaced * @param appData the rule application data combining all data used for applying this rule */ private void replaceIterationsOf(Set iteratedTasks, List sessions, RuleApplicationData appData) { Map iterations = new HashMap(); Map optionals = new HashMap(); Map> iterationInstances = new HashMap>(); for (ITask iteratedTask : iteratedTasks) { IIteration iteration = taskFactory.createNewIteration(); iterations.put(iteratedTask, iteration); if (iteratedTask instanceof IOptional) { IOptional optional = taskFactory.createNewOptional(); taskBuilder.setMarkedTask(optional, iteration); optionals.put(iteration, optional); } iterationInstances.put(iteration, new LinkedList()); } IOptionalInstance optionalInstance; IIterationInstance iterationInstance; for (IUserSession session : sessions) { int index = 0; optionalInstance = null; iterationInstance = null; boolean inReplacement = false; while (index < session.size()) { // we prepared the task instances to refer to unique tasks, if they are treated // as equal. Therefore, we just compare the identity of the tasks of the task // instances ITask currentTask = session.get(index).getTask(); IIteration iteration = iterations.get(currentTask); if (iteration != null) { // replacement required. Check if we already replaced a previous child, or if // we did, if the current child needs to be in the same iteration instance or // a new one. if (!inReplacement || (iterationInstance.getTask() != iteration)) { // initiate a new replacement by creating the corresponding new instance if (currentTask instanceof IOptional) { IOptional optional = optionals.get(iteration); optionalInstance = taskFactory.createNewTaskInstance(optional); taskBuilder.addTaskInstance(session, index, optionalInstance); } else { iterationInstance = taskFactory.createNewTaskInstance(iteration); iterationInstances.get(iteration).add(iterationInstance); taskBuilder.addTaskInstance(session, index, iterationInstance); } inReplacement = true; index++; } // add the current task instance to the iteration instance that is going to // replace it if (currentTask instanceof IOptional) { ITaskInstance child = ((IOptionalInstance) session.get(index)).getChild(); if (child != null) { if (iterationInstance == null) { iterationInstance = taskFactory.createNewTaskInstance(iteration); iterationInstances.get(iteration).add(iterationInstance); taskBuilder.setChild(optionalInstance, iterationInstance); } taskBuilder.addChild(iterationInstance, child); } } else { taskBuilder.addChild(iterationInstance, session.get(index)); } // remove the task instance that was added to the replacing iteration instance taskBuilder.removeTaskInstance(session, index); } else { // no replacement require. Finish previous replacements and continue if (iterationInstance != null) { iterationInstance = null; optionalInstance = null; inReplacement = false; } index++; } } } for (Map.Entry> entry : iterationInstances.entrySet()) { harmonizeIterationInstancesModel(entry.getKey(), entry.getValue()); } } /** *

* This may not be required anymore. The goal was the following. Consider two subsequent task * instances which are detected to be repeating tasks on a level defined by the * preparationTaskHandlingStrategy. Then the replacement of these iterated tasks by iteration * instances would result in an iteration instance with two children, both of them having a * different task assigned which does not match the child of the iteration model. This needs * to be harmonized afterwards. *

*

* But, because we introduced the harmonization of event tasks directly at the beginning and * afterwards, we compare only based on object identity, this may not be required anymore. *

*/ private void harmonizeIterationInstancesModel(IIteration iteration, List iterationInstances) { List iteratedTaskVariants = new LinkedList(); TaskComparator comparator = preparationTaskHandlingStrategy.getTaskComparator(); // merge the lexically different variants of iterated task to a unique list for (IIterationInstance iterationInstance : iterationInstances) { for (ITaskInstance executionVariant : iterationInstance) { ITask candidate = executionVariant.getTask(); boolean found = false; for (ITask taskVariant : iteratedTaskVariants) { if (comparator.areLexicallyEqual(taskVariant, candidate)) { taskBuilder.setTask(executionVariant, taskVariant); found = true; break; } } if (!found) { iteratedTaskVariants.add(candidate); } } } // if there are more than one lexically different variant of iterated tasks, adapt the // iteration model to be a selection of different variants. In this case also adapt // the generated iteration instances to correctly contain selection instances. If there // is only one variant of an iterated task, simply set this as the marked task of the // iteration. In this case, the instances can be preserved as is if (iteratedTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : iteratedTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.setMarkedTask(iteration, selection); for (IIterationInstance instance : iterationInstances) { for (int i = 0; i < instance.size(); i++) { ISelectionInstance selectionInstance = taskFactory.createNewTaskInstance(selection); taskBuilder.setChild(selectionInstance, instance.get(i)); taskBuilder.setTaskInstance(instance, i, selectionInstance); } } } else { taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0)); } } /** * TODO go on commenting * @param appData the rule application data combining all data used for applying this rule */ private void detectAndReplaceTasks(RuleApplicationData appData) { Console.traceln(Level.FINE, "detecting and replacing tasks"); appData.getStopWatch().start("detecting tasks"); getSubsequencesOccuringMostOften(appData); appData.getStopWatch().stop("detecting tasks"); appData.getStopWatch().start("sorting tasks"); removeInterleavingTasks(appData); appData.getStopWatch().stop("sorting tasks"); appData.getStopWatch().start("replacing tasks"); replaceSequencesOccurringMostOften(appData); appData.getStopWatch().stop("replacing tasks"); Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundSubsequences().size() + " tasks occuring " + appData.getLastFoundSubsequences().getOccurrenceCount() + " times"); } /** * @param appData the rule application data combining all data used for applying this rule */ private void getSubsequencesOccuringMostOften(RuleApplicationData appData) { Console.traceln(Level.FINER, "determining most prominent subsequences"); Subsequences subsequences; boolean createNewTrie = (appData.getLastTrie() == null) || appData.detectedAndReplacedTasks(); // tree has changed do { if (createNewTrie) { createNewTrie(appData); } appData.getStopWatch().start("applying finder"); MaxCountAndLongestSubsequencesFinder finder = new MaxCountAndLongestSubsequencesFinder(); appData.getLastTrie().process(finder); appData.getStopWatch().stop("applying finder"); appData.getStopWatch().start("remove permutations"); subsequences = finder.getFoundSubsequences(); appData.getStopWatch().stop("remove permutations"); appData.getStopWatch().start("drop unrepresentative"); dropUnrepresentative(subsequences, appData); appData.getStopWatch().stop("drop unrepresentative"); createNewTrie = false; for (Subsequence subsequence : subsequences) { if (subsequence.size() >= appData.getTrainedSubsequenceLength()) { // Trie must be recreated with a longer sequence length to be sure that // the found sequences occurring most often are found in their whole length appData.setTrainedSubsequenceLength(appData.getTrainedSubsequenceLength() + 3); createNewTrie = true; break; } } } while (createNewTrie); // create a normal trie for comparison /*System.out.println("creating test trie for comparison"); appData.getStopWatch().start("testtrie"); Trie trie = new Trie(identityTaskComparator); for (IUserSession session : appData.getSessions()) { trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength()); } appData.getStopWatch().stop("testtrie"); MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder(); trie.process(finder); boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size(); allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount; for (List task1 : finder.getFoundTasks()) { boolean foundTask = false; for (List task2 : tasks) { boolean tasksEqual = false; if (task1.size() == task2.size()) { tasksEqual = true; for (int i = 0; i < task1.size(); i++) { if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask())) { tasksEqual = false; } } } if (tasksEqual) { foundTask = true; break; } } if (!foundTask) { System.out.println("different is " + task1); allTasksEqual = false; break; } } if (!allTasksEqual) { System.out.println(finder.getFoundTasks()); System.out.println(tasks); throw new IllegalArgumentException("both tries calculated different subsequences"); } /*TrieStatisticsDumper dumper = new TrieStatisticsDumper(); appData.getLastTrie().process(dumper); dumper.dumpCounters();*/ appData.setLastFoundSubsequences(subsequences); Console.traceln(Level.FINE, "found " + appData.getLastFoundSubsequences().size() + " tasks occurring " + appData.getLastFoundSubsequences().getOccurrenceCount() + " times"); } /** *

* TODO: comment *

* * @param subsequences */ private void dropUnrepresentative(Subsequences subsequences, RuleApplicationData appData) { Map> locations = getLocations(subsequences, appData); for (Map.Entry> entry : locations.entrySet()) { final int[] coveredActionInstances = new int[1]; for (SubsequenceLocation loc : entry.getValue()) { for (int i = loc.getIndex(); i < loc.getIndex() + entry.getKey().size(); i++) { ITaskInstance taskInstance = loc.getSession().get(i); taskInstance.accept(new DefaultTaskInstanceTraversingVisitor() { @Override public void visit(IEventTaskInstance eventTaskInstance) { coveredActionInstances[0]++; } }); } } if (coveredActionInstances[0] < minimumSequenceCoverage) { subsequences.remove(entry.getKey()); } } } /** * @param appData the rule application data combining all data used for applying this rule */ private void createNewTrie(RuleApplicationData appData) { Console.traceln(Level.FINEST, "training trie with a maximum sequence length of " + appData.getTrainedSubsequenceLength()); appData.getStopWatch().start("training trie"); // we prepared the task instances to refer to unique tasks, if they are treated // as equal. Therefore, we just compare the identity of the tasks of the task // instances appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy)); appData.getLastTrie().trainSessions (appData.getSessions(), appData.getTrainedSubsequenceLength()); appData.getStopWatch().stop("training trie"); } /** * from the last found tasks, sort out those that interleave with each other as for them always * the same replacement order needs to be taken. */ // private void removeInterleavingTasksOld(RuleApplicationData appData) { // // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TEST IMPLEMENTATION >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // // dumpSortedCollectionOfSubsequences // // ("found task", appData.getLastFoundSubsequences().subsequences); // // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< TEST IMPLEMENTATION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // // List interleavings; // // do { // interleavings = getInterleavings(appData.getLastFoundSubsequences(), appData); // // if (interleavings.size() > 0) { // // dumpSorted(interleavings); // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + " subsequences " + // // "--> checking for most real interleavings"); // // // we found interleaving subsequences. We need to decide for them, which to replace // // first. For this, we need to remove some of them from the list of detected // // subsequences in an ordered way to always have the same prioritization for // // replacements. The ones to be removed are those with most interleavings. // interleavings = getMostIntensiveInterleavings(interleavings); // // //dumpSorted(interleavings); // // if (interleavings.size() == 1) { // // Console.traceln(Level.FINEST, "found one most interleaving subsequence " + // // "--> removing it and checking for further interleavings"); // // // we have exactly one most interleaving detected subsequence. In this case, // // we remove it from the list of found subsequences and check again, if their // // are remaining interleavings. If not, we have the final list of non // // interleaving subsequences to be replaced. The order or their replacement // // doesn't matter as they are not interleaving. // appData.getLastFoundSubsequences().remove // (interleavings.get(0).getSubsequence()); // // continue; // } // else if (interleavings.size() == 0) { // // this should never happen // throw new RuntimeException("Implementation error: don't know how I got here"); // } // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + " most " + // // "interleaving subsequences --> checking which are most often a " + // // "successor"); // // // there are several subsequences with the same amount of interleavings. // // Check which of them is the most often a successor. This should be removed. // interleavings = getMostInterleavingsAsSuccessor(interleavings); // // // dumpSorted(interleavings); // // if (interleavings.size() == 1) { // // Console.traceln(Level.FINEST, "found one interleaving subsequence being most " + // // "often a successor --> removing it and checking for further " + // // "interleavings"); // // // we have exactly one interleaving subsequence that is most often a successor // // of the others. Remove it and try again to see if sufficient interleavings // // were cleared. // appData.getLastFoundSubsequences().remove // (interleavings.get(0).getSubsequence()); // // continue; // } // else if (interleavings.size() == 0) { // // this should never happen // throw new RuntimeException("Implementation error: don't know how I got here"); // } // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + // // " most interleaving subsequences being most often a successor " + // // "--> checking which is most seldom a predecessor"); // // // there are several subsequences with the same amount of interleavings being also // // the same times a successor of another subsequence. Hence, check which of them is // // the least often a predecessor. This should be removed. // interleavings = getLeastInterleavingsAsPredecessor(interleavings); // // //dumpSorted(interleavings); // // if (interleavings.size() == 1) { // // Console.traceln(Level.FINEST, "found one interleaving subsequence being most " + // // "often a successor and most seldom a predecessor --> " + // // "removing it and checking for further interleavings"); // // // we have exactly one interleaving subsequence that is most often a successor // // and most seldom a predecessor of the others. Remove it and try again to see // // if sufficient interleavings were cleared. // appData.getLastFoundSubsequences().remove // (interleavings.get(0).getSubsequence()); // // continue; // } // else if (interleavings.size() == 0) { // // this should never happen // throw new RuntimeException("Implementation error: don't know how I got here"); // } // // // the remaining subsequences are interleaving the same amount of times and are // // used the same amount of times as successors and as predecessors by all other // // detected interleaving subsequences. Now lets check, which of them is mostly used // // as successor and most seldom used as predecessor only considering the remaining // // subsequences. If some of them do not interfere with the others, remove them and // // try again to see if sufficient interleavings were cleared. // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + // // " most interleaving subsequences being most often a successor " + // // "and most seldom a predecessor --> removing collision being not " + // // "between themselves"); // // interleavings = removeCollisionsOfOtherSubsequences(interleavings); // // // dumpSorted(interleavings); // // // Console.traceln(Level.FINEST, "removed collisions of other subsequences not " + // // "belonging to the remaining interleaving subsequences --> " + // // "checking of the remaining interleaving are most often a " + // // "successor of another one"); // // interleavings = getMostInterleavingsAsSuccessor(interleavings); // interleavings = removeCollisionsOfOtherSubsequences(interleavings); // // // dumpSorted(interleavings); // // if (interleavings.size() == 1) { // // Console.traceln(Level.FINEST, "found one interleaving subsequence being most " + // // "often a successor in comparison to the other remaining " + // // "interleavings --> removing it and checking for further " + // // "interleavings"); // // appData.getLastFoundSubsequences().remove // (interleavings.get(0).getSubsequence()); // // continue; // } // else if (interleavings.size() == 0) { // // this should never happen // throw new RuntimeException("Implementation error: don't know how I got here"); // } // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + // // " most interleaving subsequences being most often a successor in " + // // "comparison to the other remaining interleavings --> checking " + // // "which is most seldom a predecessor"); // // interleavings = getLeastInterleavingsAsPredecessor(interleavings); // interleavings = removeCollisionsOfOtherSubsequences(interleavings); // // // dumpSorted(interleavings); // // if (interleavings.size() == 1) { // // Console.traceln(Level.FINEST, "found one interleaving subsequence being most " + // // "often a successor and most seldom a predecessor in " + // // "comparison to the other remaining interleavings --> " + // // "removing it and checking for further interleavings"); // // appData.getLastFoundSubsequences().remove // (interleavings.get(0).getSubsequence()); // // continue; // } // else if (interleavings.size() == 0) { // // this should never happen // throw new RuntimeException("Implementation error: don't know how I got here"); // } // // // Console.traceln(Level.FINEST, "found " + interleavings.size() + // // " most interleaving subsequences being most often a successor " + // // "and most seldom a predecessor in comparison to the other " + // // "remaining interleavings --> this may happen if there are no " + // // "collisions between them anymore. If so, remove all of them at " + // // "once."); // // int collisionCount = 0; // // for (InterleavingSubsequence subsequence : interleavings) { // collisionCount += subsequence.getCollisionCounter(); // } // // if (collisionCount == 0) { // // Console.traceln(Level.FINEST, "remaining interleaving subsequences have no " + // // "further collisions with each other --> removing all of them " + // // "and checking for further interleavings"); // // for (InterleavingSubsequence subsequence : interleavings) { // appData.getLastFoundSubsequences().remove(subsequence.getSubsequence()); // } // // continue; // } // // // Console.traceln(Level.FINEST, "remaining interleaving subsequences still have " + // // "collisions with each other --> decide based on their occurrences" + // // " in the sessions (remove those, coming later in comparison to " + // // "the others)"); // // // determine the predecessor collisions being the last in all sessions. Those // // collisions show the interleaving subsequence occurring last // Map sessions = // new IdentityHashMap(); // // for (InterleavingSubsequence interleaving : interleavings) { // for (Collision coll : interleaving.getPredecessorCollisions()) { // Collision maxPosColl = sessions.get(coll.getLocation().getSession()); // // if ((maxPosColl == null) || // (maxPosColl.getLocation().getIndex() < coll.getLocation().getIndex())) // { // sessions.put(coll.getLocation().getSession(), coll); // } // } // } // // // determine, which of the subsequences occurs most often as the last one // // interleaving with another // Map lastOccurrenceCounters = // new IdentityHashMap(); // // for (Collision coll : sessions.values()) { // Integer counter = lastOccurrenceCounters.get(coll.getSubsequence()); // // if (counter == null) { // lastOccurrenceCounters.put(coll.getSubsequence(), 1); // } // else { // lastOccurrenceCounters.put(coll.getSubsequence(), counter + 1); // } // } // // int maxCounter = 0; // Subsequence toRemove = null; // // for (Map.Entry entry : lastOccurrenceCounters.entrySet()) { // if (entry.getValue() > maxCounter) { // maxCounter = entry.getValue(); // toRemove = entry.getKey(); // } // else if (entry.getValue() == maxCounter) { // // we can not decide again // toRemove = null; // break; // } // } // // if (toRemove != null) { // // Console.traceln(Level.FINEST, "one of the remaining interleaving subsequences" + // // " is most often the last to occur --> removing it and " + // // "checking for further interleavings"); // // appData.getLastFoundSubsequences().remove(toRemove); // continue; // } // // // checking now, if there is one sequence, having the lowest index for any of // // its collisions and preserve it. If there are several ones with the same minimum // // index, check if they are independent. If not, throw an exception // int minPosInAnySequence = Integer.MAX_VALUE; // List subsequencesWithMinPos = new LinkedList<>(); // // for (InterleavingSubsequence interleaving : interleavings) { // for (Collision collision : interleaving.getSuccessorCollisions()) { // if (minPosInAnySequence > collision.getLocation().getIndex()) { // minPosInAnySequence = collision.getLocation().getIndex(); // subsequencesWithMinPos.clear(); // } // // if (minPosInAnySequence == collision.getLocation().getIndex()) { // // several have the same min pos --> undecidable which to prefer // subsequencesWithMinPos.add(interleaving); // } // } // } // // if (subsequencesWithMinPos.size() > 0) { // List subsequencesToRemove = new LinkedList(); // // for (Subsequence candidate : appData.getLastFoundSubsequences()) { // boolean found = false; // // for (InterleavingSubsequence subsequenceWithMinPos : subsequencesWithMinPos) // { // if (candidate == subsequenceWithMinPos.getSubsequence()) { // found = true; // } // } // // if (!found) { // subsequencesToRemove.add(candidate); // } // } // // if (subsequencesToRemove.size() > 0) { // for (Subsequence candidate : subsequencesToRemove) { // appData.getLastFoundSubsequences().remove(candidate); // } // // continue; // } // } // // // At this point, we can not decide anymore. But it should also not // // happen. Even if two subsequences ab and ba one of them should be // // more often a successor or predecessor than the other if they // // directly follow each other. If not, it can not be decided, which of // // them to replace first. Perhaps the one occurring more often as // // the first in a session that the other. But this can be implemented // // later. // dumpSorted(interleavings, Level.SEVERE); // // throw new RuntimeException // ("can not decide which detected subsequence to replace first."); // } // } // while (interleavings.size() > 0); // // // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TEST IMPLEMENTATION >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // // dumpSortedCollectionOfSubsequences // // ("to replace", appData.getLastFoundSubsequences().subsequences); // // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< TEST IMPLEMENTATION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // } /** * from the last found tasks, sort out those that interleave with each other as for them always * the same replacement order needs to be taken. */ private void removeInterleavingTasks(RuleApplicationData appData) { // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TEST IMPLEMENTATION >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // dumpSortedCollectionOfSubsequences // ("found task", appData.getLastFoundSubsequences().subsequences); // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< TEST IMPLEMENTATION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< List interleavings; do { interleavings = getInterleavings(appData.getLastFoundSubsequences(), appData); if (interleavings.size() > 0) { // dumpSorted(interleavings); // Console.traceln(Level.FINEST, "found " + interleavings.size() + " subsequences " + // "--> checking for most real interleavings"); // we found interleaving subsequences. We need to decide for them, which to replace // first. For this, we need to remove some of them from the list of detected // subsequences in an ordered way to always have the same prioritization for // replacements. The ones to be removed are those with most interleavings. interleavings = getMostIntensiveInterleavings(interleavings); //dumpSorted(interleavings); if (interleavings.size() < appData.getLastFoundSubsequences().size()) { // Console.traceln(Level.FINEST, "found most interleaving subsequences " + // "--> removing them and checking for further interleavings"); // we have most interleaving subsequences. As these are less than all detected // subsequences, we remove them and check if there are remaining interleavings. for (InterleavingSubsequence interleaving : interleavings) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); } continue; } else if (interleavings.size() == 0) { // this should never happen throw new RuntimeException("Implementation error: don't know how I got here"); } // Console.traceln(Level.FINEST, "found " + interleavings.size() + " most " + // "interleaving subsequences --> checking which are most often a " + // "successor"); // all detected subsequences are also interleaving with the same amount of time // check, which of them is most often the successor in a collision interleavings = removeCollisionsOfOtherSubsequences(interleavings); interleavings = getMostInterleavingsAsSuccessor(interleavings); // dumpSorted(interleavings); if (interleavings.size() < appData.getLastFoundSubsequences().size()) { // Console.traceln(Level.FINEST, "found interleaving subsequences being most " + // "often a successor --> removing them and checking for further " + // "interleavings"); // we have interleaving subsequences that are most often a successor // of the others. As they are less than all detected subsequences, remove them // and try again to see if sufficient interleavings were cleared. for (InterleavingSubsequence interleaving : interleavings) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); } continue; } else if (interleavings.size() == 0) { // this should never happen throw new RuntimeException("Implementation error: don't know how I got here"); } // Console.traceln(Level.FINEST, "found " + interleavings.size() + // " most interleaving subsequences being most often a successor " + // "--> checking which is most seldom a predecessor"); // all detected subsequences have the same amount of interleavings being also // the same times a successor of another subsequence. Hence, check which of them is // the least often a predecessor. This should be removed. interleavings = removeCollisionsOfOtherSubsequences(interleavings); interleavings = getLeastInterleavingsAsPredecessor(interleavings); //dumpSorted(interleavings); if (interleavings.size() < appData.getLastFoundSubsequences().size()) { // Console.traceln(Level.FINEST, "found interleaving subsequences being most " + // "often a successor and most seldom a predecessor --> " + // "removing them and checking for further interleavings"); // we have interleaving subsequences that are most often a successor // and most seldom a predecessor of the others. As they are less than all // detected subsequences, remove them and try again to see // if sufficient interleavings were cleared. for (InterleavingSubsequence interleaving : interleavings) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); } continue; } else if (interleavings.size() == 0) { // this should never happen throw new RuntimeException("Implementation error: don't know how I got here"); } // the remaining subsequences are interleaving the same amount of times and are // used the same amount of times as successors and as predecessors by all other // detected interleaving subsequences. // Console.traceln(Level.FINEST, "found " + interleavings.size() + // " most interleaving subsequences being most often a successor " + // "and most seldom a predecessor in comparison to the other " + // "remaining interleavings --> decide based on their occurrences" + // " in the sessions (remove those, coming later in comparison to " + // "the others)"); // determine the subsequences whose sum of the indexes of collisions is smallest or // who has the smallest collision index in all sessions and remove all others int overallMinSumOfCollisionIndexes = Integer.MAX_VALUE; int overallMinimalCollisionIndex = Integer.MAX_VALUE; List interleavingsWithMinSum = new LinkedList<>(); List interleavingsWithMinIndex = new LinkedList<>(); for (InterleavingSubsequence interleaving : interleavings) { int sumOfCollisionIndexes = 0; int minimalCollisionIndex = Integer.MAX_VALUE; for (Collision coll : interleaving.getPredecessorCollisions()) { sumOfCollisionIndexes += coll.getLocation().getIndex(); minimalCollisionIndex = Math.min(minimalCollisionIndex, coll.getLocation().getIndex()); } for (Collision coll : interleaving.getSuccessorCollisions()) { sumOfCollisionIndexes += coll.getLocation().getIndex(); minimalCollisionIndex = Math.min(minimalCollisionIndex, coll.getLocation().getIndex()); } if (overallMinSumOfCollisionIndexes > sumOfCollisionIndexes) { interleavingsWithMinSum.clear(); interleavingsWithMinSum.add(interleaving); overallMinSumOfCollisionIndexes = sumOfCollisionIndexes; } else if (overallMinSumOfCollisionIndexes == sumOfCollisionIndexes) { interleavingsWithMinSum.add(interleaving); } if (overallMinimalCollisionIndex > minimalCollisionIndex) { interleavingsWithMinIndex.clear(); interleavingsWithMinIndex.add(interleaving); overallMinimalCollisionIndex = minimalCollisionIndex; } else if (overallMinimalCollisionIndex == minimalCollisionIndex) { interleavingsWithMinIndex.add(interleaving); } } if (interleavingsWithMinSum.size() < appData.getLastFoundSubsequences().size()) { for (InterleavingSubsequence interleaving : interleavings) { boolean found = false; for (InterleavingSubsequence candidate : interleavingsWithMinSum) { if (candidate == interleaving) { found = true; break; } } if (!found) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); } } continue; } if (interleavingsWithMinIndex.size() < appData.getLastFoundSubsequences().size()) { for (InterleavingSubsequence interleaving : interleavings) { boolean found = false; for (InterleavingSubsequence candidate : interleavingsWithMinIndex) { if (candidate == interleaving) { found = true; break; } } if (!found) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); } } continue; } // the remaining subsequences are interleaving the same amount of times, are // used the same amount of times as successors and predecessors by all other // detected interleaving subsequences, and their sum of the indexes of collisions // is smallest or their collision index in all sessions is the smallest. This may // happen for a subsequence aba for which there is an occurrence ababa. That is, // the subsequence interleaves with itself. We can try to solve this by shortening // the detected subsequence by one. Let us see, what happens. /*boolean changedSubsequence = false; for (InterleavingSubsequence interleaving : interleavings) { if (interleaving.getSubsequence().size() > 2) { appData.getLastFoundSubsequences().remove(interleaving.getSubsequence()); List shortenedSubsequence = new LinkedList<>(); for (int i = 0; i < interleaving.getSubsequence().size() - 1; i++) { shortenedSubsequence.add(interleaving.getSubsequence().get(i)); } appData.getLastFoundSubsequences().subsequences.add (new Subsequence(interleaving.getSubsequence().occurrenceCount, shortenedSubsequence)); changedSubsequence = true; } } if (changedSubsequence) { continue; }*/ // At this point, we can not decide anymore. But it should also not // happen. Even if two subsequences ab and ba one of them should be // more often a successor or predecessor than the other if they // directly follow each other. If not, it can not be decided, which of // them to replace first. Perhaps the one occurring more often as // the first in a session than the other. But this can be implemented // later. dumpSorted(interleavings, Level.SEVERE); throw new RuntimeException ("can not decide which detected subsequence to replace first."); } } while (interleavings.size() > 0); // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TEST IMPLEMENTATION >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // dumpSortedCollectionOfSubsequences // ("to replace", appData.getLastFoundSubsequences().subsequences); // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< TEST IMPLEMENTATION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< } /** * */ private List getInterleavings(Subsequences subsequences, RuleApplicationData appData) { appData.getStopWatch().start("determine interleavings"); List interleavings = new LinkedList(); List potentialPredecessors = new LinkedList(); List potentialSuccessors = new LinkedList(); for (Subsequence subsequence : subsequences) { // Console.traceln // (Level.FINEST, "searching for interleavings of " + toPlainStr(subsequence)); potentialPredecessors.clear(); potentialSuccessors.clear(); getInterleavingSubsequences (subsequence, subsequences, potentialPredecessors, potentialSuccessors); if ((potentialPredecessors.size() <= 0) && (potentialSuccessors.size() <= 0)) { // subsequence is not interleaving with others. Check next one. continue; } // subsequence is interleaving. First, determine its locations in the user sessions. List precedingCollisions = getPrecedingCollisions(subsequence, potentialPredecessors, appData); List succeedingCollisions = getSucceedingCollisions(subsequence, potentialSuccessors, appData); if ((precedingCollisions.size() <= 0) && (succeedingCollisions.size() <= 0)) { // subsequence is not interleaving with others. Check next one. continue; } interleavings.add(new InterleavingSubsequence(subsequence, precedingCollisions, succeedingCollisions)); } appData.getStopWatch().stop("determine interleavings"); return interleavings; } /** * */ private void getInterleavingSubsequences(Subsequence subsequence, Subsequences allSubsequences, List potentialPredecessors, List potentialSuccessors) { TaskComparator comparator = identityTaskHandlingStrategy.getTaskComparator(); for (Subsequence candidate : allSubsequences) { if (comparator.equals(subsequence.get(subsequence.size() - 1), candidate.get(0))) { potentialSuccessors.add(candidate); } if (comparator.equals(subsequence.get(0), candidate.get(candidate.size() - 1))) { potentialPredecessors.add(candidate); } } // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TEST IMPLEMENTATION >>>>>>>>>>>>>>>>>>>>>>>>>>>>> // Console.traceln(Level.FINEST, " potential successors of " + toPlainStr(subsequence)); // dumpSortedCollectionOfSubsequences(" ", potentialSuccessors); // Console.traceln(Level.FINEST, " potential predecessors of " + toPlainStr(subsequence)); // dumpSortedCollectionOfSubsequences(" ", potentialPredecessors); // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< TEST IMPLEMENTATION <<<<<<<<<<<<<<<<<<<<<<<<<<<<< } /** * */ private Map> getLocations(Subsequences subsequences, RuleApplicationData appData) { Map> result = new IdentityHashMap>(); // fill the map with empty locations for (Subsequence subsequence : subsequences) { result.put(subsequence, new LinkedList()); } /* LinkedList> candidates = new LinkedList>(); for (IUserSession session : appData.getSessions()) { for (int i = 0; i < session.size(); i++) { ITask currentTask = session.get(i).getTask(); // create a list of candidates that may start here LinkedList candidatesToStartHere = new LinkedList(); for (Subsequence candidate : subsequences) { if (candidate.get(0) == currentTask) { candidatesToStartHere.add(candidate); } } candidates.addFirst(candidatesToStartHere); // check if the other candidates continue here. ListIterator> candidatesIt = candidates.listIterator(1); int index = 1; while (candidatesIt.hasNext()) { ListIterator candidateIt = candidatesIt.next().listIterator(); while (candidateIt.hasNext()) { Subsequence candidate = candidateIt.next(); if (candidate.get(index) == currentTask) { if (candidate.size() == (index + 1)) { // subsequence was fully matched --> store location result.get(candidate).add (new SubsequenceLocation(session, i - candidate.size() + 1)); candidateIt.remove(); } } else if (candidate.get(index) != currentTask) { // remove the candidate as it is not located here candidateIt.remove(); } } index++; } // remove potential continuation being empty while ((candidates.size() > 0) && (candidates.getLast().size() == 0)) { candidates.removeLast(); } } }*/ /* for (IUserSession session : appData.getSessions()) { LinkedList> candidateIterators = new LinkedList<>(); LinkedList candidates = new LinkedList<>(); for (int i = 0; i < session.size(); i++) { ITask currentTask = session.get(i).getTask(); // create a list of candidates that may start here for (Subsequence candidate : subsequences) { if (candidate.get(0) == currentTask) { candidates.add(candidate); candidateIterators.add(candidate.iterator()); } } // check which candidates continue/end here. ListIterator> candidateItsIt = candidateIterators.listIterator(); ListIterator candidatesIt = candidates.listIterator(); while (candidateItsIt.hasNext()) { Iterator candidateIterator = candidateItsIt.next(); Subsequence candidate = candidatesIt.next(); if (candidateIterator.hasNext()) { ITask expectedTask = candidateIterator.next(); if (expectedTask != currentTask) { // remove the iterator and the candidate from both lists. candidatesIt.remove(); candidateItsIt.remove(); } // else leave iterator as is and continue } else { // the candidate belonging to the iterator fully matched. Hence, store the // location and drop the candidate and its iterator candidatesIt.remove(); candidateItsIt.remove(); result.get(candidate).add (new SubsequenceLocation(session, i - candidate.size())); } } } // check, which further iterators match with the session end. ListIterator> candidateItsIt = candidateIterators.listIterator(); ListIterator candidatesIt = candidates.listIterator(); while (candidatesIt.hasNext()) { Iterator candidateIterator = candidateItsIt.next(); Subsequence candidate = candidatesIt.next(); if (!candidateIterator.hasNext()) { // the candidate belonging to the iterator fully matched. Hence, store the // location and drop the candidate and its iterator result.get(candidate).add (new SubsequenceLocation(session, session.size() - candidate.size())); } } }*/ for (IUserSession session : appData.getSessions()) { for (Subsequence candidate : subsequences) { int index = -1; do { index = getSubListIndex(session, candidate, index + 1); if (index > -1) { result.get(candidate).add(new SubsequenceLocation(session, index)); } } while (index > -1); } } return result; } /** * */ private List getPrecedingCollisions(Subsequence subsequence, List potentialPredecessors, RuleApplicationData appData) { List precedingCollisions = new LinkedList(); Map> allLocations = appData.getLastFoundSubsequenceLocations(); for (SubsequenceLocation location : allLocations.get(subsequence)) { for (Subsequence predecessor : potentialPredecessors) { for (SubsequenceLocation predecessorLocation : allLocations.get(predecessor)) { if ((location.getSession() == predecessorLocation.getSession()) && (location.getIndex() - predecessor.size() + 1 == predecessorLocation.getIndex())) { precedingCollisions.add(new Collision(location, subsequence, predecessor)); } } } } return precedingCollisions; } /** * */ private List getSucceedingCollisions(Subsequence subsequence, List potentialSuccessors, RuleApplicationData appData) { List succeedingCollisions = new LinkedList(); Map> allLocations = appData.getLastFoundSubsequenceLocations(); for (SubsequenceLocation location : allLocations.get(subsequence)) { for (Subsequence successor : potentialSuccessors) { for (SubsequenceLocation successorLocation : allLocations.get(successor)) { if ((location.getSession() == successorLocation.getSession()) && (location.getIndex() + subsequence.size() - 1 == successorLocation.getIndex())) { succeedingCollisions.add(new Collision(location, subsequence, successor)); } } } } return succeedingCollisions; } /** * */ private List getMostIntensiveInterleavings (List interleavings) { List mostIntensiveInterleavings = new LinkedList(); int collisionCounter = 0; for (InterleavingSubsequence interleaving : interleavings) { if (interleaving.getCollisionCounter() > collisionCounter) { collisionCounter = interleaving.getCollisionCounter(); mostIntensiveInterleavings.clear(); } if (interleaving.getCollisionCounter() == collisionCounter) { mostIntensiveInterleavings.add(interleaving); } } return mostIntensiveInterleavings; } /** * */ private List getLeastInterleavingsAsPredecessor (List interleavings) { List leastInterleavingsAsPredecessor = new LinkedList(); int collisionCounter = Integer.MAX_VALUE; for (InterleavingSubsequence interleaving : interleavings) { if (interleaving.getSuccessorCollisionCounter() < collisionCounter) { collisionCounter = interleaving.getSuccessorCollisionCounter(); leastInterleavingsAsPredecessor.clear(); } if (interleaving.getSuccessorCollisionCounter() == collisionCounter) { leastInterleavingsAsPredecessor.add(interleaving); } } return leastInterleavingsAsPredecessor; } /** * */ private List getMostInterleavingsAsSuccessor (List interleavings) { List mostInterleavingsAsSuccessor = new LinkedList(); int collisionCounter = 0; for (InterleavingSubsequence interleaving : interleavings) { if (interleaving.getPredecessorCollisionCounter() > collisionCounter) { collisionCounter = interleaving.getPredecessorCollisionCounter(); mostInterleavingsAsSuccessor.clear(); } if (interleaving.getPredecessorCollisionCounter() == collisionCounter) { mostInterleavingsAsSuccessor.add(interleaving); } } return mostInterleavingsAsSuccessor; } /** * */ private List removeCollisionsOfOtherSubsequences (List interleavings) { List subsequencesWithoutOtherCollisions = new LinkedList(); for (InterleavingSubsequence interleaving : interleavings) { List reducedPredecessorCollisions = new LinkedList<>(); for (Collision collision : interleaving.getPredecessorCollisions()) { for (InterleavingSubsequence otherInterleaving : interleavings) { if (otherInterleaving.getSubsequence() == collision.getCollidingWith()) { reducedPredecessorCollisions.add(collision); break; } } } List reducedSuccessorCollisions = new LinkedList<>(); for (Collision collision : interleaving.getSuccessorCollisions()) { for (InterleavingSubsequence otherInterleaving : interleavings) { if (otherInterleaving.getSubsequence() == collision.getCollidingWith()) { reducedSuccessorCollisions.add(collision); break; } } } subsequencesWithoutOtherCollisions.add (new InterleavingSubsequence(interleaving.getSubsequence(), reducedPredecessorCollisions, reducedSuccessorCollisions)); } return subsequencesWithoutOtherCollisions; } /** * @param appData the rule application data combining all data used for applying this rule */ private void replaceSequencesOccurringMostOften(RuleApplicationData appData) { appData.detectedAndReplacedTasks(false); if ((appData.getLastFoundSubsequences().size() > 0) && (appData.getLastFoundSubsequences().getOccurrenceCount() > 1)) { Console.traceln(Level.FINER, "replacing tasks occurrences"); for (Subsequence subsequence : appData.getLastFoundSubsequences()) { ISequence sequence = taskFactory.createNewSequence(); Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + subsequence); List sequenceInstances = replaceSubsequenceOccurrences (subsequence, appData.getSessions(), sequence); harmonizeSequenceInstancesModel(sequence, sequenceInstances, subsequence.size()); appData.detectedAndReplacedTasks (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0)); if (sequenceInstances.size() < appData.getLastFoundSubsequences().getOccurrenceCount()) { Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " + sequenceInstances.size() + " times instead of expected " + appData.getLastFoundSubsequences().getOccurrenceCount()); } } } } /** * */ private void harmonizeSequenceInstancesModel(ISequence sequence, List sequenceInstances, int sequenceLength) { TaskComparator comparator = preparationTaskHandlingStrategy.getTaskComparator(); // ensure for each subtask that lexically different variants are preserved for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) { List subTaskVariants = new LinkedList(); for (ISequenceInstance sequenceInstance : sequenceInstances) { ITask candidate = sequenceInstance.get(subTaskIndex).getTask(); boolean found = false; for (int i = 0; i < subTaskVariants.size(); i++) { if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) { taskBuilder.setTask (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i)); found = true; break; } } if (!found) { subTaskVariants.add(candidate); } } // if there are more than one lexically different variant of the sub task at // the considered position, adapt the sequence model at that position to have // a selection of the different variants. In this case also adapt the // generated sequence instances to correctly contain selection instances. If // there is only one variant of sub tasks at the given position, simply set // this variant as the sub task of the selection. In this case, the instances // can be preserved as is if (subTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : subTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.addChild(sequence, selection); for (ISequenceInstance instance : sequenceInstances) { ISelectionInstance selectionInstance = taskFactory.createNewTaskInstance(selection); taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex)); taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance); } } else if (subTaskVariants.size() == 1) { taskBuilder.addChild(sequence, subTaskVariants.get(0)); } } } /** * @param tree */ private List replaceSubsequenceOccurrences(Subsequence subsequence, List sessions, ISequence temporalTaskModel) { List sequenceInstances = new LinkedList(); for (IUserSession session : sessions) { int index = -1; do { index = getSubListIndex(session, subsequence, ++index); if (index > -1) { // subsequence is found --> perform replacement sequenceInstances.add (RuleUtils.createNewSubSequenceInRange (session, index, index + subsequence.size() - 1, temporalTaskModel, taskFactory, taskBuilder)); } } while (index > -1); } return sequenceInstances; } /** * @param trie * @param object * @return */ private static int getSubListIndex(ITaskInstanceList list, Subsequence subsequence, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subsequence.size(); i++) { matchFound = true; for (int j = 0; j < subsequence.size(); j++) { // we prepared the task instances to refer to unique tasks, if they are treated // as equal. Therefore, we just compare the identity of the tasks of the task // instances if (list.get(i + j).getTask() != subsequence.get(j)) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } /** * @param trie * @param object * @return */ private int getSubListIndex(Subsequence longerSubsequence, Subsequence shorterSubsequence, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= longerSubsequence.size() - shorterSubsequence.size(); i++) { matchFound = true; for (int j = 0; j < shorterSubsequence.size(); j++) { // we prepared the task instances to refer to unique tasks, if they are treated // as equal. Therefore, we just compare the identity of the tasks of the task // instances if (longerSubsequence.get(i + j) != shorterSubsequence.get(j)) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } // /** // * // */ // private void dumpSorted(List interleavings) { // dumpSorted(interleavings, Level.FINEST); // } /** * */ private void dumpSorted(List interleavings, Level level) { List tmpList = new LinkedList(); for (InterleavingSubsequence interleaving : interleavings) { String taskStr = toPlainStr(interleaving); int index; for (index = 0; index < tmpList.size(); index++) { if (taskStr.compareTo(tmpList.get(index)) > 0) { break; } } tmpList.add(index, taskStr); } for (String task : tmpList) { Console.traceln(level, task); } } // /** // * // */ // private void dumpSortedCollectionOfSubsequences(String prefix, // Collection subsequences) // { // List tmpList = new LinkedList(); // // for (Subsequence subsequence : subsequences) { // String taskStr = toPlainStr(subsequence); // int index; // for (index = 0; index < tmpList.size(); index++) { // if (taskStr.compareTo(tmpList.get(index)) > 0) { // break; // } // } // // tmpList.add(index, taskStr); // } // // for (String task : tmpList) { // Console.traceln(Level.FINEST, prefix + " " + task); // } // } /** * */ private String toPlainStr(InterleavingSubsequence interleaving) { StringBuffer result = new StringBuffer(); result.append("interleavings of "); result.append(toPlainStr(interleaving.getSubsequence())); result.append("\n predecessor collisions:\n"); for (Collision collision : interleaving.getPredecessorCollisions()) { result.append(" "); result.append(toPlainStr(collision.getCollidingWith())); result.append(" ("); result.append(collision.getLocation()); result.append(")\n"); } result.append(" successor collisions:\n"); for (Collision collision : interleaving.getSuccessorCollisions()) { result.append(" "); result.append(toPlainStr(collision.getCollidingWith())); result.append(" ("); result.append(collision.getLocation()); result.append(")\n"); } return result.toString(); } /** * */ private String toPlainStr(Subsequence subsequence) { StringBuffer result = new StringBuffer(); result.append(subsequence.size()); result.append(": "); for (ITask task : subsequence) { DebuggingHelper.toPlainStr(task, result); result.append(" --> "); } return result.toString(); } /** * @author Patrick Harms */ private class MaxCountAndLongestSubsequencesFinder implements TrieProcessor { /** * */ private int currentCount; /** * */ private LinkedList foundSubsequences = new LinkedList(); /** * */ public MaxCountAndLongestSubsequencesFinder() { super(); this.currentCount = 0; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) */ @Override public TrieProcessor.Result process(List foundTask, int count) { if (foundTask.size() < 2) { // ignore single tasks return TrieProcessor.Result.CONTINUE; } if (count < 2) { // ignore singular occurrences return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount > count) { // ignore this children of this task, as they may have only smaller counts than // the already found tasks return TrieProcessor.Result.SKIP_NODE; } if (this.currentCount < count) { // the provided task occurs more often that all tasks found so far. // clear all found tasks and use the new count as the one searched for foundSubsequences.clear(); this.currentCount = count; } if (this.currentCount == count) { /* // this is a potential performance improvement: // the task is of interest. Ensure that only the longest remain if ((foundSubsequences.size() > 0) && (foundSubsequences.get(0).size() < foundTask.size())) { foundSubsequences.clear(); } foundSubsequences.add(new Subsequence(currentCount, foundTask));*/ // the task is of interest. Sort it into the other found tasks so that // the longest come first boolean added = false; for (int i = 0; i < foundSubsequences.size(); i++) { if (foundSubsequences.get(i).size() <= foundTask.size()) { foundSubsequences.add(i, new Subsequence(currentCount, foundTask)); added = true; break; } } if (!added) { foundSubsequences.add(new Subsequence(currentCount, foundTask)); } } return TrieProcessor.Result.CONTINUE; } /** * @return */ private Subsequences getFoundSubsequences() { removePermutationsOfShorterTasks(); return new Subsequences(currentCount, foundSubsequences); } /** * */ private void removePermutationsOfShorterTasks() { // now iterate the sorted list and for each task remove all other tasks that are shorter // (i.e. come later in the sorted list) and that represent a subpart of the task boolean removeFirst; ListIterator iterator1 = foundSubsequences.listIterator(); while (iterator1.hasNext()) { removeFirst = false; boolean removeSecond; Subsequence longTask = iterator1.next(); ListIterator iterator2 = foundSubsequences.listIterator(iterator1.nextIndex()); while (iterator2.hasNext()) { removeSecond = false; Subsequence shortTask = iterator2.next(); if (shortTask.size() < longTask.size()) { // found a task that is a potential subtask. Check for this and remove the // subtask if needed int index = getSubListIndex(longTask, shortTask, 0); if (index > -1) { if (index == 0) { // check if the short task is included in the long task partially // a second time. If so, prefer the short task boolean secondInclude = true; for (int pos = shortTask.size(); pos < longTask.size(); pos++) { // we prepared the task instances to refer to unique tasks, if // they are treated as equal. Therefore, we just compare the // identity of the tasks of the task instances if (longTask.get(pos) != shortTask.get(pos % shortTask.size())) { secondInclude = false; break; } } if (secondInclude) { // delete the long task // Console.traceln(Level.FINEST, "1: short task is contained several times in longer one --> removing it"); // Console.traceln(Level.FINEST, "short: " + toPlainStr(shortTask)); // Console.traceln(Level.FINEST, "long: " + toPlainStr(longTask)); removeFirst = true; } else { // Console.traceln(Level.FINEST, "2: short task is a part of a longer one --> removing it"); // Console.traceln(Level.FINEST, "short: " + toPlainStr(shortTask)); // Console.traceln(Level.FINEST, "long: " + toPlainStr(longTask)); removeSecond = true; } } else { // Console.traceln(Level.FINEST, "3: short task is a part of a longer one --> removing it"); // Console.traceln(Level.FINEST, "short: " + toPlainStr(shortTask)); // Console.traceln(Level.FINEST, "long: " + toPlainStr(longTask)); removeSecond = true; } } } if (removeSecond) { // unfortunately, this invalidates the first iterator. So recreate it after // the removal. As the removed element will be after the current position of // the first iterator, it is sufficient to remember its location int indexOf1 = iterator1.nextIndex() - 1; iterator2.remove(); iterator1 = foundSubsequences.listIterator(indexOf1); iterator1.next(); } } if (removeFirst) { iterator1.remove(); } } } } // /** // * @author Patrick Harms // */ // private class TrieStatisticsDumper implements TrieProcessor { // // /** // * // */ // private Map counters = new HashMap(); // // /* (non-Javadoc) // * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) // */ // @Override // public TrieProcessor.Result process(List subsequence, int count) { // if (subsequence.size() == 1) { // Integer value = counters.get(count); // // if (value == null) { // value = 0; // } // // counters.put(count, value + 1); // // return TrieProcessor.Result.CONTINUE; // } // else { // // ignore singular occurrences // return TrieProcessor.Result.SKIP_NODE; // } // } // // /** // * @return // */ // public void dumpCounters() { // int dumpedCounters = 0; // // int count = 1; // while (dumpedCounters < counters.size()) { // Integer value = counters.get(count++); // if (value != null) { // System.out.println(value + " symbols occurred " + count + " times"); // dumpedCounters++; // } // } // } // // } /** * */ private static class RuleApplicationData { /** * */ private List sessions; /** * */ private TaskInstanceTrie lastTrie; /** * default and minimum trained subsequence length is 3 */ private int trainedSubsequenceLength = 3; /** * */ private Subsequences lastFoundSubsequences = new Subsequences(Integer.MAX_VALUE, null); /** * */ private Map> lastFoundSubsequenceLocations; /** * */ private boolean detectedAndReplacedTasks; /** * */ private RuleApplicationResult result = new RuleApplicationResult(); /** * */ private StopWatch stopWatch = new StopWatch(); /** * */ private RuleApplicationData(List sessions) { this.sessions = sessions; } /** * @return the tree */ private List getSessions() { return sessions; } /** * @param lastTrie the lastTrie to set */ private void setLastTrie(TaskInstanceTrie lastTrie) { this.lastTrie = lastTrie; } /** * @return the lastTrie */ private TaskInstanceTrie getLastTrie() { return lastTrie; } /** * @param trainedSubsequenceLength the trainedSubsequenceLength to set */ private void setTrainedSubsequenceLength(int trainedSubsequenceLength) { this.trainedSubsequenceLength = trainedSubsequenceLength; } /** * @return the trainedSubsequenceLength */ private int getTrainedSubsequenceLength() { return trainedSubsequenceLength; } /** * @param lastFoundSequences the lastFoundSequences to set */ private void setLastFoundSubsequences(Subsequences lastFoundSequences) { this.lastFoundSubsequences = lastFoundSequences; this.lastFoundSubsequenceLocations = null; } /** * @return the lastFoundSequences */ private Subsequences getLastFoundSubsequences() { return lastFoundSubsequences; } /** * @return the lastFoundSequences */ private Map> getLastFoundSubsequenceLocations() { if (lastFoundSubsequenceLocations != null) { return lastFoundSubsequenceLocations; } lastFoundSubsequenceLocations = new IdentityHashMap>(); // fill the map with empty locations for (Subsequence subsequence : lastFoundSubsequences) { lastFoundSubsequenceLocations.put (subsequence, new LinkedList()); } for (IUserSession session : sessions) { for (Subsequence candidate : lastFoundSubsequences) { int index = -1; do { index = getSubListIndex(session, candidate, index + 1); if (index > -1) { lastFoundSubsequenceLocations.get (candidate).add(new SubsequenceLocation(session, index)); } } while (index > -1); } } return lastFoundSubsequenceLocations; } /** * */ private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) { this.detectedAndReplacedTasks = detectedAndReplacedTasks; } /** * */ private boolean detectedAndReplacedTasks() { return detectedAndReplacedTasks; } /** * @return the result */ private RuleApplicationResult getResult() { return result; } /** * @return the stopWatch */ private StopWatch getStopWatch() { return stopWatch; } } /** * @author Patrick Harms */ private static class Subsequence implements Iterable { /** * */ private int occurrenceCount; /** * */ private List subsequence; /** * @param occurrenceCount * @param subsequences */ private Subsequence(int occurrenceCount, List subsequence) { super(); this.occurrenceCount = occurrenceCount; this.subsequence = new ArrayList(subsequence); } /** */ private int size() { return this.subsequence.size(); } /** */ private ITask get(int index) { return this.subsequence.get(index); } /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public Iterator iterator() { return this.subsequence.iterator(); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return subsequence.toString() + " (" + occurrenceCount + ")"; } } /** * @author Patrick Harms */ private static class Subsequences implements Iterable { /** * */ private int occurrenceCount; /** * */ private LinkedList subsequences; /** * @param occurrenceCount * @param sequences */ private Subsequences(int occurrenceCount, LinkedList subsequences) { super(); this.occurrenceCount = occurrenceCount; this.subsequences = subsequences; } /** * */ private void remove(Subsequence subsequence) { ListIterator it = subsequences.listIterator(); while (it.hasNext()) { // reference comparison is sufficient if (it.next() == subsequence) { it.remove(); } } } /** */ private int getOccurrenceCount() { return occurrenceCount; } /** */ private int size() { return this.subsequences.size(); } /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public Iterator iterator() { return this.subsequences.iterator(); } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { StringBuffer result = new StringBuffer(); result.append(" subsequences occuring "); result.append(this.occurrenceCount); result.append(" times:\n"); for (Subsequence subsequence : subsequences) { result.append(subsequence); result.append("\n"); } return result.toString(); } } /** * @author Patrick Harms */ private static class InterleavingSubsequence { /** */ private Subsequence subsequence; /** */ private List successorCollisions; /** */ private List predecessorCollisions; /** * */ private InterleavingSubsequence(Subsequence subsequence, List predecessorCollisions, List successorCollisions) { super(); this.subsequence = subsequence; this.predecessorCollisions = predecessorCollisions; this.successorCollisions = successorCollisions; } /** * */ private int getCollisionCounter() { return getSuccessorCollisionCounter() + getPredecessorCollisionCounter(); } /** * */ private int getSuccessorCollisionCounter() { return successorCollisions.size(); } /** * */ private List getSuccessorCollisions() { return successorCollisions; } /** * */ private int getPredecessorCollisionCounter() { return predecessorCollisions.size(); } /** * */ private List getPredecessorCollisions() { return predecessorCollisions; } /** * */ private Subsequence getSubsequence() { return subsequence; } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "interleaving subsequence " + subsequence.toString() + " (" + successorCollisions.size() + " successor, " + predecessorCollisions.size() + " predecessor)"; } } /** * @author Patrick Harms */ private static class SubsequenceLocation { /** */ private IUserSession session; /** */ private int index; /** * */ private SubsequenceLocation(IUserSession session, int index) { this.session = session; this.index = index; } /** * @return the session */ private IUserSession getSession() { return session; } /** * @return the index */ private int getIndex() { return index; } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "location (" + session + ", " + index + ")"; } } /** * @author Patrick Harms */ private static class Collision { /** */ private SubsequenceLocation location; /** */ private Subsequence subsequence; /** */ private Subsequence collidingWith; /** * */ private Collision(SubsequenceLocation location, Subsequence subsequence, Subsequence collidingWith) { this.location = location; this.subsequence = subsequence; this.collidingWith = collidingWith; } /** * @return the collidingWith */ private Subsequence getCollidingWith() { return collidingWith; } /** * @return the location */ private SubsequenceLocation getLocation() { return location; } /** * @return the subsequence */ // private Subsequence getSubsequence() { // return subsequence; // } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "collision (" + location + " ," + subsequence + ", " + collidingWith + ")"; } } // methods for internal testing // private void checkMatchingOfSessions(List> flattenedSessions, // List sessions, // String when) // { // List> currentFlattenedSessions = flattenSessions(sessions); // if (flattenedSessions.size() != currentFlattenedSessions.size()) { // System.out.println("################## number of sessions changed after " + when); // } // else { // for (int i = 0; i < flattenedSessions.size(); i++) { // List expected = flattenedSessions.get(i); // List current = currentFlattenedSessions.get(i); // // if (expected.size() != current.size()) { // System.out.println // ("################## length of session " + i + " changed after " + when); // } // else { // for (int j = 0; j < expected.size(); j++) { // if (!expected.get(j).equals(current.get(j))) { // System.out.println("################## event " + j + " of session " + // i + " changed after " + when); // } // } // } // } // } // } // // private List> flattenSessions(List sessions) { // List> flattenedSessions = new ArrayList>(); // for (IUserSession session : sessions) { // List flattenedUserSession = new ArrayList(); // flatten(session, flattenedUserSession); // flattenedSessions.add(flattenedUserSession); // } // // return flattenedSessions; // } // // private void flatten(IUserSession iUserSession, List flattenedUserSession) { // for (ITaskInstance instance : iUserSession) { // flatten(instance, flattenedUserSession); // } // } // // private void flatten(ITaskInstance instance, List flattenedUserSession) { // if (instance instanceof ITaskInstanceList) { // for (ITaskInstance child : (ITaskInstanceList) instance) { // flatten(child, flattenedUserSession); // } // } // else if (instance instanceof ISelectionInstance) { // flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession); // } // else if (instance instanceof IOptionalInstance) { // flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession); // } // else if (instance instanceof IEventTaskInstance) { // flattenedUserSession.add(((IEventTaskInstance) instance).getEvent()); // } // } }