// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Match; import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.MatchOccurence; import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence; import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.PairwiseAlignmentGenerator; import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.PairwiseAlignmentStorage; import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.ObjectDistanceSubstitionMatrix; import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration; import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList; import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession; import de.ugoe.cs.autoquest.usageprofiles.SymbolMap; import de.ugoe.cs.util.StopWatch; import de.ugoe.cs.util.console.Console; /** *

* This class implements the major rule for creating task trees based on a set * of recorded user sessions. For this, it first harmonizes all tasks. This * eases later comparison. Then it searches the sessions for iterations and * replaces them accordingly. Then it searches for sub sequences being the * longest and occurring most often. For each found sub sequence, it replaces * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again * searches for iterations and then again for sub sequences until no more * replacements are done. *

*

* * * @author Patrick Harms */ class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule { /** *

* the task factory to be used for creating substructures for the temporal * relationships identified during rul application *

*/ private ITaskFactory taskFactory; /** *

* the task builder to be used for creating substructures for the temporal * relationships identified during rule application *

*/ private ITaskBuilder taskBuilder; /** *

* the task handling strategy to be used for comparing tasks for * preparation, i.e., before the tasks are harmonized *

*/ private TaskHandlingStrategy preparationTaskHandlingStrategy; /** *

* the task handling strategy to be used for comparing tasks during * iteration detection i.e., after the tasks are * harmonized *

*/ private TaskHandlingStrategy identityTaskHandlingStrategy; /** *

* instantiates the rule and initializes it with a task equality to be * considered when comparing tasks as well as a task factory and builder to * be used for creating task structures. *

* * @param minimalTaskEquality * the task equality to be considered when comparing tasks * @param taskFactory * the task factory to be used for creating substructures * @param taskBuilder * the task builder to be used for creating substructures */ SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality, ITaskFactory taskFactory, ITaskBuilder taskBuilder) { this.taskFactory = taskFactory; this.taskBuilder = taskBuilder; this.preparationTaskHandlingStrategy = new TaskHandlingStrategy( minimalTaskEquality); this.identityTaskHandlingStrategy = new TaskHandlingStrategy( TaskEquality.IDENTICAL); } /* * (non-Javadoc) * * @see java.lang.Object#toString() */ @Override public String toString() { return "SequenceForTaskDetectionRuleAlignment"; } /* * (non-Javadoc) * * @see * de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply * (java.util.List) */ @Override public RuleApplicationResult apply(List sessions) { RuleApplicationData appData = new RuleApplicationData(sessions); // this is the real rule application. Loop while something is replaced. SymbolMap uniqueTasks = harmonizeEventTaskInstancesModel(appData); // Generate a substitution matrix between all occurring events. Console.traceln(Level.INFO, "generating substitution matrix"); ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix( uniqueTasks, 6, -3); submat.generate(); // Generate pairwise alignments Console.traceln(Level.INFO, "generating pairwise alignments"); LinkedList matchseqs = new LinkedList(); PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator .generate(appData.getNumberSequences(), submat, 9); // Retrieve all matches reached a specific threshold Console.traceln(Level.INFO, "retrieving significant sequence pieces"); for (int i = 0; i < appData.getNumberSequences().size(); i++) { Console.traceln( Level.FINEST, "retrieving significant sequence pieces: " + Math.round((float) i / (float) appData.getNumberSequences().size() * 100) + "%"); for (int j = 0; j < appData.getNumberSequences().size(); j++) { if (i != j) { matchseqs.addAll(alignments.get(i, j).getMatches()); } } } Console.traceln(Level.FINEST, "retrieving significant sequence pieces: 100%"); Console.traceln(Level.INFO, "searching for patterns occuring most"); // search each match in every other sequence for (Iterator it = matchseqs.iterator(); it.hasNext();) { Match pattern = it.next(); // Skip sequences with more 0 events (scrolls) than other events. // Both of the pattern sequences are equally long, so the zero // counts just need to be smaller than the length of one sequence if (pattern.getFirstSequence().eventCount(0) + pattern.getSecondSequence().eventCount(0) + 1 > pattern .getFirstSequence().size()) continue; for (int j = 0; j < appData.getNumberSequences().size(); j++) { LinkedList startpositions = appData.getNumberSequences().get(j) .containsPattern(pattern); if (startpositions.size() > 0) { for (Iterator jt = startpositions.iterator(); jt .hasNext();) { int start = jt.next(); System.out.println("Found match "); pattern.getFirstSequence().printSequence(); pattern.getSecondSequence().printSequence(); System.out.println("in sequence " + (j+1) + " at position " + start); pattern.addOccurence( new MatchOccurence(start, j)); } } } } Console.traceln(Level.INFO, "sorting results"); // Sort results to get the most occurring results Comparator comparator = new Comparator() { public int compare(Match m1, Match m2) { return m2.occurenceCount() - m1.occurenceCount(); // use your // logic } }; Collections.sort(matchseqs, comparator); // TODO: Harmonize matches: finding double matches and merge them /* Console.traceln(Level.INFO, "harmonizing matches"); int i=0; while(i 2) { ISequence task = matchAsSequence(appData,matchseqs.get(i)); for(Iterator it = matchseqs.get(i).getOccurences().iterator();it.hasNext();) { MatchOccurence oc = it.next(); System.out.println("Trying to replace sequence: "); matchseqs.get(i).getFirstSequence().printSequence(); matchseqs.get(i).getSecondSequence().printSequence(); System.out.println(" in session number: " + (oc.getSequenceId()+1) + " at position " + oc.getStartindex() + "-" + (oc.getStartindex()+matchseqs.get(i).getFirstSequence().size())); System.out.println("Printing session: "); for(int j=0;j sequenceInstances = new LinkedList(); RuleUtils.createNewSubSequenceInRange(sessions.get(oc.getSequenceId()), oc.getStartindex(), oc.getStartindex()+matchseqs.get(i).getFirstSequence().size()-1, task, taskFactory, taskBuilder); } System.out.println(task); //matchseqs.get(i).getFirstSequence().printSequence(); //matchseqs.get(i).getSecondSequence().printSequence(); //System.out.println("Found pattern " // + matchseqs.get(i).occurenceCount() + " times"); System.out.println(); } } alignments = null; do { appData.getStopWatch().start("whole loop"); // detectAndReplaceIterations(appData); appData.getStopWatch().start("task replacement"); // //detectAndReplaceTasks(appData); // appData.getStopWatch().stop("task replacement"); // appData.getStopWatch().stop("whole loop"); appData.getStopWatch().dumpStatistics(System.out); // appData.getStopWatch().reset(); } while (appData.detectedAndReplacedTasks()); Console.println("created " + appData.getResult().getNewlyCreatedTasks().size() + " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() + " appropriate instances\n"); if ((appData.getResult().getNewlyCreatedTasks().size() > 0) || (appData.getResult().getNewlyCreatedTaskInstances().size() > 0)) { appData.getResult().setRuleApplicationStatus( RuleApplicationStatus.FINISHED); } return appData.getResult(); } /** *

* harmonizes the event task instances by unifying tasks. This is done, as * initially the event tasks being equal with respect to the considered task * equality are distinct objects. The comparison of these distinct objects * is more time consuming than comparing the object references. *

* * @param appData * the rule application data combining all data used for applying * this rule * @return Returns the unique tasks symbol map */ private SymbolMap harmonizeEventTaskInstancesModel( RuleApplicationData appData) { Console.traceln(Level.INFO, "harmonizing task model of event task instances"); appData.getStopWatch().start("harmonizing event tasks"); SymbolMap uniqueTasks = preparationTaskHandlingStrategy .createSymbolMap(); TaskInstanceComparator comparator = preparationTaskHandlingStrategy .getTaskComparator(); int unifiedTasks = 0; ITask task; List sessions = appData.getSessions(); int sessionNo = 0; for (IUserSession session : sessions) { Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session); NumberSequence templist = new NumberSequence(session.size()); for (int i = 0; i < session.size(); i++) { ITaskInstance taskInstance = session.get(i); task = uniqueTasks.getValue(taskInstance); if (task == null) { uniqueTasks.addSymbol(taskInstance, taskInstance.getTask()); templist.getSequence()[i] = taskInstance.getTask().getId(); } else { taskBuilder.setTask(taskInstance, task); templist.getSequence()[i] = task.getId(); unifiedTasks++; } appData.getNumber2Task().put(templist.getSequence()[i], taskInstance.getTask()); //System.out.println("TaskID: " + taskInstance.getTask().getId()+ " Numbersequence: " + templist.getSequence()[i]); } //Each NumberSequence is identified by its id, beginning to count at zero templist.setId(sessionNo-1); appData.getNumberSequences().add(templist); comparator.clearBuffers(); } appData.getStopWatch().stop("harmonizing event tasks"); Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " + uniqueTasks.size() + " different tasks)"); appData.getStopWatch().dumpStatistics(System.out); appData.getStopWatch().reset(); return uniqueTasks; } /** *

* searches for direct iterations of single tasks in all sequences and * replaces them with {@link IIteration}s, respectively appropriate * instances. Also all single occurrences of a task that is iterated * somewhen are replaced with iterations to have again an efficient way for * task comparisons. *

* * @param appData * the rule application data combining all data used for applying * this rule */ private void detectAndReplaceIterations(RuleApplicationData appData) { Console.traceln(Level.FINE, "detecting iterations"); appData.getStopWatch().start("detecting iterations"); List sessions = appData.getSessions(); Set iteratedTasks = searchIteratedTasks(sessions); if (iteratedTasks.size() > 0) { replaceIterationsOf(iteratedTasks, sessions, appData); } appData.getStopWatch().stop("detecting iterations"); Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks"); } /** *

* searches the provided sessions for task iterations. If a task is * iterated, it is added to the returned set. *

* * @param the * session to search for iterations in * * @return a set of tasks being iterated somewhere */ private Set searchIteratedTasks(List sessions) { Set iteratedTasks = new HashSet(); for (IUserSession session : sessions) { for (int i = 0; i < (session.size() - 1); i++) { // we prepared the task instances to refer to unique tasks, if // they are treated // as equal. Therefore, we just compare the identity of the // tasks of the task // instances if (session.get(i).getTask() == session.get(i + 1).getTask()) { iteratedTasks.add(session.get(i).getTask()); } } } return iteratedTasks; } /** *

* replaces all occurrences of all tasks provided in the set with iterations *

* * @param iteratedTasks * the tasks to be replaced with iterations * @param sessions * the sessions in which the tasks are to be replaced * @param appData * the rule application data combining all data used for applying * this rule */ private void replaceIterationsOf(Set iteratedTasks, List sessions, RuleApplicationData appData) { Map iterations = new HashMap(); Map> iterationInstances = new HashMap>(); for (ITask iteratedTask : iteratedTasks) { IIteration iteration = taskFactory.createNewIteration(); iterations.put(iteratedTask, iteration); iterationInstances.put(iteration, new LinkedList()); } IIterationInstance iterationInstance; for (IUserSession session : sessions) { int index = 0; iterationInstance = null; while (index < session.size()) { // we prepared the task instances to refer to unique tasks, if // they are treated // as equal. Therefore, we just compare the identity of the // tasks of the task // instances ITask currentTask = session.get(index).getTask(); IIteration iteration = iterations.get(currentTask); if (iteration != null) { if ((iterationInstance == null) || (iterationInstance.getTask() != iteration)) { iterationInstance = taskFactory .createNewTaskInstance(iteration); iterationInstances.get(iteration) .add(iterationInstance); taskBuilder.addTaskInstance(session, index, iterationInstance); index++; } taskBuilder.addChild(iterationInstance, session.get(index)); taskBuilder.removeTaskInstance(session, index); } else { if (iterationInstance != null) { iterationInstance = null; } index++; } } } for (Map.Entry> entry : iterationInstances .entrySet()) { harmonizeIterationInstancesModel(entry.getKey(), entry.getValue()); } } ISequence matchAsSequence(RuleApplicationData appData,Match m) { ISequence sequence = taskFactory.createNewSequence(); int[] first = m.getFirstSequence().getSequence(); int[] second = m.getSecondSequence().getSequence(); //Both sequences of a match are equally long for(int i=0;i * TODO clarify why this is done *

*/ private void harmonizeIterationInstancesModel(IIteration iteration, List iterationInstances) { List iteratedTaskVariants = new LinkedList(); TaskInstanceComparator comparator = preparationTaskHandlingStrategy .getTaskComparator(); // merge the lexically different variants of iterated task to a unique // list for (IIterationInstance iterationInstance : iterationInstances) { for (ITaskInstance executionVariant : iterationInstance) { ITask candidate = executionVariant.getTask(); boolean found = false; for (ITask taskVariant : iteratedTaskVariants) { if (comparator.areLexicallyEqual(taskVariant, candidate)) { taskBuilder.setTask(executionVariant, taskVariant); found = true; break; } } if (!found) { iteratedTaskVariants.add(candidate); } } } // if there are more than one lexically different variant of iterated // tasks, adapt the // iteration model to be a selection of different variants. In this case // also adapt // the generated iteration instances to correctly contain selection // instances. If there // is only one variant of an iterated task, simply set this as the // marked task of the // iteration. In this case, the instances can be preserved as is if (iteratedTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : iteratedTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.setMarkedTask(iteration, selection); for (IIterationInstance instance : iterationInstances) { for (int i = 0; i < instance.size(); i++) { ISelectionInstance selectionInstance = taskFactory .createNewTaskInstance(selection); taskBuilder.setChild(selectionInstance, instance.get(i)); taskBuilder.setTaskInstance(instance, i, selectionInstance); } } } else { taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0)); } } /** * TODO go on commenting * * @param appData * the rule application data combining all data used for applying * this rule */ private void detectAndReplaceTasks(RuleApplicationData appData) { Console.traceln(Level.FINE, "detecting and replacing tasks"); appData.getStopWatch().start("detecting tasks"); //getSequencesOccuringMostOften(appData); appData.getStopWatch().stop("detecting tasks"); appData.getStopWatch().start("replacing tasks"); replaceSequencesOccurringMostOften(appData); appData.getStopWatch().stop("replacing tasks"); //Console.traceln(Level.INFO, "detected and replaced " // + appData.getLastFoundTasks().size() + " tasks occuring " // + appData.getLastFoundTasks().getOccurrenceCount() + " times"); } /** * @param appData * the rule application data combining all data used for applying * this rule */ private void replaceSequencesOccurringMostOften(RuleApplicationData appData) { appData.detectedAndReplacedTasks(false); /* Console.traceln(Level.FINER, "replacing tasks occurrences"); for (List task : appData.getLastFoundTasks()) { ISequence sequence = taskFactory.createNewSequence(); Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task); List sequenceInstances = replaceTaskOccurrences( task, appData.getSessions(), sequence); harmonizeSequenceInstancesModel(sequence, sequenceInstances, task.size()); appData.detectedAndReplacedTasks(appData .detectedAndReplacedTasks() || (sequenceInstances.size() > 0)); if (sequenceInstances.size() < appData.getLastFoundTasks() .getOccurrenceCount()) { Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " + sequenceInstances.size() + " times instead of expected " + appData.getLastFoundTasks() .getOccurrenceCount()); } } */ } /** * */ private void harmonizeSequenceInstancesModel(ISequence sequence, List sequenceInstances, int sequenceLength) { TaskInstanceComparator comparator = preparationTaskHandlingStrategy .getTaskComparator(); // ensure for each subtask that lexically different variants are // preserved for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) { List subTaskVariants = new LinkedList(); for (ISequenceInstance sequenceInstance : sequenceInstances) { ITask candidate = sequenceInstance.get(subTaskIndex).getTask(); boolean found = false; for (int i = 0; i < subTaskVariants.size(); i++) { if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) { taskBuilder.setTask(sequenceInstance.get(subTaskIndex), subTaskVariants.get(i)); found = true; break; } } if (!found) { subTaskVariants.add(candidate); } } // if there are more than one lexically different variant of the sub // task at // the considered position, adapt the sequence model at that // position to have // a selection of the different variants. In this case also adapt // the // generated sequence instances to correctly contain selection // instances. If // there is only one variant of sub tasks at the given position, // simply set // this variant as the sub task of the selection. In this case, the // instances // can be preserved as is if (subTaskVariants.size() > 1) { ISelection selection = taskFactory.createNewSelection(); for (ITask variant : subTaskVariants) { taskBuilder.addChild(selection, variant); } taskBuilder.addChild(sequence, selection); for (ISequenceInstance instance : sequenceInstances) { ISelectionInstance selectionInstance = taskFactory .createNewTaskInstance(selection); taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex)); taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance); } } else if (subTaskVariants.size() == 1) { taskBuilder.addChild(sequence, subTaskVariants.get(0)); } } } /** * @param tree */ private List replaceTaskOccurrences( List task, List sessions, ISequence temporalTaskModel) { List sequenceInstances = new LinkedList(); for (IUserSession session : sessions) { int index = -1; do { index = getSubListIndex(session, task, ++index); if (index > -1) { sequenceInstances.add(RuleUtils .createNewSubSequenceInRange(session, index, index + task.size() - 1, temporalTaskModel, taskFactory, taskBuilder)); } } while (index > -1); } return sequenceInstances; } /** * @param trie * @param object * @return */ private int getSubListIndex(ITaskInstanceList list, List subList, int startIndex) { boolean matchFound; int result = -1; for (int i = startIndex; i <= list.size() - subList.size(); i++) { matchFound = true; for (int j = 0; j < subList.size(); j++) { // we prepared the task instances to refer to unique tasks, if // they are treated // as equal. Therefore, we just compare the identity of the // tasks of the task // instances if (list.get(i + j).getTask() != subList.get(j).getTask()) { matchFound = false; break; } } if (matchFound) { result = i; break; } } return result; } /** * */ private static class RuleApplicationData { private HashMap number2task; private ArrayList numberseqs; /** * */ private List sessions; /** * */ private boolean detectedAndReplacedTasks; /** * */ private RuleApplicationResult result; /** * */ private StopWatch stopWatch; /** * */ private RuleApplicationData(List sessions) { this.sessions = sessions; numberseqs = new ArrayList(); number2task = new HashMap(); stopWatch= new StopWatch(); result = new RuleApplicationResult(); } /** * @return the tree */ private List getSessions() { return sessions; } private ArrayList getNumberSequences() { return numberseqs; } /** * */ private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) { this.detectedAndReplacedTasks = detectedAndReplacedTasks; } /** * */ private boolean detectedAndReplacedTasks() { return detectedAndReplacedTasks; } /** * @return the result */ private RuleApplicationResult getResult() { return result; } /** * @return the stopWatch */ private StopWatch getStopWatch() { return stopWatch; } private HashMap getNumber2Task() { return number2task; } } }