source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRule.java @ 1767

Last change on this file since 1767 was 1767, checked in by pharms, 10 years ago
  • first version of merging similar task trees
File size: 43.1 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.HashSet;
19import java.util.Iterator;
20import java.util.LinkedList;
21import java.util.List;
22import java.util.Map;
23import java.util.Set;
24import java.util.logging.Level;
25
26import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
27import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
28import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
29import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional;
30import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptionalInstance;
31import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
39import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
40import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
41import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
42import de.ugoe.cs.autoquest.usageprofiles.Trie;
43import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
44import de.ugoe.cs.util.StopWatch;
45import de.ugoe.cs.util.console.Console;
46
47/**
48 * <p>
49 * This class implements the major rule for creating task trees based on a set of recorded
50 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
51 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
52 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
53 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
54 * iterations and then again for sub sequences until no more replacements are done.
55 * </p>
56 * <p>
57 * For determining the longest sequence occurring most often, the implementation uses a
58 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
59 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
60 * with an increased depth.
61 * </p>
62 *
63 * @author Patrick Harms
64 */
65class SequenceForTaskDetectionRule implements ISessionScopeRule {
66   
67    /**
68     * <p>
69     * the task factory to be used for creating substructures for the temporal
70     * relationships identified during rul application
71     * </p>
72     */
73    private ITaskFactory taskFactory;
74    /**
75     * <p>
76     * the task builder to be used for creating substructures for the temporal relationships
77     * identified during rule application
78     * </p>
79     */
80    private ITaskBuilder taskBuilder;
81
82    /**
83     * <p>
84     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
85     * the tasks are harmonized
86     * </p>
87     */
88    private TaskHandlingStrategy preparationTaskHandlingStrategy;
89   
90    /**
91     * <p>
92     * the task handling strategy to be used for comparing tasks during iteration detection an trie
93     * generation, i.e., after the tasks are harmonized
94     * </p>
95     */
96    private TaskHandlingStrategy identityTaskHandlingStrategy;;
97   
98    /**
99     * <p>
100     * instantiates the rule and initializes it with a task equality to be considered when
101     * comparing tasks as well as a task factory and builder to be used for creating task
102     * structures.
103     * </p>
104     *
105     * @param minimalTaskEquality the task equality to be considered when comparing tasks
106     * @param taskFactory         the task factory to be used for creating substructures
107     * @param taskBuilder         the task builder to be used for creating substructures
108     */
109    SequenceForTaskDetectionRule(TaskEquality minimalTaskEquality,
110                                 ITaskFactory taskFactory,
111                                 ITaskBuilder taskBuilder)
112    {
113        this.taskFactory = taskFactory;
114        this.taskBuilder = taskBuilder;
115       
116        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
117        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
118    }
119
120    /* (non-Javadoc)
121     * @see java.lang.Object#toString()
122     */
123    @Override
124    public String toString() {
125        return "SequenceForTaskDetectionRule";
126    }
127
128    /* (non-Javadoc)
129     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
130     */
131    @Override
132    public RuleApplicationResult apply(List<IUserSession> sessions) {
133        RuleApplicationData appData = new RuleApplicationData(sessions);
134       
135        // this is the real rule application. Loop while something is replaced.
136        harmonizeEventTaskInstancesModel(appData);
137        do {
138            System.out.println();
139           
140            appData.getStopWatch().start("whole loop");
141            detectAndReplaceIterations(appData);
142
143            appData.getStopWatch().start("task replacement");
144            detectAndReplaceTasks(appData);
145            appData.getStopWatch().stop("task replacement");
146            appData.getStopWatch().stop("whole loop");
147           
148            //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out);
149            //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset();
150           
151            appData.getStopWatch().dumpStatistics(System.out);
152            appData.getStopWatch().reset();
153           
154        }
155        while (appData.detectedAndReplacedTasks());
156       
157        Console.println
158            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
159             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
160             " appropriate instances\n");
161       
162        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
163            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
164        {
165            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
166        }
167       
168        return appData.getResult();
169    }
170
171    /**
172     * <p>
173     * harmonizes the event task instances by unifying tasks. This is done, as initially the
174     * event tasks being equal with respect to the considered task equality are distinct objects.
175     * The comparison of these distinct objects is more time consuming than comparing the object
176     * references.
177     * </p>
178     *
179     * @param appData the rule application data combining all data used for applying this rule
180     */
181    private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
182        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
183        appData.getStopWatch().start("harmonizing event tasks");
184
185        SymbolMap<ITaskInstance, ITask> uniqueTasks =
186            preparationTaskHandlingStrategy.createSymbolMap();
187        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
188       
189        int unifiedTasks = 0;
190        ITask task;
191        List<IUserSession> sessions = appData.getSessions();
192        int sessionNo = 0;
193        for (IUserSession session : sessions) {
194            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
195            for (ITaskInstance taskInstance : session) {
196                task = uniqueTasks.getValue(taskInstance);
197               
198                if (task == null) {
199                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
200                }
201                else {
202                    taskBuilder.setTask(taskInstance, task);
203                    unifiedTasks++;
204                }
205            }
206           
207            comparator.clearBuffers();
208        }
209       
210        appData.getStopWatch().stop("harmonizing event tasks");
211        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
212                        uniqueTasks.size() + " different tasks)");
213
214        appData.getStopWatch().dumpStatistics(System.out);
215        appData.getStopWatch().reset();
216    }
217
218    /**
219     * <p>
220     * searches for direct iterations of single tasks in all sequences and replaces them with
221     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
222     * a task that is iterated somewhen are replaced with iterations to have again an efficient
223     * way for task comparisons.
224     * </p>
225     *
226     * @param appData the rule application data combining all data used for applying this rule
227     */
228    private void detectAndReplaceIterations(RuleApplicationData appData) {
229        Console.traceln(Level.FINE, "detecting iterations");
230        appData.getStopWatch().start("detecting iterations");
231       
232        List<IUserSession> sessions = appData.getSessions();
233       
234        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
235       
236        if (iteratedTasks.size() > 0) {
237            replaceIterationsOf(iteratedTasks, sessions, appData);
238        }
239       
240        appData.getStopWatch().stop("detecting iterations");
241        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
242    }
243
244    /**
245     * <p>
246     * searches the provided sessions for task iterations. If a task is iterated, it is added
247     * to the returned set.
248     * </p>
249     *
250     * @param the session to search for iterations in
251     *
252     * @return a set of tasks being iterated somewhere
253     */
254    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
255        Set<ITask> iteratedTasks = new HashSet<ITask>();
256        for (IUserSession session : sessions) {
257            for (int i = 0; i < (session.size() - 1); i++) {
258                // we prepared the task instances to refer to unique tasks, if they are treated
259                // as equal. Therefore, we just compare the identity of the tasks of the task
260                // instances
261                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
262                    iteratedTasks.add(session.get(i).getTask());
263                }
264            }
265        }
266       
267        return iteratedTasks;
268    }
269
270    /**
271     * <p>
272     * replaces all occurrences of all tasks provided in the set with iterations
273     * </p>
274     *
275     * @param iteratedTasks the tasks to be replaced with iterations
276     * @param sessions      the sessions in which the tasks are to be replaced
277     * @param appData       the rule application data combining all data used for applying this rule
278     */
279    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
280                                     List<IUserSession>  sessions,
281                                     RuleApplicationData appData)
282    {
283        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
284        Map<IIteration, IOptional> optionals = new HashMap<IIteration, IOptional>();
285        Map<IIteration, List<IIterationInstance>> iterationInstances =
286                new HashMap<IIteration, List<IIterationInstance>>();
287           
288        for (ITask iteratedTask : iteratedTasks) {
289            IIteration iteration = taskFactory.createNewIteration();
290            iterations.put(iteratedTask, iteration);
291           
292            if (iteratedTask instanceof IOptional) {
293                IOptional optional = taskFactory.createNewOptional();
294                taskBuilder.setMarkedTask(optional, iteration);
295                optionals.put(iteration, optional);
296            }
297           
298            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
299        }
300       
301        IOptionalInstance optionalInstance;
302        IIterationInstance iterationInstance;
303       
304        for (IUserSession session : sessions) {
305            int index = 0;
306            optionalInstance = null;
307            iterationInstance = null;
308            boolean inReplacement = false;
309
310            while (index < session.size()) {
311                // we prepared the task instances to refer to unique tasks, if they are treated
312                // as equal. Therefore, we just compare the identity of the tasks of the task
313                // instances
314                ITask currentTask = session.get(index).getTask();
315                IIteration iteration = iterations.get(currentTask);
316                if (iteration != null) {
317                    if (!inReplacement || (iterationInstance.getTask() != iteration)) {
318                        if (currentTask instanceof IOptional) {
319                            IOptional optional = optionals.get(iteration);
320                            optionalInstance = taskFactory.createNewTaskInstance(optional);
321                            taskBuilder.addTaskInstance(session, index, optionalInstance);
322                        }
323                        else {
324                            iterationInstance = taskFactory.createNewTaskInstance(iteration);
325                            iterationInstances.get(iteration).add(iterationInstance);
326                            taskBuilder.addTaskInstance(session, index, iterationInstance);
327                        }
328                        inReplacement = true;
329                        index++;
330                    }
331                   
332                    if (currentTask instanceof IOptional) {
333                        ITaskInstance child = ((IOptionalInstance) session.get(index)).getChild();
334                       
335                        if (child != null) {
336                            if (iterationInstance == null) {
337                                iterationInstance = taskFactory.createNewTaskInstance(iteration);
338                                iterationInstances.get(iteration).add(iterationInstance);
339                                taskBuilder.setChild(optionalInstance, iterationInstance);
340                            }
341                            taskBuilder.addChild(iterationInstance, child);
342                        }
343                    }
344                    else {
345                        taskBuilder.addChild(iterationInstance, session.get(index));
346                    }
347                   
348                    taskBuilder.removeTaskInstance(session, index);
349                }
350                else {
351                    if (iterationInstance != null) {
352                        iterationInstance = null;
353                        optionalInstance = null;
354                        inReplacement = false;
355                    }
356                    index++;
357                }
358            }
359        }
360       
361        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
362        {
363            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
364        }
365    }
366
367    /**
368     * <p>
369     * TODO clarify why this is done
370     * </p>
371     */
372    private void harmonizeIterationInstancesModel(IIteration               iteration,
373                                                  List<IIterationInstance> iterationInstances)
374    {
375        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
376        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
377       
378        // merge the lexically different variants of iterated task to a unique list
379        for (IIterationInstance iterationInstance : iterationInstances) {
380            for (ITaskInstance executionVariant : iterationInstance) {
381                ITask candidate = executionVariant.getTask();
382           
383                boolean found = false;
384                for (ITask taskVariant : iteratedTaskVariants) {
385                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
386                        taskBuilder.setTask(executionVariant, taskVariant);
387                        found = true;
388                        break;
389                    }
390                }
391               
392                if (!found) {
393                    iteratedTaskVariants.add(candidate);
394                }
395            }
396        }
397       
398        // if there are more than one lexically different variant of iterated tasks, adapt the
399        // iteration model to be a selection of different variants. In this case also adapt
400        // the generated iteration instances to correctly contain selection instances. If there
401        // is only one variant of an iterated task, simply set this as the marked task of the
402        // iteration. In this case, the instances can be preserved as is
403        if (iteratedTaskVariants.size() > 1) {
404            ISelection selection = taskFactory.createNewSelection();
405           
406            for (ITask variant : iteratedTaskVariants) {
407                taskBuilder.addChild(selection, variant);
408            }
409           
410            taskBuilder.setMarkedTask(iteration, selection);
411           
412            for (IIterationInstance instance : iterationInstances) {
413                for (int i = 0; i < instance.size(); i++) {
414                    ISelectionInstance selectionInstance =
415                        taskFactory.createNewTaskInstance(selection);
416                    taskBuilder.setChild(selectionInstance, instance.get(i));
417                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
418                }
419            }
420        }
421        else {
422            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
423        }
424    }
425
426    /**
427     * TODO go on commenting
428     * @param appData the rule application data combining all data used for applying this rule
429     */
430    private void detectAndReplaceTasks(RuleApplicationData appData) {
431        Console.traceln(Level.FINE, "detecting and replacing tasks");
432        appData.getStopWatch().start("detecting tasks");
433       
434        getSequencesOccuringMostOften(appData);
435
436        appData.getStopWatch().stop("detecting tasks");
437        appData.getStopWatch().start("replacing tasks");
438       
439        replaceSequencesOccurringMostOften(appData);
440       
441        appData.getStopWatch().stop("replacing tasks");
442        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
443                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
444                        " times");
445    }
446
447    /**
448     * @param appData the rule application data combining all data used for applying this rule
449     */
450    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
451        Console.traceln(Level.FINE, "determining most prominent tasks");
452
453        Tasks tasks;
454        boolean createNewTrie = (appData.getLastTrie() == null) ||
455            appData.detectedAndReplacedTasks(); // tree has changed
456       
457        do {
458            if (createNewTrie) {
459                createNewTrie(appData);
460            }
461           
462            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
463            appData.getLastTrie().process(finder);
464       
465            tasks = finder.getFoundTasks();
466           
467            createNewTrie = false;
468           
469            for (List<ITaskInstance> task : tasks) {
470                if (task.size() >= appData.getTrainedSequenceLength()) {
471                    // Trie must be recreated with a longer sequence length to be sure that
472                    // the found sequences occurring most often are found in their whole length
473                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
474                    createNewTrie = true;
475                    break;
476                }
477            }
478        }
479        while (createNewTrie);
480       
481        // create a normal trie for comparison
482        /*System.out.println("creating test trie for comparison");
483       
484        appData.getStopWatch().start("testtrie");
485        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
486       
487        for (IUserSession session : appData.getSessions()) {
488            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
489        }
490       
491        appData.getStopWatch().stop("testtrie");
492       
493        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
494        trie.process(finder);
495
496        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
497       
498        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
499       
500        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
501            boolean foundTask = false;
502            for (List<ITaskInstance> task2 : tasks) {
503                boolean tasksEqual = false;
504                if (task1.size() == task2.size()) {
505                    tasksEqual = true;
506                    for (int i = 0; i < task1.size(); i++) {
507                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
508                        {
509                            tasksEqual = false;
510                        }
511                    }
512                }
513               
514                if (tasksEqual) {
515                    foundTask = true;
516                    break;
517                }
518            }
519           
520            if (!foundTask) {
521                System.out.println("different is " + task1);
522                allTasksEqual = false;
523                break;
524            }
525        }
526       
527        if (!allTasksEqual) {
528            System.out.println(finder.getFoundTasks());
529            System.out.println(tasks);
530           
531            throw new IllegalArgumentException("both tries calculated different subsequences");
532        }
533       
534        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
535        appData.getLastTrie().process(dumper);
536        dumper.dumpCounters();*/
537       
538        appData.setLastFoundTasks(tasks);
539
540        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
541                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
542    }
543
544    /**
545     * @param appData the rule application data combining all data used for applying this rule
546     */
547    private void createNewTrie(RuleApplicationData appData) {
548        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
549                        appData.getTrainedSequenceLength());
550
551        appData.getStopWatch().start("training trie");
552       
553        // we prepared the task instances to refer to unique tasks, if they are treated
554        // as equal. Therefore, we just compare the identity of the tasks of the task
555        // instances
556        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
557   
558        appData.getLastTrie().trainSessions
559            (appData.getSessions(), appData.getTrainedSequenceLength());
560       
561        appData.getStopWatch().stop("training trie");
562    }
563
564    /**
565     * @param appData the rule application data combining all data used for applying this rule
566     */
567    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
568        appData.detectedAndReplacedTasks(false);
569
570        if ((appData.getLastFoundTasks().size() > 0) &&
571            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
572        {
573            Console.traceln(Level.FINER, "replacing tasks occurrences");
574
575            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
576                ISequence sequence = taskFactory.createNewSequence();
577               
578                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
579
580                List<ISequenceInstance> sequenceInstances =
581                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
582               
583                harmonizeSequenceInstancesModel(sequence, sequenceInstances, task.size());
584                appData.detectedAndReplacedTasks
585                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
586
587                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
588                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
589                                    sequenceInstances.size() + " times instead of expected " +
590                                    appData.getLastFoundTasks().getOccurrenceCount());
591                }
592            }
593        }
594       
595    }
596
597    /**
598     *
599     */
600    private void harmonizeSequenceInstancesModel(ISequence               sequence,
601                                                 List<ISequenceInstance> sequenceInstances,
602                                                 int                     sequenceLength)
603    {
604        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
605       
606        // ensure for each subtask that lexically different variants are preserved
607        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
608            List<ITask> subTaskVariants = new LinkedList<ITask>();
609           
610            for (ISequenceInstance sequenceInstance : sequenceInstances) {
611                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
612               
613                boolean found = false;
614               
615                for (int i = 0; i < subTaskVariants.size(); i++) {
616                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
617                        taskBuilder.setTask
618                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
619                       
620                        found = true;
621                        break;
622                    }
623                }
624               
625                if (!found) {
626                    subTaskVariants.add(candidate);
627                }
628            }
629           
630            // if there are more than one lexically different variant of the sub task at
631            // the considered position, adapt the sequence model at that position to have
632            // a selection of the different variants. In this case also adapt the
633            // generated sequence instances to correctly contain selection instances. If
634            // there is only one variant of sub tasks at the given position, simply set
635            // this variant as the sub task of the selection. In this case, the instances
636            // can be preserved as is
637            if (subTaskVariants.size() > 1) {
638                ISelection selection = taskFactory.createNewSelection();
639               
640                for (ITask variant : subTaskVariants) {
641                    taskBuilder.addChild(selection, variant);
642                }
643               
644                taskBuilder.addChild(sequence, selection);
645               
646                for (ISequenceInstance instance : sequenceInstances) {
647                    ISelectionInstance selectionInstance =
648                        taskFactory.createNewTaskInstance(selection);
649                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
650                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
651                }
652            }
653            else if (subTaskVariants.size() == 1) {
654                taskBuilder.addChild(sequence, subTaskVariants.get(0));
655            }
656        }
657    }
658
659    /**
660     * @param tree
661     */
662    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
663                                                           List<IUserSession>  sessions,
664                                                           ISequence           temporalTaskModel)
665    {
666        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
667       
668        for (IUserSession session : sessions) {
669            int index = -1;
670               
671            do {
672                index = getSubListIndex(session, task, ++index);
673
674                if (index > -1) {
675                    sequenceInstances.add
676                        (RuleUtils.createNewSubSequenceInRange
677                             (session, index, index + task.size() - 1, temporalTaskModel,
678                              taskFactory, taskBuilder));
679                }
680            }
681            while (index > -1);
682        }
683       
684        return sequenceInstances;
685    }
686
687    /**
688     * @param trie
689     * @param object
690     * @return
691     */
692    private int getSubListIndex(ITaskInstanceList   list,
693                                List<ITaskInstance> subList,
694                                int                 startIndex)
695    {
696        boolean matchFound;
697        int result = -1;
698       
699        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
700            matchFound = true;
701           
702            for (int j = 0; j < subList.size(); j++) {
703                // we prepared the task instances to refer to unique tasks, if they are treated
704                // as equal. Therefore, we just compare the identity of the tasks of the task
705                // instances
706                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
707                    matchFound = false;
708                    break;
709                }
710            }
711           
712            if (matchFound) {
713                result = i;
714                break;
715            }
716        }
717       
718        return result;
719    }
720
721    /**
722     * @param trie
723     * @param object
724     * @return
725     */
726    private int getSubListIndex(List<ITaskInstance> list,
727                                List<ITaskInstance> subList,
728                                int                 startIndex)
729    {
730        boolean matchFound;
731        int result = -1;
732       
733        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
734            matchFound = true;
735           
736            for (int j = 0; j < subList.size(); j++) {
737                // we prepared the task instances to refer to unique tasks, if they are treated
738                // as equal. Therefore, we just compare the identity of the tasks of the task
739                // instances
740                if (list.get(i + j) != subList.get(j)) {
741                    matchFound = false;
742                    break;
743                }
744            }
745           
746            if (matchFound) {
747                result = i;
748                break;
749            }
750        }
751       
752        return result;
753    }
754   
755    /**
756     * @author Patrick Harms
757     */
758    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
759       
760        /**
761         *
762         */
763        private int currentCount;
764       
765        /**
766         *
767         */
768        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
769
770        /**
771         *
772         */
773        public MaxCountAndLongestTasksFinder() {
774            super();
775            this.currentCount = 0;
776        }
777
778        /* (non-Javadoc)
779         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
780         */
781        @Override
782        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
783            if (foundTask.size() < 2) {
784                // ignore single tasks
785                return TrieProcessor.Result.CONTINUE;
786            }
787           
788            if (count < 2) {
789                // ignore singular occurrences
790                return TrieProcessor.Result.SKIP_NODE;
791            }
792
793            if (this.currentCount > count) {
794                // ignore this children of this task, as they may have only smaller counts than
795                // the already found tasks
796                return TrieProcessor.Result.SKIP_NODE;
797            }
798           
799            if (this.currentCount < count) {
800                // the provided task occurs more often that all tasks found so far.
801                // clear all found tasks and use the new count as the one searched for
802                foundTasks.clear();
803                this.currentCount = count;
804            }
805           
806            if (this.currentCount == count) {
807                // the task is of interest. Sort it into the other found tasks so that
808                // the longest come first
809                boolean added = false;
810                for (int i = 0; i < foundTasks.size(); i++) {
811                    if (foundTasks.get(i).size() < foundTask.size()) {
812                        // defensive copy
813                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
814                        added = true;
815                        break;
816                    }
817                }
818               
819                if (!added) {
820                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
821                }
822            }
823           
824            return TrieProcessor.Result.CONTINUE;
825        }
826
827        /**
828         *  @return
829         */
830        public Tasks getFoundTasks() {
831            removePermutationsOfShorterTasks();
832            return new Tasks(currentCount, foundTasks);
833        }
834
835        /**
836         *
837         */
838        private void removePermutationsOfShorterTasks() {
839            // now iterate the sorted list and for each task remove all other tasks that are shorter
840            // (i.e. come later in the sorted list) and that represent a subpart of the task
841            for (int i = 0; i < foundTasks.size(); i++) {
842                for (int j = i + 1; j < foundTasks.size();) {
843                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
844                        // found a task that is a potential subtask. Check for this and remove the
845                        // subtask if needed
846                        List<ITaskInstance> longTask = foundTasks.get(i);
847                        List<ITaskInstance> shortTask = foundTasks.get(j);
848                       
849                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
850                            foundTasks.remove(j);
851                        }
852                        else {
853                            j++;
854                        }
855                    }
856                    else {
857                        j++;
858                    }
859                }
860            }
861        }
862
863    }
864   
865//    /**
866//     * @author Patrick Harms
867//     */
868//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
869//       
870//        /**
871//         *
872//         */
873//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
874//       
875//        /* (non-Javadoc)
876//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
877//         */
878//        @Override
879//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
880//            if (subsequence.size() == 1) {
881//                Integer value = counters.get(count);
882//               
883//                if (value == null) {
884//                    value = 0;
885//                }
886//               
887//                counters.put(count, value + 1);
888//               
889//                return TrieProcessor.Result.CONTINUE;
890//            }
891//            else {
892//                // ignore singular occurrences
893//                return TrieProcessor.Result.SKIP_NODE;
894//            }
895//        }
896//
897//        /**
898//         *  @return
899//         */
900//        public void dumpCounters() {
901//            int dumpedCounters = 0;
902//           
903//            int count = 1;
904//            while (dumpedCounters < counters.size()) {
905//                Integer value = counters.get(count++);
906//                if (value != null) {
907//                    System.out.println(value + " symbols occurred " + count + " times");
908//                    dumpedCounters++;
909//                }
910//            }
911//        }
912//
913//    }
914   
915    /**
916     *
917     */
918    private static class RuleApplicationData {
919       
920        /**
921         *
922         */
923        private List<IUserSession> sessions;
924       
925        /**
926         *
927         */
928        private TaskInstanceTrie lastTrie;
929       
930        /**
931         * default and minimum trained sequence length is 3
932         */
933        private int trainedSequenceLength = 3;
934       
935        /**
936         *
937         */
938        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
939       
940        /**
941         *
942         */
943        private boolean detectedAndReplacedTasks;
944       
945        /**
946         *
947         */
948        private RuleApplicationResult result = new RuleApplicationResult();
949       
950        /**
951         *
952         */
953        private StopWatch stopWatch = new StopWatch();
954       
955        /**
956         *
957         */
958        private RuleApplicationData(List<IUserSession> sessions) {
959            this.sessions = sessions;
960        }
961
962        /**
963         * @return the tree
964         */
965        private List<IUserSession> getSessions() {
966            return sessions;
967        }
968
969        /**
970         * @param lastTrie the lastTrie to set
971         */
972        private void setLastTrie(TaskInstanceTrie lastTrie) {
973            this.lastTrie = lastTrie;
974        }
975
976        /**
977         * @return the lastTrie
978         */
979        private TaskInstanceTrie getLastTrie() {
980            return lastTrie;
981        }
982
983        /**
984         * @param trainedSequenceLength the trainedSequenceLength to set
985         */
986        private void setTrainedSequenceLength(int trainedSequenceLength) {
987            this.trainedSequenceLength = trainedSequenceLength;
988        }
989
990        /**
991         * @return the trainedSequenceLength
992         */
993        private int getTrainedSequenceLength() {
994            return trainedSequenceLength;
995        }
996
997        /**
998         * @param lastFoundSequences the lastFoundSequences to set
999         */
1000        private void setLastFoundTasks(Tasks lastFoundSequences) {
1001            this.lastFoundTasks = lastFoundSequences;
1002        }
1003
1004        /**
1005         * @return the lastFoundSequences
1006         */
1007        private Tasks getLastFoundTasks() {
1008            return lastFoundTasks;
1009        }
1010
1011        /**
1012         *
1013         */
1014        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
1015            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
1016        }
1017
1018        /**
1019         *
1020         */
1021        private boolean detectedAndReplacedTasks() {
1022            return detectedAndReplacedTasks;
1023        }
1024       
1025        /**
1026         * @return the result
1027         */
1028        private RuleApplicationResult getResult() {
1029            return result;
1030        }
1031
1032        /**
1033         * @return the stopWatch
1034         */
1035        private StopWatch getStopWatch() {
1036            return stopWatch;
1037        }
1038
1039    }
1040   
1041
1042    /**
1043     * @author Patrick Harms
1044     */
1045    private static class Tasks implements Iterable<List<ITaskInstance>> {
1046       
1047        /**
1048         *
1049         */
1050        private int occurrenceCount;
1051       
1052        /**
1053         *
1054         */
1055        private List<List<ITaskInstance>> sequences;
1056
1057        /**
1058         * @param occurrenceCount
1059         * @param sequences
1060         */
1061        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1062            super();
1063            this.occurrenceCount = occurrenceCount;
1064            this.sequences = sequences;
1065        }
1066
1067        /**
1068         * @return
1069         */
1070        private int getOccurrenceCount() {
1071            return occurrenceCount;
1072        }
1073
1074        /**
1075         * @return
1076         */
1077        private int size() {
1078            return this.sequences.size();
1079        }
1080
1081        /**
1082         *
1083         */
1084
1085        /* (non-Javadoc)
1086         * @see java.lang.Iterable#iterator()
1087         */
1088        @Override
1089        public Iterator<List<ITaskInstance>> iterator() {
1090            return this.sequences.iterator();
1091        }
1092
1093        /* (non-Javadoc)
1094         * @see java.lang.Object#toString()
1095         */
1096        @Override
1097        public String toString() {
1098            StringBuffer result = new StringBuffer();
1099            result.append(this.occurrenceCount);
1100            result.append(" occurrences:\n");
1101           
1102            for (List<ITaskInstance> task : sequences) {
1103                result.append(task);
1104                result.append("\n");
1105            }
1106           
1107            return result.toString();
1108        }
1109
1110    }
1111   
1112    // methods for internal testing
1113//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1114//                                         List<IUserSession> sessions,
1115//                                         String             when)
1116//    {
1117//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1118//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1119//            System.out.println("################## number of sessions changed after " + when);
1120//        }
1121//        else {
1122//            for (int i = 0; i < flattenedSessions.size(); i++) {
1123//                List<Event> expected = flattenedSessions.get(i);
1124//                List<Event> current = currentFlattenedSessions.get(i);
1125//           
1126//                if (expected.size() != current.size()) {
1127//                    System.out.println
1128//                        ("################## length of session " + i + " changed after " + when);
1129//                }
1130//                else {
1131//                    for (int j = 0; j < expected.size(); j++) {
1132//                        if (!expected.get(j).equals(current.get(j))) {
1133//                            System.out.println("################## event " + j + " of session " +
1134//                                               i + " changed after " + when);
1135//                        }
1136//                    }
1137//                }
1138//            }     
1139//        }
1140//    }
1141//
1142//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1143//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1144//        for (IUserSession session : sessions) {
1145//            List<Event> flattenedUserSession = new ArrayList<Event>();
1146//            flatten(session, flattenedUserSession);
1147//            flattenedSessions.add(flattenedUserSession);
1148//        }
1149//
1150//        return flattenedSessions;
1151//    }
1152//
1153//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1154//        for (ITaskInstance instance : iUserSession) {
1155//            flatten(instance, flattenedUserSession);
1156//        }
1157//    }
1158//
1159//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1160//        if (instance instanceof ITaskInstanceList) {
1161//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1162//                flatten(child, flattenedUserSession);
1163//            }
1164//        }
1165//        else if (instance instanceof ISelectionInstance) {
1166//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1167//        }
1168//        else if (instance instanceof IOptionalInstance) {
1169//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1170//        }
1171//        else if (instance instanceof IEventTaskInstance) {
1172//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1173//        }
1174//    }
1175
1176}
Note: See TracBrowser for help on using the repository browser.