source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRule.java @ 1401

Last change on this file since 1401 was 1401, checked in by pharms, 10 years ago
File size: 41.2 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.HashSet;
19import java.util.Iterator;
20import java.util.LinkedList;
21import java.util.List;
22import java.util.Map;
23import java.util.Set;
24import java.util.logging.Level;
25
26import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
27import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
28import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
29import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
30import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
31import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
39import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
40import de.ugoe.cs.autoquest.usageprofiles.Trie;
41import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
42import de.ugoe.cs.util.StopWatch;
43import de.ugoe.cs.util.console.Console;
44
45/**
46 * <p>
47 * This class implements the major rule for creating task trees based on a set of recorded
48 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
49 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
50 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
51 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
52 * iterations and then again for sub sequences until no more replacements are done.
53 * </p>
54 * <p>
55 * For determining the longest sequence occurring most often, the implementation uses a
56 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
57 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
58 * with an increased depth.
59 * </p>
60 *
61 * @author Patrick Harms
62 */
63class SequenceForTaskDetectionRule implements ISessionScopeRule {
64   
65    /**
66     * <p>
67     * the task factory to be used for creating substructures for the temporal
68     * relationships identified during rul application
69     * </p>
70     */
71    private ITaskFactory taskFactory;
72    /**
73     * <p>
74     * the task builder to be used for creating substructures for the temporal relationships
75     * identified during rule application
76     * </p>
77     */
78    private ITaskBuilder taskBuilder;
79
80    /**
81     * <p>
82     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
83     * the tasks are harmonized
84     * </p>
85     */
86    private TaskHandlingStrategy preparationTaskHandlingStrategy;
87   
88    /**
89     * <p>
90     * the task handling strategy to be used for comparing tasks during iteration detection an trie
91     * generation, i.e., after the tasks are harmonized
92     * </p>
93     */
94    private TaskHandlingStrategy identityTaskHandlingStrategy;;
95   
96    /**
97     * <p>
98     * instantiates the rule and initializes it with a task equality to be considered when
99     * comparing tasks as well as a task factory and builder to be used for creating task
100     * structures.
101     * </p>
102     *
103     * @param minimalTaskEquality the task equality to be considered when comparing tasks
104     * @param taskFactory         the task factory to be used for creating substructures
105     * @param taskBuilder         the task builder to be used for creating substructures
106     */
107    SequenceForTaskDetectionRule(TaskEquality minimalTaskEquality,
108                                 ITaskFactory taskFactory,
109                                 ITaskBuilder taskBuilder)
110    {
111        this.taskFactory = taskFactory;
112        this.taskBuilder = taskBuilder;
113       
114        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
115        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
116    }
117
118    /* (non-Javadoc)
119     * @see java.lang.Object#toString()
120     */
121    @Override
122    public String toString() {
123        return "SequenceForTaskDetectionRule";
124    }
125
126    /* (non-Javadoc)
127     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
128     */
129    @Override
130    public RuleApplicationResult apply(List<IUserSession> sessions) {
131        RuleApplicationData appData = new RuleApplicationData(sessions);
132       
133        // this is the real rule application. Loop while something is replaced.
134        harmonizeEventTaskInstancesModel(appData);
135        do {
136            System.out.println();
137           
138            appData.getStopWatch().start("whole loop");
139            detectAndReplaceIterations(appData);
140
141            appData.getStopWatch().start("task replacement");
142            detectAndReplaceTasks(appData);
143            appData.getStopWatch().stop("task replacement");
144            appData.getStopWatch().stop("whole loop");
145           
146            //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out);
147            //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset();
148           
149            appData.getStopWatch().dumpStatistics(System.out);
150            appData.getStopWatch().reset();
151           
152        }
153        while (appData.detectedAndReplacedTasks());
154       
155        Console.println
156            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
157             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
158             " appropriate instances\n");
159       
160        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
161            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
162        {
163            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
164        }
165       
166        return appData.getResult();
167    }
168
169    /**
170     * <p>
171     * harmonizes the event task instances by unifying tasks. This is done, as initially the
172     * event tasks being equal with respect to the considered task equality are distinct objects.
173     * The comparison of these distinct objects is more time consuming than comparing the object
174     * references.
175     * </p>
176     *
177     * @param appData the rule application data combining all data used for applying this rule
178     */
179    private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
180        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
181        appData.getStopWatch().start("harmonizing event tasks");
182
183        SymbolMap<ITaskInstance, ITask> uniqueTasks =
184            preparationTaskHandlingStrategy.createSymbolMap();
185        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
186       
187        int unifiedTasks = 0;
188        ITask task;
189        List<IUserSession> sessions = appData.getSessions();
190        int sessionNo = 0;
191        for (IUserSession session : sessions) {
192            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
193            for (ITaskInstance taskInstance : session) {
194                task = uniqueTasks.getValue(taskInstance);
195               
196                if (task == null) {
197                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
198                }
199                else {
200                    taskBuilder.setTask(taskInstance, task);
201                    unifiedTasks++;
202                }
203            }
204           
205            comparator.clearBuffers();
206        }
207       
208        appData.getStopWatch().stop("harmonizing event tasks");
209        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
210                        uniqueTasks.size() + " different tasks)");
211
212        appData.getStopWatch().dumpStatistics(System.out);
213        appData.getStopWatch().reset();
214    }
215
216    /**
217     * <p>
218     * searches for direct iterations of single tasks in all sequences and replaces them with
219     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
220     * a task that is iterated somewhen are replaced with iterations to have again an efficient
221     * way for task comparisons.
222     * </p>
223     *
224     * @param appData the rule application data combining all data used for applying this rule
225     */
226    private void detectAndReplaceIterations(RuleApplicationData appData) {
227        Console.traceln(Level.FINE, "detecting iterations");
228        appData.getStopWatch().start("detecting iterations");
229       
230        List<IUserSession> sessions = appData.getSessions();
231       
232        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
233       
234        if (iteratedTasks.size() > 0) {
235            replaceIterationsOf(iteratedTasks, sessions, appData);
236        }
237       
238        appData.getStopWatch().stop("detecting iterations");
239        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
240    }
241
242    /**
243     * <p>
244     * searches the provided sessions for task iterations. If a task is iterated, it is added
245     * to the returned set.
246     * </p>
247     *
248     * @param the session to search for iterations in
249     *
250     * @return a set of tasks being iterated somewhere
251     */
252    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
253        Set<ITask> iteratedTasks = new HashSet<ITask>();
254        for (IUserSession session : sessions) {
255            for (int i = 0; i < (session.size() - 1); i++) {
256                // we prepared the task instances to refer to unique tasks, if they are treated
257                // as equal. Therefore, we just compare the identity of the tasks of the task
258                // instances
259                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
260                    iteratedTasks.add(session.get(i).getTask());
261                }
262            }
263        }
264       
265        return iteratedTasks;
266    }
267
268    /**
269     * <p>
270     * replaces all occurrences of all tasks provided in the set with iterations
271     * </p>
272     *
273     * @param iteratedTasks the tasks to be replaced with iterations
274     * @param sessions      the sessions in which the tasks are to be replaced
275     * @param appData       the rule application data combining all data used for applying this rule
276     */
277    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
278                                     List<IUserSession>  sessions,
279                                     RuleApplicationData appData)
280    {
281        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
282        Map<IIteration, List<IIterationInstance>> iterationInstances =
283                new HashMap<IIteration, List<IIterationInstance>>();
284           
285        for (ITask iteratedTask : iteratedTasks) {
286            IIteration iteration = taskFactory.createNewIteration();
287            iterations.put(iteratedTask, iteration);
288            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
289        }
290       
291        IIterationInstance iterationInstance;
292       
293        for (IUserSession session : sessions) {
294            int index = 0;
295            iterationInstance = null;
296
297            while (index < session.size()) {
298                // we prepared the task instances to refer to unique tasks, if they are treated
299                // as equal. Therefore, we just compare the identity of the tasks of the task
300                // instances
301                ITask currentTask = session.get(index).getTask();
302                IIteration iteration = iterations.get(currentTask);
303                if (iteration != null) {
304                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration))
305                    {
306                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
307                        iterationInstances.get(iteration).add(iterationInstance);
308                        taskBuilder.addTaskInstance(session, index, iterationInstance);
309                        index++;
310                    }
311                   
312                    taskBuilder.addChild(iterationInstance, session.get(index));
313                    taskBuilder.removeTaskInstance(session, index);
314                }
315                else {
316                    if (iterationInstance != null) {
317                        iterationInstance = null;
318                    }
319                    index++;
320                }
321            }
322        }
323       
324        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
325        {
326            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
327        }
328    }
329
330    /**
331     * <p>
332     * TODO clarify why this is done
333     * </p>
334     */
335    private void harmonizeIterationInstancesModel(IIteration               iteration,
336                                                  List<IIterationInstance> iterationInstances)
337    {
338        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
339        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
340       
341        // merge the lexically different variants of iterated task to a unique list
342        for (IIterationInstance iterationInstance : iterationInstances) {
343            for (ITaskInstance executionVariant : iterationInstance) {
344                ITask candidate = executionVariant.getTask();
345           
346                boolean found = false;
347                for (ITask taskVariant : iteratedTaskVariants) {
348                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
349                        taskBuilder.setTask(executionVariant, taskVariant);
350                        found = true;
351                        break;
352                    }
353                }
354               
355                if (!found) {
356                    iteratedTaskVariants.add(candidate);
357                }
358            }
359        }
360       
361        // if there are more than one lexically different variant of iterated tasks, adapt the
362        // iteration model to be a selection of different variants. In this case also adapt
363        // the generated iteration instances to correctly contain selection instances. If there
364        // is only one variant of an iterated task, simply set this as the marked task of the
365        // iteration. In this case, the instances can be preserved as is
366        if (iteratedTaskVariants.size() > 1) {
367            ISelection selection = taskFactory.createNewSelection();
368           
369            for (ITask variant : iteratedTaskVariants) {
370                taskBuilder.addChild(selection, variant);
371            }
372           
373            taskBuilder.setMarkedTask(iteration, selection);
374           
375            for (IIterationInstance instance : iterationInstances) {
376                for (int i = 0; i < instance.size(); i++) {
377                    ISelectionInstance selectionInstance =
378                        taskFactory.createNewTaskInstance(selection);
379                    taskBuilder.setChild(selectionInstance, instance.get(i));
380                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
381                }
382            }
383        }
384        else {
385            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
386        }
387    }
388
389    /**
390     * TODO go on commenting
391     * @param appData the rule application data combining all data used for applying this rule
392     */
393    private void detectAndReplaceTasks(RuleApplicationData appData) {
394        Console.traceln(Level.FINE, "detecting and replacing tasks");
395        appData.getStopWatch().start("detecting tasks");
396       
397        getSequencesOccuringMostOften(appData);
398
399        appData.getStopWatch().stop("detecting tasks");
400        appData.getStopWatch().start("replacing tasks");
401       
402        replaceSequencesOccurringMostOften(appData);
403       
404        appData.getStopWatch().stop("replacing tasks");
405        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
406                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
407                        " times");
408    }
409
410    /**
411     * @param appData the rule application data combining all data used for applying this rule
412     */
413    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
414        Console.traceln(Level.FINE, "determining most prominent tasks");
415
416        Tasks tasks;
417        boolean createNewTrie = (appData.getLastTrie() == null) ||
418            appData.detectedAndReplacedTasks(); // tree has changed
419       
420        do {
421            if (createNewTrie) {
422                createNewTrie(appData);
423            }
424           
425            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
426            appData.getLastTrie().process(finder);
427       
428            tasks = finder.getFoundTasks();
429           
430            createNewTrie = false;
431           
432            for (List<ITaskInstance> task : tasks) {
433                if (task.size() >= appData.getTrainedSequenceLength()) {
434                    // Trie must be recreated with a longer sequence length to be sure that
435                    // the found sequences occurring most often are found in their whole length
436                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
437                    createNewTrie = true;
438                    break;
439                }
440            }
441        }
442        while (createNewTrie);
443       
444        // create a normal trie for comparison
445        /*System.out.println("creating test trie for comparison");
446       
447        appData.getStopWatch().start("testtrie");
448        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
449       
450        for (IUserSession session : appData.getSessions()) {
451            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
452        }
453       
454        appData.getStopWatch().stop("testtrie");
455       
456        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
457        trie.process(finder);
458
459        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
460       
461        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
462       
463        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
464            boolean foundTask = false;
465            for (List<ITaskInstance> task2 : tasks) {
466                boolean tasksEqual = false;
467                if (task1.size() == task2.size()) {
468                    tasksEqual = true;
469                    for (int i = 0; i < task1.size(); i++) {
470                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
471                        {
472                            tasksEqual = false;
473                        }
474                    }
475                }
476               
477                if (tasksEqual) {
478                    foundTask = true;
479                    break;
480                }
481            }
482           
483            if (!foundTask) {
484                System.out.println("different is " + task1);
485                allTasksEqual = false;
486                break;
487            }
488        }
489       
490        if (!allTasksEqual) {
491            System.out.println(finder.getFoundTasks());
492            System.out.println(tasks);
493           
494            throw new IllegalArgumentException("both tries calculated different subsequences");
495        }
496       
497        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
498        appData.getLastTrie().process(dumper);
499        dumper.dumpCounters();*/
500       
501        appData.setLastFoundTasks(tasks);
502
503        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
504                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
505    }
506
507    /**
508     * @param appData the rule application data combining all data used for applying this rule
509     */
510    private void createNewTrie(RuleApplicationData appData) {
511        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
512                        appData.getTrainedSequenceLength());
513
514        appData.getStopWatch().start("training trie");
515       
516        // we prepared the task instances to refer to unique tasks, if they are treated
517        // as equal. Therefore, we just compare the identity of the tasks of the task
518        // instances
519        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
520   
521        appData.getLastTrie().trainSessions
522            (appData.getSessions(), appData.getTrainedSequenceLength());
523       
524        appData.getStopWatch().stop("training trie");
525    }
526
527    /**
528     * @param appData the rule application data combining all data used for applying this rule
529     */
530    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
531        appData.detectedAndReplacedTasks(false);
532
533        if ((appData.getLastFoundTasks().size() > 0) &&
534            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
535        {
536            Console.traceln(Level.FINER, "replacing tasks occurrences");
537
538            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
539                ISequence sequence = taskFactory.createNewSequence();
540               
541                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
542
543                List<ISequenceInstance> sequenceInstances =
544                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
545               
546                harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size());
547                appData.detectedAndReplacedTasks
548                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
549
550                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
551                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
552                                    sequenceInstances.size() + " times instead of expected " +
553                                    appData.getLastFoundTasks().getOccurrenceCount());
554                }
555            }
556        }
557       
558    }
559
560    /**
561     *
562     */
563    private void harmonizeSequenceInstancesModel(ISequence               sequence,
564                                                 List<ISequenceInstance> sequenceInstances,
565                                                 int                     sequenceLength)
566    {
567        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
568       
569        // ensure for each subtask that lexically different variants are preserved
570        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
571            List<ITask> subTaskVariants = new LinkedList<ITask>();
572           
573            for (ISequenceInstance sequenceInstance : sequenceInstances) {
574                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
575               
576                boolean found = false;
577               
578                for (int i = 0; i < subTaskVariants.size(); i++) {
579                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
580                        taskBuilder.setTask
581                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
582                       
583                        found = true;
584                        break;
585                    }
586                }
587               
588                if (!found) {
589                    subTaskVariants.add(candidate);
590                }
591            }
592           
593            // if there are more than one lexically different variant of the sub task at
594            // the considered position, adapt the sequence model at that position to have
595            // a selection of the different variants. In this case also adapt the
596            // generated sequence instances to correctly contain selection instances. If
597            // there is only one variant of sub tasks at the given position, simply set
598            // this variant as the sub task of the selection. In this case, the instances
599            // can be preserved as is
600            if (subTaskVariants.size() > 1) {
601                ISelection selection = taskFactory.createNewSelection();
602               
603                for (ITask variant : subTaskVariants) {
604                    taskBuilder.addChild(selection, variant);
605                }
606               
607                taskBuilder.addChild(sequence, selection);
608               
609                for (ISequenceInstance instance : sequenceInstances) {
610                    ISelectionInstance selectionInstance =
611                        taskFactory.createNewTaskInstance(selection);
612                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
613                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
614                }
615            }
616            else if (subTaskVariants.size() == 1) {
617                taskBuilder.addChild(sequence, subTaskVariants.get(0));
618            }
619        }
620    }
621
622    /**
623     * @param tree
624     */
625    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
626                                                           List<IUserSession>  sessions,
627                                                           ISequence           temporalTaskModel)
628    {
629        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
630       
631        for (IUserSession session : sessions) {
632            int index = -1;
633               
634            do {
635                index = getSubListIndex(session, task, ++index);
636
637                if (index > -1) {
638                    sequenceInstances.add
639                        (RuleUtils.createNewSubSequenceInRange
640                             (session, index, index + task.size() - 1, temporalTaskModel,
641                              taskFactory, taskBuilder));
642                }
643            }
644            while (index > -1);
645        }
646       
647        return sequenceInstances;
648    }
649
650    /**
651     * @param trie
652     * @param object
653     * @return
654     */
655    private int getSubListIndex(ITaskInstanceList   list,
656                                List<ITaskInstance> subList,
657                                int                 startIndex)
658    {
659        boolean matchFound;
660        int result = -1;
661       
662        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
663            matchFound = true;
664           
665            for (int j = 0; j < subList.size(); j++) {
666                // we prepared the task instances to refer to unique tasks, if they are treated
667                // as equal. Therefore, we just compare the identity of the tasks of the task
668                // instances
669                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
670                    matchFound = false;
671                    break;
672                }
673            }
674           
675            if (matchFound) {
676                result = i;
677                break;
678            }
679        }
680       
681        return result;
682    }
683
684    /**
685     * @param trie
686     * @param object
687     * @return
688     */
689    private int getSubListIndex(List<ITaskInstance> list,
690                                List<ITaskInstance> subList,
691                                int                 startIndex)
692    {
693        boolean matchFound;
694        int result = -1;
695       
696        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
697            matchFound = true;
698           
699            for (int j = 0; j < subList.size(); j++) {
700                // we prepared the task instances to refer to unique tasks, if they are treated
701                // as equal. Therefore, we just compare the identity of the tasks of the task
702                // instances
703                if (list.get(i + j) != subList.get(j)) {
704                    matchFound = false;
705                    break;
706                }
707            }
708           
709            if (matchFound) {
710                result = i;
711                break;
712            }
713        }
714       
715        return result;
716    }
717   
718    /**
719     * @author Patrick Harms
720     */
721    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
722       
723        /**
724         *
725         */
726        private int currentCount;
727       
728        /**
729         *
730         */
731        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
732
733        /**
734         *
735         */
736        public MaxCountAndLongestTasksFinder() {
737            super();
738            this.currentCount = 0;
739        }
740
741        /* (non-Javadoc)
742         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
743         */
744        @Override
745        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
746            if (foundTask.size() < 2) {
747                // ignore single tasks
748                return TrieProcessor.Result.CONTINUE;
749            }
750           
751            if (count < 2) {
752                // ignore singular occurrences
753                return TrieProcessor.Result.SKIP_NODE;
754            }
755
756            if (this.currentCount > count) {
757                // ignore this children of this task, as they may have only smaller counts than
758                // the already found tasks
759                return TrieProcessor.Result.SKIP_NODE;
760            }
761           
762            if (this.currentCount < count) {
763                // the provided task occurs more often that all tasks found so far.
764                // clear all found tasks and use the new count as the one searched for
765                foundTasks.clear();
766                this.currentCount = count;
767            }
768           
769            if (this.currentCount == count) {
770                // the task is of interest. Sort it into the other found tasks so that
771                // the longest come first
772                boolean added = false;
773                for (int i = 0; i < foundTasks.size(); i++) {
774                    if (foundTasks.get(i).size() < foundTask.size()) {
775                        // defensive copy
776                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
777                        added = true;
778                        break;
779                    }
780                }
781               
782                if (!added) {
783                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
784                }
785            }
786           
787            return TrieProcessor.Result.CONTINUE;
788        }
789
790        /**
791         *  @return
792         */
793        public Tasks getFoundTasks() {
794            removePermutationsOfShorterTasks();
795            return new Tasks(currentCount, foundTasks);
796        }
797
798        /**
799         *
800         */
801        private void removePermutationsOfShorterTasks() {
802            // now iterate the sorted list and for each task remove all other tasks that are shorter
803            // (i.e. come later in the sorted list) and that represent a subpart of the task
804            for (int i = 0; i < foundTasks.size(); i++) {
805                for (int j = i + 1; j < foundTasks.size();) {
806                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
807                        // found a task that is a potential subtask. Check for this and remove the
808                        // subtask if needed
809                        List<ITaskInstance> longTask = foundTasks.get(i);
810                        List<ITaskInstance> shortTask = foundTasks.get(j);
811                       
812                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
813                            foundTasks.remove(j);
814                        }
815                        else {
816                            j++;
817                        }
818                    }
819                    else {
820                        j++;
821                    }
822                }
823            }
824        }
825
826    }
827   
828//    /**
829//     * @author Patrick Harms
830//     */
831//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
832//       
833//        /**
834//         *
835//         */
836//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
837//       
838//        /* (non-Javadoc)
839//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
840//         */
841//        @Override
842//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
843//            if (subsequence.size() == 1) {
844//                Integer value = counters.get(count);
845//               
846//                if (value == null) {
847//                    value = 0;
848//                }
849//               
850//                counters.put(count, value + 1);
851//               
852//                return TrieProcessor.Result.CONTINUE;
853//            }
854//            else {
855//                // ignore singular occurrences
856//                return TrieProcessor.Result.SKIP_NODE;
857//            }
858//        }
859//
860//        /**
861//         *  @return
862//         */
863//        public void dumpCounters() {
864//            int dumpedCounters = 0;
865//           
866//            int count = 1;
867//            while (dumpedCounters < counters.size()) {
868//                Integer value = counters.get(count++);
869//                if (value != null) {
870//                    System.out.println(value + " symbols occurred " + count + " times");
871//                    dumpedCounters++;
872//                }
873//            }
874//        }
875//
876//    }
877   
878    /**
879     *
880     */
881    private static class RuleApplicationData {
882       
883        /**
884         *
885         */
886        private List<IUserSession> sessions;
887       
888        /**
889         *
890         */
891        private TaskInstanceTrie lastTrie;
892       
893        /**
894         * default and minimum trained sequence length is 3
895         */
896        private int trainedSequenceLength = 3;
897       
898        /**
899         *
900         */
901        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
902       
903        /**
904         *
905         */
906        private boolean detectedAndReplacedTasks;
907       
908        /**
909         *
910         */
911        private RuleApplicationResult result = new RuleApplicationResult();
912       
913        /**
914         *
915         */
916        private StopWatch stopWatch = new StopWatch();
917       
918        /**
919         *
920         */
921        private RuleApplicationData(List<IUserSession> sessions) {
922            this.sessions = sessions;
923        }
924
925        /**
926         * @return the tree
927         */
928        private List<IUserSession> getSessions() {
929            return sessions;
930        }
931
932        /**
933         * @param lastTrie the lastTrie to set
934         */
935        private void setLastTrie(TaskInstanceTrie lastTrie) {
936            this.lastTrie = lastTrie;
937        }
938
939        /**
940         * @return the lastTrie
941         */
942        private TaskInstanceTrie getLastTrie() {
943            return lastTrie;
944        }
945
946        /**
947         * @param trainedSequenceLength the trainedSequenceLength to set
948         */
949        private void setTrainedSequenceLength(int trainedSequenceLength) {
950            this.trainedSequenceLength = trainedSequenceLength;
951        }
952
953        /**
954         * @return the trainedSequenceLength
955         */
956        private int getTrainedSequenceLength() {
957            return trainedSequenceLength;
958        }
959
960        /**
961         * @param lastFoundSequences the lastFoundSequences to set
962         */
963        private void setLastFoundTasks(Tasks lastFoundSequences) {
964            this.lastFoundTasks = lastFoundSequences;
965        }
966
967        /**
968         * @return the lastFoundSequences
969         */
970        private Tasks getLastFoundTasks() {
971            return lastFoundTasks;
972        }
973
974        /**
975         *
976         */
977        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
978            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
979        }
980
981        /**
982         *
983         */
984        private boolean detectedAndReplacedTasks() {
985            return detectedAndReplacedTasks;
986        }
987       
988        /**
989         * @return the result
990         */
991        private RuleApplicationResult getResult() {
992            return result;
993        }
994
995        /**
996         * @return the stopWatch
997         */
998        private StopWatch getStopWatch() {
999            return stopWatch;
1000        }
1001
1002    }
1003   
1004
1005    /**
1006     * @author Patrick Harms
1007     */
1008    private static class Tasks implements Iterable<List<ITaskInstance>> {
1009       
1010        /**
1011         *
1012         */
1013        private int occurrenceCount;
1014       
1015        /**
1016         *
1017         */
1018        private List<List<ITaskInstance>> sequences;
1019
1020        /**
1021         * @param occurrenceCount
1022         * @param sequences
1023         */
1024        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1025            super();
1026            this.occurrenceCount = occurrenceCount;
1027            this.sequences = sequences;
1028        }
1029
1030        /**
1031         * @return
1032         */
1033        private int getOccurrenceCount() {
1034            return occurrenceCount;
1035        }
1036
1037        /**
1038         * @return
1039         */
1040        private int size() {
1041            return this.sequences.size();
1042        }
1043
1044        /**
1045         *
1046         */
1047
1048        /* (non-Javadoc)
1049         * @see java.lang.Iterable#iterator()
1050         */
1051        @Override
1052        public Iterator<List<ITaskInstance>> iterator() {
1053            return this.sequences.iterator();
1054        }
1055
1056        /* (non-Javadoc)
1057         * @see java.lang.Object#toString()
1058         */
1059        @Override
1060        public String toString() {
1061            StringBuffer result = new StringBuffer();
1062            result.append(this.occurrenceCount);
1063            result.append(" occurrences:\n");
1064           
1065            for (List<ITaskInstance> task : sequences) {
1066                result.append(task);
1067                result.append("\n");
1068            }
1069           
1070            return result.toString();
1071        }
1072
1073    }
1074   
1075    // methods for internal testing
1076//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1077//                                         List<IUserSession> sessions,
1078//                                         String             when)
1079//    {
1080//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1081//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1082//            System.out.println("################## number of sessions changed after " + when);
1083//        }
1084//        else {
1085//            for (int i = 0; i < flattenedSessions.size(); i++) {
1086//                List<Event> expected = flattenedSessions.get(i);
1087//                List<Event> current = currentFlattenedSessions.get(i);
1088//           
1089//                if (expected.size() != current.size()) {
1090//                    System.out.println
1091//                        ("################## length of session " + i + " changed after " + when);
1092//                }
1093//                else {
1094//                    for (int j = 0; j < expected.size(); j++) {
1095//                        if (!expected.get(j).equals(current.get(j))) {
1096//                            System.out.println("################## event " + j + " of session " +
1097//                                               i + " changed after " + when);
1098//                        }
1099//                    }
1100//                }
1101//            }     
1102//        }
1103//    }
1104//
1105//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1106//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1107//        for (IUserSession session : sessions) {
1108//            List<Event> flattenedUserSession = new ArrayList<Event>();
1109//            flatten(session, flattenedUserSession);
1110//            flattenedSessions.add(flattenedUserSession);
1111//        }
1112//
1113//        return flattenedSessions;
1114//    }
1115//
1116//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1117//        for (ITaskInstance instance : iUserSession) {
1118//            flatten(instance, flattenedUserSession);
1119//        }
1120//    }
1121//
1122//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1123//        if (instance instanceof ITaskInstanceList) {
1124//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1125//                flatten(child, flattenedUserSession);
1126//            }
1127//        }
1128//        else if (instance instanceof ISelectionInstance) {
1129//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1130//        }
1131//        else if (instance instanceof IOptionalInstance) {
1132//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1133//        }
1134//        else if (instance instanceof IEventTaskInstance) {
1135//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1136//        }
1137//    }
1138
1139}
Note: See TracBrowser for help on using the repository browser.