source: branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1553

Last change on this file since 1553 was 1553, checked in by rkrimmel, 10 years ago

Completly restructured alignment approach classes, it is no longer a plugin.

File size: 41.8 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedList;
22import java.util.List;
23import java.util.Map;
24import java.util.Set;
25import java.util.logging.Level;
26
27import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
28import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
29import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
30import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
31import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
39import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
40import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
41import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
42import de.ugoe.cs.autoquest.usageprofiles.Trie;
43import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
44import de.ugoe.cs.util.StopWatch;
45import de.ugoe.cs.util.console.Console;
46
47/**
48 * <p>
49 * This class implements the major rule for creating task trees based on a set of recorded
50 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
51 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
52 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
53 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
54 * iterations and then again for sub sequences until no more replacements are done.
55 * </p>
56 * <p>
57 * For determining the longest sequence occurring most often, the implementation uses a
58 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
59 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
60 * with an increased depth.
61 * </p>
62 *
63 * @author Patrick Harms
64 */
65class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
66   
67    /**
68     * <p>
69     * the task factory to be used for creating substructures for the temporal
70     * relationships identified during rul application
71     * </p>
72     */
73    private ITaskFactory taskFactory;
74    /**
75     * <p>
76     * the task builder to be used for creating substructures for the temporal relationships
77     * identified during rule application
78     * </p>
79     */
80    private ITaskBuilder taskBuilder;
81
82    /**
83     * <p>
84     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
85     * the tasks are harmonized
86     * </p>
87     */
88    private TaskHandlingStrategy preparationTaskHandlingStrategy;
89   
90    /**
91     * <p>
92     * the task handling strategy to be used for comparing tasks during iteration detection an trie
93     * generation, i.e., after the tasks are harmonized
94     * </p>
95     */
96    private TaskHandlingStrategy identityTaskHandlingStrategy;;
97   
98   
99    private ArrayList<NumberSequence> numberseqs;
100   
101    /**
102     * <p>
103     * instantiates the rule and initializes it with a task equality to be considered when
104     * comparing tasks as well as a task factory and builder to be used for creating task
105     * structures.
106     * </p>
107     *
108     * @param minimalTaskEquality the task equality to be considered when comparing tasks
109     * @param taskFactory         the task factory to be used for creating substructures
110     * @param taskBuilder         the task builder to be used for creating substructures
111     */
112   
113   
114    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
115                                 ITaskFactory taskFactory,
116                                 ITaskBuilder taskBuilder)
117    {
118        this.taskFactory = taskFactory;
119        this.taskBuilder = taskBuilder;
120       
121        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
122        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
123       
124    }
125
126    /* (non-Javadoc)
127     * @see java.lang.Object#toString()
128     */
129    @Override
130    public String toString() {
131        return "SequenceForTaskDetectionRuleAlignment";
132    }
133
134    /* (non-Javadoc)
135     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
136     */
137    @Override
138    public RuleApplicationResult apply(List<IUserSession> sessions) {
139        RuleApplicationData appData = new RuleApplicationData(sessions);
140        numberseqs = new ArrayList<NumberSequence>();
141       
142        // this is the real rule application. Loop while something is replaced.
143        harmonizeEventTaskInstancesModel(appData);
144       
145       
146        //Hier mein kram hin
147        /*do {
148            System.out.println();
149           
150            appData.getStopWatch().start("whole loop");
151            detectAndReplaceIterations(appData);
152
153            appData.getStopWatch().start("task replacement");
154            detectAndReplaceTasks(appData);
155            appData.getStopWatch().stop("task replacement");
156            appData.getStopWatch().stop("whole loop");
157           
158            //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out);
159            //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset();
160           
161            appData.getStopWatch().dumpStatistics(System.out);
162            appData.getStopWatch().reset();
163           
164        }
165        while (appData.detectedAndReplacedTasks());*/
166       
167        Console.println
168            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
169             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
170             " appropriate instances\n");
171       
172        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
173            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
174        {
175            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
176        }
177       
178        return appData.getResult();
179    }
180
181    /**
182     * <p>
183     * harmonizes the event task instances by unifying tasks. This is done, as initially the
184     * event tasks being equal with respect to the considered task equality are distinct objects.
185     * The comparison of these distinct objects is more time consuming than comparing the object
186     * references.
187     * </p>
188     *
189     * @param appData the rule application data combining all data used for applying this rule
190     */
191    private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
192        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
193        appData.getStopWatch().start("harmonizing event tasks");
194
195        SymbolMap<ITaskInstance, ITask> uniqueTasks =
196            preparationTaskHandlingStrategy.createSymbolMap();
197        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
198       
199        int unifiedTasks = 0;
200        ITask task;
201        List<IUserSession> sessions = appData.getSessions();
202        int sessionNo = 0;
203        for (IUserSession session : sessions) {
204            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
205                NumberSequence templist = new NumberSequence(session.size()) ;
206           
207            for (int i = 0; i < session.size(); i++) {
208                ITaskInstance taskInstance = session.get(i);
209                task = uniqueTasks.getValue(taskInstance);
210               
211                if (task == null) {
212                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
213                    templist.getSequence()[i]=taskInstance.getTask().getId();
214                }
215                else {
216                    taskBuilder.setTask(taskInstance, task);
217                        templist.getSequence()[i]=task.getId();
218                    unifiedTasks++;
219                }
220            }
221           
222            comparator.clearBuffers();
223        }
224       
225        appData.getStopWatch().stop("harmonizing event tasks");
226        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
227                        uniqueTasks.size() + " different tasks)");
228
229        appData.getStopWatch().dumpStatistics(System.out);
230        appData.getStopWatch().reset();
231    }
232
233    /**
234     * <p>
235     * searches for direct iterations of single tasks in all sequences and replaces them with
236     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
237     * a task that is iterated somewhen are replaced with iterations to have again an efficient
238     * way for task comparisons.
239     * </p>
240     *
241     * @param appData the rule application data combining all data used for applying this rule
242     */
243    private void detectAndReplaceIterations(RuleApplicationData appData) {
244        Console.traceln(Level.FINE, "detecting iterations");
245        appData.getStopWatch().start("detecting iterations");
246       
247        List<IUserSession> sessions = appData.getSessions();
248       
249        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
250       
251        if (iteratedTasks.size() > 0) {
252            replaceIterationsOf(iteratedTasks, sessions, appData);
253        }
254       
255        appData.getStopWatch().stop("detecting iterations");
256        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
257    }
258
259    /**
260     * <p>
261     * searches the provided sessions for task iterations. If a task is iterated, it is added
262     * to the returned set.
263     * </p>
264     *
265     * @param the session to search for iterations in
266     *
267     * @return a set of tasks being iterated somewhere
268     */
269    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
270        Set<ITask> iteratedTasks = new HashSet<ITask>();
271        for (IUserSession session : sessions) {
272            for (int i = 0; i < (session.size() - 1); i++) {
273                // we prepared the task instances to refer to unique tasks, if they are treated
274                // as equal. Therefore, we just compare the identity of the tasks of the task
275                // instances
276                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
277                    iteratedTasks.add(session.get(i).getTask());
278                }
279            }
280        }
281       
282        return iteratedTasks;
283    }
284
285    /**
286     * <p>
287     * replaces all occurrences of all tasks provided in the set with iterations
288     * </p>
289     *
290     * @param iteratedTasks the tasks to be replaced with iterations
291     * @param sessions      the sessions in which the tasks are to be replaced
292     * @param appData       the rule application data combining all data used for applying this rule
293     */
294    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
295                                     List<IUserSession>  sessions,
296                                     RuleApplicationData appData)
297    {
298        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
299        Map<IIteration, List<IIterationInstance>> iterationInstances =
300                new HashMap<IIteration, List<IIterationInstance>>();
301           
302        for (ITask iteratedTask : iteratedTasks) {
303            IIteration iteration = taskFactory.createNewIteration();
304            iterations.put(iteratedTask, iteration);
305            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
306        }
307       
308        IIterationInstance iterationInstance;
309       
310        for (IUserSession session : sessions) {
311            int index = 0;
312            iterationInstance = null;
313
314            while (index < session.size()) {
315                // we prepared the task instances to refer to unique tasks, if they are treated
316                // as equal. Therefore, we just compare the identity of the tasks of the task
317                // instances
318                ITask currentTask = session.get(index).getTask();
319                IIteration iteration = iterations.get(currentTask);
320                if (iteration != null) {
321                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration))
322                    {
323                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
324                        iterationInstances.get(iteration).add(iterationInstance);
325                        taskBuilder.addTaskInstance(session, index, iterationInstance);
326                        index++;
327                    }
328                   
329                    taskBuilder.addChild(iterationInstance, session.get(index));
330                    taskBuilder.removeTaskInstance(session, index);
331                }
332                else {
333                    if (iterationInstance != null) {
334                        iterationInstance = null;
335                    }
336                    index++;
337                }
338            }
339        }
340       
341        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
342        {
343            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
344        }
345    }
346
347    /**
348     * <p>
349     * TODO clarify why this is done
350     * </p>
351     */
352    private void harmonizeIterationInstancesModel(IIteration               iteration,
353                                                  List<IIterationInstance> iterationInstances)
354    {
355        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
356        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
357       
358        // merge the lexically different variants of iterated task to a unique list
359        for (IIterationInstance iterationInstance : iterationInstances) {
360            for (ITaskInstance executionVariant : iterationInstance) {
361                ITask candidate = executionVariant.getTask();
362           
363                boolean found = false;
364                for (ITask taskVariant : iteratedTaskVariants) {
365                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
366                        taskBuilder.setTask(executionVariant, taskVariant);
367                        found = true;
368                        break;
369                    }
370                }
371               
372                if (!found) {
373                    iteratedTaskVariants.add(candidate);
374                }
375            }
376        }
377       
378        // if there are more than one lexically different variant of iterated tasks, adapt the
379        // iteration model to be a selection of different variants. In this case also adapt
380        // the generated iteration instances to correctly contain selection instances. If there
381        // is only one variant of an iterated task, simply set this as the marked task of the
382        // iteration. In this case, the instances can be preserved as is
383        if (iteratedTaskVariants.size() > 1) {
384            ISelection selection = taskFactory.createNewSelection();
385           
386            for (ITask variant : iteratedTaskVariants) {
387                taskBuilder.addChild(selection, variant);
388            }
389           
390            taskBuilder.setMarkedTask(iteration, selection);
391           
392            for (IIterationInstance instance : iterationInstances) {
393                for (int i = 0; i < instance.size(); i++) {
394                    ISelectionInstance selectionInstance =
395                        taskFactory.createNewTaskInstance(selection);
396                    taskBuilder.setChild(selectionInstance, instance.get(i));
397                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
398                }
399            }
400        }
401        else {
402            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
403        }
404    }
405
406    /**
407     * TODO go on commenting
408     * @param appData the rule application data combining all data used for applying this rule
409     */
410    private void detectAndReplaceTasks(RuleApplicationData appData) {
411        Console.traceln(Level.FINE, "detecting and replacing tasks");
412        appData.getStopWatch().start("detecting tasks");
413       
414        getSequencesOccuringMostOften(appData);
415
416        appData.getStopWatch().stop("detecting tasks");
417        appData.getStopWatch().start("replacing tasks");
418       
419        replaceSequencesOccurringMostOften(appData);
420       
421        appData.getStopWatch().stop("replacing tasks");
422        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
423                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
424                        " times");
425    }
426
427    /**
428     * @param appData the rule application data combining all data used for applying this rule
429     */
430    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
431        Console.traceln(Level.FINE, "determining most prominent tasks");
432
433        Tasks tasks;
434        boolean createNewTrie = (appData.getLastTrie() == null) ||
435            appData.detectedAndReplacedTasks(); // tree has changed
436       
437        do {
438            if (createNewTrie) {
439                createNewTrie(appData);
440            }
441           
442            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
443            appData.getLastTrie().process(finder);
444       
445            tasks = finder.getFoundTasks();
446           
447            createNewTrie = false;
448           
449            for (List<ITaskInstance> task : tasks) {
450                if (task.size() >= appData.getTrainedSequenceLength()) {
451                    // Trie must be recreated with a longer sequence length to be sure that
452                    // the found sequences occurring most often are found in their whole length
453                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
454                    createNewTrie = true;
455                    break;
456                }
457            }
458        }
459        while (createNewTrie);
460       
461        // create a normal trie for comparison
462        /*System.out.println("creating test trie for comparison");
463       
464        appData.getStopWatch().start("testtrie");
465        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
466       
467        for (IUserSession session : appData.getSessions()) {
468            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
469        }
470       
471        appData.getStopWatch().stop("testtrie");
472       
473        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
474        trie.process(finder);
475
476        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
477       
478        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
479       
480        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
481            boolean foundTask = false;
482            for (List<ITaskInstance> task2 : tasks) {
483                boolean tasksEqual = false;
484                if (task1.size() == task2.size()) {
485                    tasksEqual = true;
486                    for (int i = 0; i < task1.size(); i++) {
487                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
488                        {
489                            tasksEqual = false;
490                        }
491                    }
492                }
493               
494                if (tasksEqual) {
495                    foundTask = true;
496                    break;
497                }
498            }
499           
500            if (!foundTask) {
501                System.out.println("different is " + task1);
502                allTasksEqual = false;
503                break;
504            }
505        }
506       
507        if (!allTasksEqual) {
508            System.out.println(finder.getFoundTasks());
509            System.out.println(tasks);
510           
511            throw new IllegalArgumentException("both tries calculated different subsequences");
512        }
513       
514        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
515        appData.getLastTrie().process(dumper);
516        dumper.dumpCounters();*/
517       
518        appData.setLastFoundTasks(tasks);
519
520        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
521                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
522    }
523
524    /**
525     * @param appData the rule application data combining all data used for applying this rule
526     */
527    private void createNewTrie(RuleApplicationData appData) {
528        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
529                        appData.getTrainedSequenceLength());
530
531        appData.getStopWatch().start("training trie");
532       
533        // we prepared the task instances to refer to unique tasks, if they are treated
534        // as equal. Therefore, we just compare the identity of the tasks of the task
535        // instances
536        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
537   
538        appData.getLastTrie().trainSessions
539            (appData.getSessions(), appData.getTrainedSequenceLength());
540       
541        appData.getStopWatch().stop("training trie");
542    }
543
544    /**
545     * @param appData the rule application data combining all data used for applying this rule
546     */
547    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
548        appData.detectedAndReplacedTasks(false);
549
550        if ((appData.getLastFoundTasks().size() > 0) &&
551            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
552        {
553            Console.traceln(Level.FINER, "replacing tasks occurrences");
554
555            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
556                ISequence sequence = taskFactory.createNewSequence();
557               
558                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
559
560                List<ISequenceInstance> sequenceInstances =
561                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
562               
563                harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size());
564                appData.detectedAndReplacedTasks
565                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
566
567                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
568                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
569                                    sequenceInstances.size() + " times instead of expected " +
570                                    appData.getLastFoundTasks().getOccurrenceCount());
571                }
572            }
573        }
574       
575    }
576
577    /**
578     *
579     */
580    private void harmonizeSequenceInstancesModel(ISequence               sequence,
581                                                 List<ISequenceInstance> sequenceInstances,
582                                                 int                     sequenceLength)
583    {
584        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
585       
586        // ensure for each subtask that lexically different variants are preserved
587        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
588            List<ITask> subTaskVariants = new LinkedList<ITask>();
589           
590            for (ISequenceInstance sequenceInstance : sequenceInstances) {
591                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
592               
593                boolean found = false;
594               
595                for (int i = 0; i < subTaskVariants.size(); i++) {
596                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
597                        taskBuilder.setTask
598                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
599                       
600                        found = true;
601                        break;
602                    }
603                }
604               
605                if (!found) {
606                    subTaskVariants.add(candidate);
607                }
608            }
609           
610            // if there are more than one lexically different variant of the sub task at
611            // the considered position, adapt the sequence model at that position to have
612            // a selection of the different variants. In this case also adapt the
613            // generated sequence instances to correctly contain selection instances. If
614            // there is only one variant of sub tasks at the given position, simply set
615            // this variant as the sub task of the selection. In this case, the instances
616            // can be preserved as is
617            if (subTaskVariants.size() > 1) {
618                ISelection selection = taskFactory.createNewSelection();
619               
620                for (ITask variant : subTaskVariants) {
621                    taskBuilder.addChild(selection, variant);
622                }
623               
624                taskBuilder.addChild(sequence, selection);
625               
626                for (ISequenceInstance instance : sequenceInstances) {
627                    ISelectionInstance selectionInstance =
628                        taskFactory.createNewTaskInstance(selection);
629                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
630                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
631                }
632            }
633            else if (subTaskVariants.size() == 1) {
634                taskBuilder.addChild(sequence, subTaskVariants.get(0));
635            }
636        }
637    }
638
639    /**
640     * @param tree
641     */
642    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
643                                                           List<IUserSession>  sessions,
644                                                           ISequence           temporalTaskModel)
645    {
646        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
647       
648        for (IUserSession session : sessions) {
649            int index = -1;
650               
651            do {
652                index = getSubListIndex(session, task, ++index);
653
654                if (index > -1) {
655                    sequenceInstances.add
656                        (RuleUtils.createNewSubSequenceInRange
657                             (session, index, index + task.size() - 1, temporalTaskModel,
658                              taskFactory, taskBuilder));
659                }
660            }
661            while (index > -1);
662        }
663       
664        return sequenceInstances;
665    }
666
667    /**
668     * @param trie
669     * @param object
670     * @return
671     */
672    private int getSubListIndex(ITaskInstanceList   list,
673                                List<ITaskInstance> subList,
674                                int                 startIndex)
675    {
676        boolean matchFound;
677        int result = -1;
678       
679        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
680            matchFound = true;
681           
682            for (int j = 0; j < subList.size(); j++) {
683                // we prepared the task instances to refer to unique tasks, if they are treated
684                // as equal. Therefore, we just compare the identity of the tasks of the task
685                // instances
686                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
687                    matchFound = false;
688                    break;
689                }
690            }
691           
692            if (matchFound) {
693                result = i;
694                break;
695            }
696        }
697       
698        return result;
699    }
700
701    /**
702     * @param trie
703     * @param object
704     * @return
705     */
706    private int getSubListIndex(List<ITaskInstance> list,
707                                List<ITaskInstance> subList,
708                                int                 startIndex)
709    {
710        boolean matchFound;
711        int result = -1;
712       
713        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
714            matchFound = true;
715           
716            for (int j = 0; j < subList.size(); j++) {
717                // we prepared the task instances to refer to unique tasks, if they are treated
718                // as equal. Therefore, we just compare the identity of the tasks of the task
719                // instances
720                if (list.get(i + j) != subList.get(j)) {
721                    matchFound = false;
722                    break;
723                }
724            }
725           
726            if (matchFound) {
727                result = i;
728                break;
729            }
730        }
731       
732        return result;
733    }
734   
735    /**
736     * @author Patrick Harms
737     */
738    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
739       
740        /**
741         *
742         */
743        private int currentCount;
744       
745        /**
746         *
747         */
748        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
749
750        /**
751         *
752         */
753        public MaxCountAndLongestTasksFinder() {
754            super();
755            this.currentCount = 0;
756        }
757
758        /* (non-Javadoc)
759         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
760         */
761        @Override
762        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
763            if (foundTask.size() < 2) {
764                // ignore single tasks
765                return TrieProcessor.Result.CONTINUE;
766            }
767           
768            if (count < 2) {
769                // ignore singular occurrences
770                return TrieProcessor.Result.SKIP_NODE;
771            }
772
773            if (this.currentCount > count) {
774                // ignore this children of this task, as they may have only smaller counts than
775                // the already found tasks
776                return TrieProcessor.Result.SKIP_NODE;
777            }
778           
779            if (this.currentCount < count) {
780                // the provided task occurs more often that all tasks found so far.
781                // clear all found tasks and use the new count as the one searched for
782                foundTasks.clear();
783                this.currentCount = count;
784            }
785           
786            if (this.currentCount == count) {
787                // the task is of interest. Sort it into the other found tasks so that
788                // the longest come first
789                boolean added = false;
790                for (int i = 0; i < foundTasks.size(); i++) {
791                    if (foundTasks.get(i).size() < foundTask.size()) {
792                        // defensive copy
793                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
794                        added = true;
795                        break;
796                    }
797                }
798               
799                if (!added) {
800                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
801                }
802            }
803           
804            return TrieProcessor.Result.CONTINUE;
805        }
806
807        /**
808         *  @return
809         */
810        public Tasks getFoundTasks() {
811            removePermutationsOfShorterTasks();
812            return new Tasks(currentCount, foundTasks);
813        }
814
815        /**
816         *
817         */
818        private void removePermutationsOfShorterTasks() {
819            // now iterate the sorted list and for each task remove all other tasks that are shorter
820            // (i.e. come later in the sorted list) and that represent a subpart of the task
821            for (int i = 0; i < foundTasks.size(); i++) {
822                for (int j = i + 1; j < foundTasks.size();) {
823                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
824                        // found a task that is a potential subtask. Check for this and remove the
825                        // subtask if needed
826                        List<ITaskInstance> longTask = foundTasks.get(i);
827                        List<ITaskInstance> shortTask = foundTasks.get(j);
828                       
829                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
830                            foundTasks.remove(j);
831                        }
832                        else {
833                            j++;
834                        }
835                    }
836                    else {
837                        j++;
838                    }
839                }
840            }
841        }
842
843    }
844   
845//    /**
846//     * @author Patrick Harms
847//     */
848//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
849//       
850//        /**
851//         *
852//         */
853//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
854//       
855//        /* (non-Javadoc)
856//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
857//         */
858//        @Override
859//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
860//            if (subsequence.size() == 1) {
861//                Integer value = counters.get(count);
862//               
863//                if (value == null) {
864//                    value = 0;
865//                }
866//               
867//                counters.put(count, value + 1);
868//               
869//                return TrieProcessor.Result.CONTINUE;
870//            }
871//            else {
872//                // ignore singular occurrences
873//                return TrieProcessor.Result.SKIP_NODE;
874//            }
875//        }
876//
877//        /**
878//         *  @return
879//         */
880//        public void dumpCounters() {
881//            int dumpedCounters = 0;
882//           
883//            int count = 1;
884//            while (dumpedCounters < counters.size()) {
885//                Integer value = counters.get(count++);
886//                if (value != null) {
887//                    System.out.println(value + " symbols occurred " + count + " times");
888//                    dumpedCounters++;
889//                }
890//            }
891//        }
892//
893//    }
894   
895    /**
896     *
897     */
898    private static class RuleApplicationData {
899       
900        /**
901         *
902         */
903        private List<IUserSession> sessions;
904       
905        /**
906         *
907         */
908        private TaskInstanceTrie lastTrie;
909       
910        /**
911         * default and minimum trained sequence length is 3
912         */
913        private int trainedSequenceLength = 3;
914       
915        /**
916         *
917         */
918        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
919       
920        /**
921         *
922         */
923        private boolean detectedAndReplacedTasks;
924       
925        /**
926         *
927         */
928        private RuleApplicationResult result = new RuleApplicationResult();
929       
930        /**
931         *
932         */
933        private StopWatch stopWatch = new StopWatch();
934       
935        /**
936         *
937         */
938        private RuleApplicationData(List<IUserSession> sessions) {
939            this.sessions = sessions;
940        }
941
942        /**
943         * @return the tree
944         */
945        private List<IUserSession> getSessions() {
946            return sessions;
947        }
948
949        /**
950         * @param lastTrie the lastTrie to set
951         */
952        private void setLastTrie(TaskInstanceTrie lastTrie) {
953            this.lastTrie = lastTrie;
954        }
955
956        /**
957         * @return the lastTrie
958         */
959        private TaskInstanceTrie getLastTrie() {
960            return lastTrie;
961        }
962
963        /**
964         * @param trainedSequenceLength the trainedSequenceLength to set
965         */
966        private void setTrainedSequenceLength(int trainedSequenceLength) {
967            this.trainedSequenceLength = trainedSequenceLength;
968        }
969
970        /**
971         * @return the trainedSequenceLength
972         */
973        private int getTrainedSequenceLength() {
974            return trainedSequenceLength;
975        }
976
977        /**
978         * @param lastFoundSequences the lastFoundSequences to set
979         */
980        private void setLastFoundTasks(Tasks lastFoundSequences) {
981            this.lastFoundTasks = lastFoundSequences;
982        }
983
984        /**
985         * @return the lastFoundSequences
986         */
987        private Tasks getLastFoundTasks() {
988            return lastFoundTasks;
989        }
990
991        /**
992         *
993         */
994        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
995            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
996        }
997
998        /**
999         *
1000         */
1001        private boolean detectedAndReplacedTasks() {
1002            return detectedAndReplacedTasks;
1003        }
1004       
1005        /**
1006         * @return the result
1007         */
1008        private RuleApplicationResult getResult() {
1009            return result;
1010        }
1011
1012        /**
1013         * @return the stopWatch
1014         */
1015        private StopWatch getStopWatch() {
1016            return stopWatch;
1017        }
1018
1019    }
1020   
1021
1022    /**
1023     * @author Patrick Harms
1024     */
1025    private static class Tasks implements Iterable<List<ITaskInstance>> {
1026       
1027        /**
1028         *
1029         */
1030        private int occurrenceCount;
1031       
1032        /**
1033         *
1034         */
1035        private List<List<ITaskInstance>> sequences;
1036
1037        /**
1038         * @param occurrenceCount
1039         * @param sequences
1040         */
1041        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1042            super();
1043            this.occurrenceCount = occurrenceCount;
1044            this.sequences = sequences;
1045        }
1046
1047        /**
1048         * @return
1049         */
1050        private int getOccurrenceCount() {
1051            return occurrenceCount;
1052        }
1053
1054        /**
1055         * @return
1056         */
1057        private int size() {
1058            return this.sequences.size();
1059        }
1060
1061        /**
1062         *
1063         */
1064
1065        /* (non-Javadoc)
1066         * @see java.lang.Iterable#iterator()
1067         */
1068        @Override
1069        public Iterator<List<ITaskInstance>> iterator() {
1070            return this.sequences.iterator();
1071        }
1072
1073        /* (non-Javadoc)
1074         * @see java.lang.Object#toString()
1075         */
1076        @Override
1077        public String toString() {
1078            StringBuffer result = new StringBuffer();
1079            result.append(this.occurrenceCount);
1080            result.append(" occurrences:\n");
1081           
1082            for (List<ITaskInstance> task : sequences) {
1083                result.append(task);
1084                result.append("\n");
1085            }
1086           
1087            return result.toString();
1088        }
1089
1090    }
1091   
1092    // methods for internal testing
1093//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1094//                                         List<IUserSession> sessions,
1095//                                         String             when)
1096//    {
1097//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1098//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1099//            System.out.println("################## number of sessions changed after " + when);
1100//        }
1101//        else {
1102//            for (int i = 0; i < flattenedSessions.size(); i++) {
1103//                List<Event> expected = flattenedSessions.get(i);
1104//                List<Event> current = currentFlattenedSessions.get(i);
1105//           
1106//                if (expected.size() != current.size()) {
1107//                    System.out.println
1108//                        ("################## length of session " + i + " changed after " + when);
1109//                }
1110//                else {
1111//                    for (int j = 0; j < expected.size(); j++) {
1112//                        if (!expected.get(j).equals(current.get(j))) {
1113//                            System.out.println("################## event " + j + " of session " +
1114//                                               i + " changed after " + when);
1115//                        }
1116//                    }
1117//                }
1118//            }     
1119//        }
1120//    }
1121//
1122//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1123//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1124//        for (IUserSession session : sessions) {
1125//            List<Event> flattenedUserSession = new ArrayList<Event>();
1126//            flatten(session, flattenedUserSession);
1127//            flattenedSessions.add(flattenedUserSession);
1128//        }
1129//
1130//        return flattenedSessions;
1131//    }
1132//
1133//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1134//        for (ITaskInstance instance : iUserSession) {
1135//            flatten(instance, flattenedUserSession);
1136//        }
1137//    }
1138//
1139//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1140//        if (instance instanceof ITaskInstanceList) {
1141//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1142//                flatten(child, flattenedUserSession);
1143//            }
1144//        }
1145//        else if (instance instanceof ISelectionInstance) {
1146//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1147//        }
1148//        else if (instance instanceof IOptionalInstance) {
1149//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1150//        }
1151//        else if (instance instanceof IEventTaskInstance) {
1152//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1153//        }
1154//    }
1155
1156}
Note: See TracBrowser for help on using the repository browser.