source: branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1557

Last change on this file since 1557 was 1557, checked in by rkrimmel, 10 years ago

Smith-Waterman adjustments

File size: 42.4 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedList;
22import java.util.List;
23import java.util.Map;
24import java.util.Set;
25import java.util.logging.Level;
26
27import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.FengDoolittle;
28import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
29import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWaterman;
30import de.ugoe.cs.autoquest.tasktrees.alignment.substitution.ObjectDistanceSubstitionMatrix;
31import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
39import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
40import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
41import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
42import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
43import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
44import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
45import de.ugoe.cs.autoquest.usageprofiles.Trie;
46import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
47import de.ugoe.cs.util.StopWatch;
48import de.ugoe.cs.util.console.Console;
49
50/**
51 * <p>
52 * This class implements the major rule for creating task trees based on a set of recorded
53 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
54 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
55 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
56 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
57 * iterations and then again for sub sequences until no more replacements are done.
58 * </p>
59 * <p>
60 * For determining the longest sequence occurring most often, the implementation uses a
61 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
62 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
63 * with an increased depth.
64 * </p>
65 *
66 * @author Patrick Harms
67 */
68class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
69   
70    /**
71     * <p>
72     * the task factory to be used for creating substructures for the temporal
73     * relationships identified during rul application
74     * </p>
75     */
76    private ITaskFactory taskFactory;
77    /**
78     * <p>
79     * the task builder to be used for creating substructures for the temporal relationships
80     * identified during rule application
81     * </p>
82     */
83    private ITaskBuilder taskBuilder;
84
85    /**
86     * <p>
87     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
88     * the tasks are harmonized
89     * </p>
90     */
91    private TaskHandlingStrategy preparationTaskHandlingStrategy;
92   
93    /**
94     * <p>
95     * the task handling strategy to be used for comparing tasks during iteration detection an trie
96     * generation, i.e., after the tasks are harmonized
97     * </p>
98     */
99    private TaskHandlingStrategy identityTaskHandlingStrategy;;
100   
101   
102    private ArrayList<NumberSequence> numberseqs;
103   
104    /**
105     * <p>
106     * instantiates the rule and initializes it with a task equality to be considered when
107     * comparing tasks as well as a task factory and builder to be used for creating task
108     * structures.
109     * </p>
110     *
111     * @param minimalTaskEquality the task equality to be considered when comparing tasks
112     * @param taskFactory         the task factory to be used for creating substructures
113     * @param taskBuilder         the task builder to be used for creating substructures
114     */
115   
116   
117    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
118                                 ITaskFactory taskFactory,
119                                 ITaskBuilder taskBuilder)
120    {
121        this.taskFactory = taskFactory;
122        this.taskBuilder = taskBuilder;
123       
124        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
125        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
126        numberseqs = new ArrayList<NumberSequence>();
127       
128    }
129
130    /* (non-Javadoc)
131     * @see java.lang.Object#toString()
132     */
133    @Override
134    public String toString() {
135        return "SequenceForTaskDetectionRuleAlignment";
136    }
137
138    /* (non-Javadoc)
139     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
140     */
141    @Override
142    public RuleApplicationResult apply(List<IUserSession> sessions) {
143        RuleApplicationData appData = new RuleApplicationData(sessions);
144       
145       
146       
147       
148        // this is the real rule application. Loop while something is replaced.
149        SymbolMap<ITaskInstance, ITask> uniqueTasks = harmonizeEventTaskInstancesModel(appData);
150        ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix(uniqueTasks); 
151        submat.generate();
152       
153        SmithWaterman sw = new SmithWaterman(numberseqs.get(1).getSequence(), numberseqs.get(1).getSequence(), submat);
154       
155   
156       
157       
158     
159       
160       
161        //Hier mein kram hin
162        do {
163            System.out.println();
164            FengDoolittle fd = new FengDoolittle();
165           
166           
167            appData.getStopWatch().start("whole loop");
168            detectAndReplaceIterations(appData);
169
170            appData.getStopWatch().start("task replacement");
171            detectAndReplaceTasks(appData);
172            appData.getStopWatch().stop("task replacement");
173            appData.getStopWatch().stop("whole loop");
174           
175           
176            appData.getStopWatch().dumpStatistics(System.out);
177            appData.getStopWatch().reset();
178           
179        }
180        while (appData.detectedAndReplacedTasks());
181       
182        Console.println
183            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
184             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
185             " appropriate instances\n");
186       
187        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
188            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
189        {
190            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
191        }
192       
193        return appData.getResult();
194    }
195
196    /**
197     * <p>
198     * harmonizes the event task instances by unifying tasks. This is done, as initially the
199     * event tasks being equal with respect to the considered task equality are distinct objects.
200     * The comparison of these distinct objects is more time consuming than comparing the object
201     * references.
202     * </p>
203     *
204     * @param appData the rule application data combining all data used for applying this rule
205     * @return Returns the unique tasks symbol map
206     */
207    private SymbolMap<ITaskInstance, ITask> harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
208        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
209        appData.getStopWatch().start("harmonizing event tasks");
210
211        SymbolMap<ITaskInstance, ITask> uniqueTasks =
212            preparationTaskHandlingStrategy.createSymbolMap();
213        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
214       
215        int unifiedTasks = 0;
216        ITask task;
217        List<IUserSession> sessions = appData.getSessions();
218        int sessionNo = 0;
219        numberseqs = new ArrayList<NumberSequence>();
220        for (IUserSession session : sessions) {
221            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
222                NumberSequence templist = new NumberSequence(session.size()) ;
223           
224            for (int i = 0; i < session.size(); i++) {
225                ITaskInstance taskInstance = session.get(i);
226                task = uniqueTasks.getValue(taskInstance);
227               
228                if (task == null) {
229                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
230                    templist.getSequence()[i]=taskInstance.getTask().getId();
231                }
232                else {
233                    taskBuilder.setTask(taskInstance, task);
234                        templist.getSequence()[i]=task.getId();
235                    unifiedTasks++;
236                }
237            }
238            numberseqs.add(templist);
239            comparator.clearBuffers();
240        }
241       
242        appData.getStopWatch().stop("harmonizing event tasks");
243        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
244                        uniqueTasks.size() + " different tasks)");
245
246        appData.getStopWatch().dumpStatistics(System.out);
247        appData.getStopWatch().reset();
248                return uniqueTasks;
249    }
250
251    /**
252     * <p>
253     * searches for direct iterations of single tasks in all sequences and replaces them with
254     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
255     * a task that is iterated somewhen are replaced with iterations to have again an efficient
256     * way for task comparisons.
257     * </p>
258     *
259     * @param appData the rule application data combining all data used for applying this rule
260     */
261    private void detectAndReplaceIterations(RuleApplicationData appData) {
262        Console.traceln(Level.FINE, "detecting iterations");
263        appData.getStopWatch().start("detecting iterations");
264       
265        List<IUserSession> sessions = appData.getSessions();
266       
267        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
268       
269        if (iteratedTasks.size() > 0) {
270            replaceIterationsOf(iteratedTasks, sessions, appData);
271        }
272       
273        appData.getStopWatch().stop("detecting iterations");
274        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
275    }
276
277    /**
278     * <p>
279     * searches the provided sessions for task iterations. If a task is iterated, it is added
280     * to the returned set.
281     * </p>
282     *
283     * @param the session to search for iterations in
284     *
285     * @return a set of tasks being iterated somewhere
286     */
287    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
288        Set<ITask> iteratedTasks = new HashSet<ITask>();
289        for (IUserSession session : sessions) {
290            for (int i = 0; i < (session.size() - 1); i++) {
291                // we prepared the task instances to refer to unique tasks, if they are treated
292                // as equal. Therefore, we just compare the identity of the tasks of the task
293                // instances
294                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
295                    iteratedTasks.add(session.get(i).getTask());
296                }
297            }
298        }
299       
300        return iteratedTasks;
301    }
302
303    /**
304     * <p>
305     * replaces all occurrences of all tasks provided in the set with iterations
306     * </p>
307     *
308     * @param iteratedTasks the tasks to be replaced with iterations
309     * @param sessions      the sessions in which the tasks are to be replaced
310     * @param appData       the rule application data combining all data used for applying this rule
311     */
312    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
313                                     List<IUserSession>  sessions,
314                                     RuleApplicationData appData)
315    {
316        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
317        Map<IIteration, List<IIterationInstance>> iterationInstances =
318                new HashMap<IIteration, List<IIterationInstance>>();
319           
320        for (ITask iteratedTask : iteratedTasks) {
321            IIteration iteration = taskFactory.createNewIteration();
322            iterations.put(iteratedTask, iteration);
323            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
324        }
325       
326        IIterationInstance iterationInstance;
327       
328        for (IUserSession session : sessions) {
329            int index = 0;
330            iterationInstance = null;
331
332            while (index < session.size()) {
333                // we prepared the task instances to refer to unique tasks, if they are treated
334                // as equal. Therefore, we just compare the identity of the tasks of the task
335                // instances
336                ITask currentTask = session.get(index).getTask();
337                IIteration iteration = iterations.get(currentTask);
338                if (iteration != null) {
339                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration))
340                    {
341                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
342                        iterationInstances.get(iteration).add(iterationInstance);
343                        taskBuilder.addTaskInstance(session, index, iterationInstance);
344                        index++;
345                    }
346                   
347                    taskBuilder.addChild(iterationInstance, session.get(index));
348                    taskBuilder.removeTaskInstance(session, index);
349                }
350                else {
351                    if (iterationInstance != null) {
352                        iterationInstance = null;
353                    }
354                    index++;
355                }
356            }
357        }
358       
359        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
360        {
361            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
362        }
363    }
364
365    /**
366     * <p>
367     * TODO clarify why this is done
368     * </p>
369     */
370    private void harmonizeIterationInstancesModel(IIteration               iteration,
371                                                  List<IIterationInstance> iterationInstances)
372    {
373        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
374        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
375       
376        // merge the lexically different variants of iterated task to a unique list
377        for (IIterationInstance iterationInstance : iterationInstances) {
378            for (ITaskInstance executionVariant : iterationInstance) {
379                ITask candidate = executionVariant.getTask();
380           
381                boolean found = false;
382                for (ITask taskVariant : iteratedTaskVariants) {
383                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
384                        taskBuilder.setTask(executionVariant, taskVariant);
385                        found = true;
386                        break;
387                    }
388                }
389               
390                if (!found) {
391                    iteratedTaskVariants.add(candidate);
392                }
393            }
394        }
395       
396        // if there are more than one lexically different variant of iterated tasks, adapt the
397        // iteration model to be a selection of different variants. In this case also adapt
398        // the generated iteration instances to correctly contain selection instances. If there
399        // is only one variant of an iterated task, simply set this as the marked task of the
400        // iteration. In this case, the instances can be preserved as is
401        if (iteratedTaskVariants.size() > 1) {
402            ISelection selection = taskFactory.createNewSelection();
403           
404            for (ITask variant : iteratedTaskVariants) {
405                taskBuilder.addChild(selection, variant);
406            }
407           
408            taskBuilder.setMarkedTask(iteration, selection);
409           
410            for (IIterationInstance instance : iterationInstances) {
411                for (int i = 0; i < instance.size(); i++) {
412                    ISelectionInstance selectionInstance =
413                        taskFactory.createNewTaskInstance(selection);
414                    taskBuilder.setChild(selectionInstance, instance.get(i));
415                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
416                }
417            }
418        }
419        else {
420            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
421        }
422    }
423
424    /**
425     * TODO go on commenting
426     * @param appData the rule application data combining all data used for applying this rule
427     */
428    private void detectAndReplaceTasks(RuleApplicationData appData) {
429        Console.traceln(Level.FINE, "detecting and replacing tasks");
430        appData.getStopWatch().start("detecting tasks");
431       
432        getSequencesOccuringMostOften(appData);
433
434        appData.getStopWatch().stop("detecting tasks");
435        appData.getStopWatch().start("replacing tasks");
436       
437        replaceSequencesOccurringMostOften(appData);
438       
439        appData.getStopWatch().stop("replacing tasks");
440        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
441                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
442                        " times");
443    }
444
445    /**
446     * @param appData the rule application data combining all data used for applying this rule
447     */
448    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
449        Console.traceln(Level.FINE, "determining most prominent tasks");
450
451        Tasks tasks;
452        boolean createNewTrie = (appData.getLastTrie() == null) ||
453            appData.detectedAndReplacedTasks(); // tree has changed
454       
455        do {
456            if (createNewTrie) {
457                createNewTrie(appData);
458            }
459           
460            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
461            appData.getLastTrie().process(finder);
462       
463            tasks = finder.getFoundTasks();
464           
465            createNewTrie = false;
466           
467            for (List<ITaskInstance> task : tasks) {
468                if (task.size() >= appData.getTrainedSequenceLength()) {
469                    // Trie must be recreated with a longer sequence length to be sure that
470                    // the found sequences occurring most often are found in their whole length
471                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
472                    createNewTrie = true;
473                    break;
474                }
475            }
476        }
477        while (createNewTrie);
478       
479        // create a normal trie for comparison
480        /*System.out.println("creating test trie for comparison");
481       
482        appData.getStopWatch().start("testtrie");
483        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
484       
485        for (IUserSession session : appData.getSessions()) {
486            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
487        }
488       
489        appData.getStopWatch().stop("testtrie");
490       
491        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
492        trie.process(finder);
493
494        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
495       
496        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
497       
498        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
499            boolean foundTask = false;
500            for (List<ITaskInstance> task2 : tasks) {
501                boolean tasksEqual = false;
502                if (task1.size() == task2.size()) {
503                    tasksEqual = true;
504                    for (int i = 0; i < task1.size(); i++) {
505                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
506                        {
507                            tasksEqual = false;
508                        }
509                    }
510                }
511               
512                if (tasksEqual) {
513                    foundTask = true;
514                    break;
515                }
516            }
517           
518            if (!foundTask) {
519                System.out.println("different is " + task1);
520                allTasksEqual = false;
521                break;
522            }
523        }
524       
525        if (!allTasksEqual) {
526            System.out.println(finder.getFoundTasks());
527            System.out.println(tasks);
528           
529            throw new IllegalArgumentException("both tries calculated different subsequences");
530        }
531       
532        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
533        appData.getLastTrie().process(dumper);
534        dumper.dumpCounters();*/
535       
536        appData.setLastFoundTasks(tasks);
537
538        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
539                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
540    }
541
542    /**
543     * @param appData the rule application data combining all data used for applying this rule
544     */
545    private void createNewTrie(RuleApplicationData appData) {
546        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
547                        appData.getTrainedSequenceLength());
548
549        appData.getStopWatch().start("training trie");
550       
551        // we prepared the task instances to refer to unique tasks, if they are treated
552        // as equal. Therefore, we just compare the identity of the tasks of the task
553        // instances
554        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
555   
556        appData.getLastTrie().trainSessions
557            (appData.getSessions(), appData.getTrainedSequenceLength());
558       
559        appData.getStopWatch().stop("training trie");
560    }
561
562    /**
563     * @param appData the rule application data combining all data used for applying this rule
564     */
565    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
566        appData.detectedAndReplacedTasks(false);
567
568        if ((appData.getLastFoundTasks().size() > 0) &&
569            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
570        {
571            Console.traceln(Level.FINER, "replacing tasks occurrences");
572
573            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
574                ISequence sequence = taskFactory.createNewSequence();
575               
576                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
577
578                List<ISequenceInstance> sequenceInstances =
579                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
580               
581                harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size());
582                appData.detectedAndReplacedTasks
583                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
584
585                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
586                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
587                                    sequenceInstances.size() + " times instead of expected " +
588                                    appData.getLastFoundTasks().getOccurrenceCount());
589                }
590            }
591        }
592       
593    }
594
595    /**
596     *
597     */
598    private void harmonizeSequenceInstancesModel(ISequence               sequence,
599                                                 List<ISequenceInstance> sequenceInstances,
600                                                 int                     sequenceLength)
601    {
602        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
603       
604        // ensure for each subtask that lexically different variants are preserved
605        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
606            List<ITask> subTaskVariants = new LinkedList<ITask>();
607           
608            for (ISequenceInstance sequenceInstance : sequenceInstances) {
609                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
610               
611                boolean found = false;
612               
613                for (int i = 0; i < subTaskVariants.size(); i++) {
614                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
615                        taskBuilder.setTask
616                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
617                       
618                        found = true;
619                        break;
620                    }
621                }
622               
623                if (!found) {
624                    subTaskVariants.add(candidate);
625                }
626            }
627           
628            // if there are more than one lexically different variant of the sub task at
629            // the considered position, adapt the sequence model at that position to have
630            // a selection of the different variants. In this case also adapt the
631            // generated sequence instances to correctly contain selection instances. If
632            // there is only one variant of sub tasks at the given position, simply set
633            // this variant as the sub task of the selection. In this case, the instances
634            // can be preserved as is
635            if (subTaskVariants.size() > 1) {
636                ISelection selection = taskFactory.createNewSelection();
637               
638                for (ITask variant : subTaskVariants) {
639                    taskBuilder.addChild(selection, variant);
640                }
641               
642                taskBuilder.addChild(sequence, selection);
643               
644                for (ISequenceInstance instance : sequenceInstances) {
645                    ISelectionInstance selectionInstance =
646                        taskFactory.createNewTaskInstance(selection);
647                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
648                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
649                }
650            }
651            else if (subTaskVariants.size() == 1) {
652                taskBuilder.addChild(sequence, subTaskVariants.get(0));
653            }
654        }
655    }
656
657    /**
658     * @param tree
659     */
660    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
661                                                           List<IUserSession>  sessions,
662                                                           ISequence           temporalTaskModel)
663    {
664        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
665       
666        for (IUserSession session : sessions) {
667            int index = -1;
668               
669            do {
670                index = getSubListIndex(session, task, ++index);
671
672                if (index > -1) {
673                    sequenceInstances.add
674                        (RuleUtils.createNewSubSequenceInRange
675                             (session, index, index + task.size() - 1, temporalTaskModel,
676                              taskFactory, taskBuilder));
677                }
678            }
679            while (index > -1);
680        }
681       
682        return sequenceInstances;
683    }
684
685    /**
686     * @param trie
687     * @param object
688     * @return
689     */
690    private int getSubListIndex(ITaskInstanceList   list,
691                                List<ITaskInstance> subList,
692                                int                 startIndex)
693    {
694        boolean matchFound;
695        int result = -1;
696       
697        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
698            matchFound = true;
699           
700            for (int j = 0; j < subList.size(); j++) {
701                // we prepared the task instances to refer to unique tasks, if they are treated
702                // as equal. Therefore, we just compare the identity of the tasks of the task
703                // instances
704                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
705                    matchFound = false;
706                    break;
707                }
708            }
709           
710            if (matchFound) {
711                result = i;
712                break;
713            }
714        }
715       
716        return result;
717    }
718
719    /**
720     * @param trie
721     * @param object
722     * @return
723     */
724    private int getSubListIndex(List<ITaskInstance> list,
725                                List<ITaskInstance> subList,
726                                int                 startIndex)
727    {
728        boolean matchFound;
729        int result = -1;
730       
731        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
732            matchFound = true;
733           
734            for (int j = 0; j < subList.size(); j++) {
735                // we prepared the task instances to refer to unique tasks, if they are treated
736                // as equal. Therefore, we just compare the identity of the tasks of the task
737                // instances
738                if (list.get(i + j) != subList.get(j)) {
739                    matchFound = false;
740                    break;
741                }
742            }
743           
744            if (matchFound) {
745                result = i;
746                break;
747            }
748        }
749       
750        return result;
751    }
752   
753    /**
754     * @author Patrick Harms
755     */
756    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
757       
758        /**
759         *
760         */
761        private int currentCount;
762       
763        /**
764         *
765         */
766        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
767
768        /**
769         *
770         */
771        public MaxCountAndLongestTasksFinder() {
772            super();
773            this.currentCount = 0;
774        }
775
776        /* (non-Javadoc)
777         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
778         */
779        @Override
780        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
781            if (foundTask.size() < 2) {
782                // ignore single tasks
783                return TrieProcessor.Result.CONTINUE;
784            }
785           
786            if (count < 2) {
787                // ignore singular occurrences
788                return TrieProcessor.Result.SKIP_NODE;
789            }
790
791            if (this.currentCount > count) {
792                // ignore this children of this task, as they may have only smaller counts than
793                // the already found tasks
794                return TrieProcessor.Result.SKIP_NODE;
795            }
796           
797            if (this.currentCount < count) {
798                // the provided task occurs more often that all tasks found so far.
799                // clear all found tasks and use the new count as the one searched for
800                foundTasks.clear();
801                this.currentCount = count;
802            }
803           
804            if (this.currentCount == count) {
805                // the task is of interest. Sort it into the other found tasks so that
806                // the longest come first
807                boolean added = false;
808                for (int i = 0; i < foundTasks.size(); i++) {
809                    if (foundTasks.get(i).size() < foundTask.size()) {
810                        // defensive copy
811                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
812                        added = true;
813                        break;
814                    }
815                }
816               
817                if (!added) {
818                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
819                }
820            }
821           
822            return TrieProcessor.Result.CONTINUE;
823        }
824
825        /**
826         *  @return
827         */
828        public Tasks getFoundTasks() {
829            removePermutationsOfShorterTasks();
830            return new Tasks(currentCount, foundTasks);
831        }
832
833        /**
834         *
835         */
836        private void removePermutationsOfShorterTasks() {
837            // now iterate the sorted list and for each task remove all other tasks that are shorter
838            // (i.e. come later in the sorted list) and that represent a subpart of the task
839            for (int i = 0; i < foundTasks.size(); i++) {
840                for (int j = i + 1; j < foundTasks.size();) {
841                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
842                        // found a task that is a potential subtask. Check for this and remove the
843                        // subtask if needed
844                        List<ITaskInstance> longTask = foundTasks.get(i);
845                        List<ITaskInstance> shortTask = foundTasks.get(j);
846                       
847                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
848                            foundTasks.remove(j);
849                        }
850                        else {
851                            j++;
852                        }
853                    }
854                    else {
855                        j++;
856                    }
857                }
858            }
859        }
860
861    }
862   
863//    /**
864//     * @author Patrick Harms
865//     */
866//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
867//       
868//        /**
869//         *
870//         */
871//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
872//       
873//        /* (non-Javadoc)
874//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
875//         */
876//        @Override
877//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
878//            if (subsequence.size() == 1) {
879//                Integer value = counters.get(count);
880//               
881//                if (value == null) {
882//                    value = 0;
883//                }
884//               
885//                counters.put(count, value + 1);
886//               
887//                return TrieProcessor.Result.CONTINUE;
888//            }
889//            else {
890//                // ignore singular occurrences
891//                return TrieProcessor.Result.SKIP_NODE;
892//            }
893//        }
894//
895//        /**
896//         *  @return
897//         */
898//        public void dumpCounters() {
899//            int dumpedCounters = 0;
900//           
901//            int count = 1;
902//            while (dumpedCounters < counters.size()) {
903//                Integer value = counters.get(count++);
904//                if (value != null) {
905//                    System.out.println(value + " symbols occurred " + count + " times");
906//                    dumpedCounters++;
907//                }
908//            }
909//        }
910//
911//    }
912   
913    /**
914     *
915     */
916    private static class RuleApplicationData {
917       
918        /**
919         *
920         */
921        private List<IUserSession> sessions;
922       
923        /**
924         *
925         */
926        private TaskInstanceTrie lastTrie;
927       
928        /**
929         * default and minimum trained sequence length is 3
930         */
931        private int trainedSequenceLength = 3;
932       
933        /**
934         *
935         */
936        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
937       
938        /**
939         *
940         */
941        private boolean detectedAndReplacedTasks;
942       
943        /**
944         *
945         */
946        private RuleApplicationResult result = new RuleApplicationResult();
947       
948        /**
949         *
950         */
951        private StopWatch stopWatch = new StopWatch();
952       
953        /**
954         *
955         */
956        private RuleApplicationData(List<IUserSession> sessions) {
957            this.sessions = sessions;
958        }
959
960        /**
961         * @return the tree
962         */
963        private List<IUserSession> getSessions() {
964            return sessions;
965        }
966
967        /**
968         * @param lastTrie the lastTrie to set
969         */
970        private void setLastTrie(TaskInstanceTrie lastTrie) {
971            this.lastTrie = lastTrie;
972        }
973
974        /**
975         * @return the lastTrie
976         */
977        private TaskInstanceTrie getLastTrie() {
978            return lastTrie;
979        }
980
981        /**
982         * @param trainedSequenceLength the trainedSequenceLength to set
983         */
984        private void setTrainedSequenceLength(int trainedSequenceLength) {
985            this.trainedSequenceLength = trainedSequenceLength;
986        }
987
988        /**
989         * @return the trainedSequenceLength
990         */
991        private int getTrainedSequenceLength() {
992            return trainedSequenceLength;
993        }
994
995        /**
996         * @param lastFoundSequences the lastFoundSequences to set
997         */
998        private void setLastFoundTasks(Tasks lastFoundSequences) {
999            this.lastFoundTasks = lastFoundSequences;
1000        }
1001
1002        /**
1003         * @return the lastFoundSequences
1004         */
1005        private Tasks getLastFoundTasks() {
1006            return lastFoundTasks;
1007        }
1008
1009        /**
1010         *
1011         */
1012        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
1013            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
1014        }
1015
1016        /**
1017         *
1018         */
1019        private boolean detectedAndReplacedTasks() {
1020            return detectedAndReplacedTasks;
1021        }
1022       
1023        /**
1024         * @return the result
1025         */
1026        private RuleApplicationResult getResult() {
1027            return result;
1028        }
1029
1030        /**
1031         * @return the stopWatch
1032         */
1033        private StopWatch getStopWatch() {
1034            return stopWatch;
1035        }
1036
1037    }
1038   
1039
1040    /**
1041     * @author Patrick Harms
1042     */
1043    private static class Tasks implements Iterable<List<ITaskInstance>> {
1044       
1045        /**
1046         *
1047         */
1048        private int occurrenceCount;
1049       
1050        /**
1051         *
1052         */
1053        private List<List<ITaskInstance>> sequences;
1054
1055        /**
1056         * @param occurrenceCount
1057         * @param sequences
1058         */
1059        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1060            super();
1061            this.occurrenceCount = occurrenceCount;
1062            this.sequences = sequences;
1063        }
1064
1065        /**
1066         * @return
1067         */
1068        private int getOccurrenceCount() {
1069            return occurrenceCount;
1070        }
1071
1072        /**
1073         * @return
1074         */
1075        private int size() {
1076            return this.sequences.size();
1077        }
1078
1079        /**
1080         *
1081         */
1082
1083        /* (non-Javadoc)
1084         * @see java.lang.Iterable#iterator()
1085         */
1086        @Override
1087        public Iterator<List<ITaskInstance>> iterator() {
1088            return this.sequences.iterator();
1089        }
1090
1091        /* (non-Javadoc)
1092         * @see java.lang.Object#toString()
1093         */
1094        @Override
1095        public String toString() {
1096            StringBuffer result = new StringBuffer();
1097            result.append(this.occurrenceCount);
1098            result.append(" occurrences:\n");
1099           
1100            for (List<ITaskInstance> task : sequences) {
1101                result.append(task);
1102                result.append("\n");
1103            }
1104           
1105            return result.toString();
1106        }
1107
1108    }
1109   
1110    // methods for internal testing
1111//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1112//                                         List<IUserSession> sessions,
1113//                                         String             when)
1114//    {
1115//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1116//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1117//            System.out.println("################## number of sessions changed after " + when);
1118//        }
1119//        else {
1120//            for (int i = 0; i < flattenedSessions.size(); i++) {
1121//                List<Event> expected = flattenedSessions.get(i);
1122//                List<Event> current = currentFlattenedSessions.get(i);
1123//           
1124//                if (expected.size() != current.size()) {
1125//                    System.out.println
1126//                        ("################## length of session " + i + " changed after " + when);
1127//                }
1128//                else {
1129//                    for (int j = 0; j < expected.size(); j++) {
1130//                        if (!expected.get(j).equals(current.get(j))) {
1131//                            System.out.println("################## event " + j + " of session " +
1132//                                               i + " changed after " + when);
1133//                        }
1134//                    }
1135//                }
1136//            }     
1137//        }
1138//    }
1139//
1140//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1141//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1142//        for (IUserSession session : sessions) {
1143//            List<Event> flattenedUserSession = new ArrayList<Event>();
1144//            flatten(session, flattenedUserSession);
1145//            flattenedSessions.add(flattenedUserSession);
1146//        }
1147//
1148//        return flattenedSessions;
1149//    }
1150//
1151//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1152//        for (ITaskInstance instance : iUserSession) {
1153//            flatten(instance, flattenedUserSession);
1154//        }
1155//    }
1156//
1157//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1158//        if (instance instanceof ITaskInstanceList) {
1159//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1160//                flatten(child, flattenedUserSession);
1161//            }
1162//        }
1163//        else if (instance instanceof ISelectionInstance) {
1164//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1165//        }
1166//        else if (instance instanceof IOptionalInstance) {
1167//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1168//        }
1169//        else if (instance instanceof IEventTaskInstance) {
1170//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1171//        }
1172//    }
1173
1174}
Note: See TracBrowser for help on using the repository browser.