source: branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1554

Last change on this file since 1554 was 1554, checked in by rkrimmel, 10 years ago

Moved alignment plugin into core-tasktrees

File size: 42.5 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedList;
22import java.util.List;
23import java.util.Map;
24import java.util.Set;
25import java.util.logging.Level;
26
27import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
28import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWaterman;
29import de.ugoe.cs.autoquest.tasktrees.alignment.substitution.ObjectDistanceSubstitionMatrix;
30import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
31import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
39import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
40import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
41import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
42import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
43import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
44import de.ugoe.cs.autoquest.usageprofiles.Trie;
45import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
46import de.ugoe.cs.util.StopWatch;
47import de.ugoe.cs.util.console.Console;
48
49/**
50 * <p>
51 * This class implements the major rule for creating task trees based on a set of recorded
52 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
53 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
54 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
55 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
56 * iterations and then again for sub sequences until no more replacements are done.
57 * </p>
58 * <p>
59 * For determining the longest sequence occurring most often, the implementation uses a
60 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
61 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
62 * with an increased depth.
63 * </p>
64 *
65 * @author Patrick Harms
66 */
67class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
68   
69    /**
70     * <p>
71     * the task factory to be used for creating substructures for the temporal
72     * relationships identified during rul application
73     * </p>
74     */
75    private ITaskFactory taskFactory;
76    /**
77     * <p>
78     * the task builder to be used for creating substructures for the temporal relationships
79     * identified during rule application
80     * </p>
81     */
82    private ITaskBuilder taskBuilder;
83
84    /**
85     * <p>
86     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
87     * the tasks are harmonized
88     * </p>
89     */
90    private TaskHandlingStrategy preparationTaskHandlingStrategy;
91   
92    /**
93     * <p>
94     * the task handling strategy to be used for comparing tasks during iteration detection an trie
95     * generation, i.e., after the tasks are harmonized
96     * </p>
97     */
98    private TaskHandlingStrategy identityTaskHandlingStrategy;;
99   
100   
101    private ArrayList<NumberSequence> numberseqs;
102   
103    /**
104     * <p>
105     * instantiates the rule and initializes it with a task equality to be considered when
106     * comparing tasks as well as a task factory and builder to be used for creating task
107     * structures.
108     * </p>
109     *
110     * @param minimalTaskEquality the task equality to be considered when comparing tasks
111     * @param taskFactory         the task factory to be used for creating substructures
112     * @param taskBuilder         the task builder to be used for creating substructures
113     */
114   
115   
116    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
117                                 ITaskFactory taskFactory,
118                                 ITaskBuilder taskBuilder)
119    {
120        this.taskFactory = taskFactory;
121        this.taskBuilder = taskBuilder;
122       
123        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
124        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
125       
126    }
127
128    /* (non-Javadoc)
129     * @see java.lang.Object#toString()
130     */
131    @Override
132    public String toString() {
133        return "SequenceForTaskDetectionRuleAlignment";
134    }
135
136    /* (non-Javadoc)
137     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
138     */
139    @Override
140    public RuleApplicationResult apply(List<IUserSession> sessions) {
141        RuleApplicationData appData = new RuleApplicationData(sessions);
142        numberseqs = new ArrayList<NumberSequence>();
143       
144       
145       
146        // this is the real rule application. Loop while something is replaced.
147        SymbolMap<ITaskInstance, ITask> uniqueTasks = harmonizeEventTaskInstancesModel(appData,numberseqs);
148        ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix(uniqueTasks); 
149        submat.generate();
150       
151       
152        SmithWaterman sm = new SmithWaterman(numberseqs.get(0).getSequence(), numberseqs.get(0).getSequence(), submat);
153        sm.printDPMatrix();
154     
155       
156       
157        //Hier mein kram hin
158        /*do {
159            System.out.println();
160           
161            appData.getStopWatch().start("whole loop");
162            detectAndReplaceIterations(appData);
163
164            appData.getStopWatch().start("task replacement");
165            detectAndReplaceTasks(appData);
166            appData.getStopWatch().stop("task replacement");
167            appData.getStopWatch().stop("whole loop");
168           
169            //((TaskTreeNodeComparator) taskComparator).getStopWatch().dumpStatistics(System.out);
170            //((TaskTreeNodeComparator) taskComparator).getStopWatch().reset();
171           
172            appData.getStopWatch().dumpStatistics(System.out);
173            appData.getStopWatch().reset();
174           
175        }
176        while (appData.detectedAndReplacedTasks());*/
177       
178        Console.println
179            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
180             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
181             " appropriate instances\n");
182       
183        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
184            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
185        {
186            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
187        }
188       
189        return appData.getResult();
190    }
191
192    /**
193     * <p>
194     * harmonizes the event task instances by unifying tasks. This is done, as initially the
195     * event tasks being equal with respect to the considered task equality are distinct objects.
196     * The comparison of these distinct objects is more time consuming than comparing the object
197     * references.
198     * </p>
199     *
200     * @param appData the rule application data combining all data used for applying this rule
201     * @return Returns the unique tasks symbol map
202     */
203    private SymbolMap<ITaskInstance, ITask> harmonizeEventTaskInstancesModel(RuleApplicationData appData, ArrayList<NumberSequence> numberseqs) {
204        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
205        appData.getStopWatch().start("harmonizing event tasks");
206
207        SymbolMap<ITaskInstance, ITask> uniqueTasks =
208            preparationTaskHandlingStrategy.createSymbolMap();
209        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
210       
211        int unifiedTasks = 0;
212        ITask task;
213        List<IUserSession> sessions = appData.getSessions();
214        int sessionNo = 0;
215        numberseqs = new ArrayList<NumberSequence>();
216        for (IUserSession session : sessions) {
217            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
218                NumberSequence templist = new NumberSequence(session.size()) ;
219           
220            for (int i = 0; i < session.size(); i++) {
221                ITaskInstance taskInstance = session.get(i);
222                task = uniqueTasks.getValue(taskInstance);
223               
224                if (task == null) {
225                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
226                    templist.getSequence()[i]=taskInstance.getTask().getId();
227                }
228                else {
229                    taskBuilder.setTask(taskInstance, task);
230                        templist.getSequence()[i]=task.getId();
231                    unifiedTasks++;
232                }
233            }
234            numberseqs.add(templist);
235            comparator.clearBuffers();
236        }
237       
238        appData.getStopWatch().stop("harmonizing event tasks");
239        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
240                        uniqueTasks.size() + " different tasks)");
241
242        appData.getStopWatch().dumpStatistics(System.out);
243        appData.getStopWatch().reset();
244                return uniqueTasks;
245    }
246
247    /**
248     * <p>
249     * searches for direct iterations of single tasks in all sequences and replaces them with
250     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
251     * a task that is iterated somewhen are replaced with iterations to have again an efficient
252     * way for task comparisons.
253     * </p>
254     *
255     * @param appData the rule application data combining all data used for applying this rule
256     */
257    private void detectAndReplaceIterations(RuleApplicationData appData) {
258        Console.traceln(Level.FINE, "detecting iterations");
259        appData.getStopWatch().start("detecting iterations");
260       
261        List<IUserSession> sessions = appData.getSessions();
262       
263        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
264       
265        if (iteratedTasks.size() > 0) {
266            replaceIterationsOf(iteratedTasks, sessions, appData);
267        }
268       
269        appData.getStopWatch().stop("detecting iterations");
270        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
271    }
272
273    /**
274     * <p>
275     * searches the provided sessions for task iterations. If a task is iterated, it is added
276     * to the returned set.
277     * </p>
278     *
279     * @param the session to search for iterations in
280     *
281     * @return a set of tasks being iterated somewhere
282     */
283    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
284        Set<ITask> iteratedTasks = new HashSet<ITask>();
285        for (IUserSession session : sessions) {
286            for (int i = 0; i < (session.size() - 1); i++) {
287                // we prepared the task instances to refer to unique tasks, if they are treated
288                // as equal. Therefore, we just compare the identity of the tasks of the task
289                // instances
290                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
291                    iteratedTasks.add(session.get(i).getTask());
292                }
293            }
294        }
295       
296        return iteratedTasks;
297    }
298
299    /**
300     * <p>
301     * replaces all occurrences of all tasks provided in the set with iterations
302     * </p>
303     *
304     * @param iteratedTasks the tasks to be replaced with iterations
305     * @param sessions      the sessions in which the tasks are to be replaced
306     * @param appData       the rule application data combining all data used for applying this rule
307     */
308    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
309                                     List<IUserSession>  sessions,
310                                     RuleApplicationData appData)
311    {
312        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
313        Map<IIteration, List<IIterationInstance>> iterationInstances =
314                new HashMap<IIteration, List<IIterationInstance>>();
315           
316        for (ITask iteratedTask : iteratedTasks) {
317            IIteration iteration = taskFactory.createNewIteration();
318            iterations.put(iteratedTask, iteration);
319            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
320        }
321       
322        IIterationInstance iterationInstance;
323       
324        for (IUserSession session : sessions) {
325            int index = 0;
326            iterationInstance = null;
327
328            while (index < session.size()) {
329                // we prepared the task instances to refer to unique tasks, if they are treated
330                // as equal. Therefore, we just compare the identity of the tasks of the task
331                // instances
332                ITask currentTask = session.get(index).getTask();
333                IIteration iteration = iterations.get(currentTask);
334                if (iteration != null) {
335                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration))
336                    {
337                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
338                        iterationInstances.get(iteration).add(iterationInstance);
339                        taskBuilder.addTaskInstance(session, index, iterationInstance);
340                        index++;
341                    }
342                   
343                    taskBuilder.addChild(iterationInstance, session.get(index));
344                    taskBuilder.removeTaskInstance(session, index);
345                }
346                else {
347                    if (iterationInstance != null) {
348                        iterationInstance = null;
349                    }
350                    index++;
351                }
352            }
353        }
354       
355        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
356        {
357            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
358        }
359    }
360
361    /**
362     * <p>
363     * TODO clarify why this is done
364     * </p>
365     */
366    private void harmonizeIterationInstancesModel(IIteration               iteration,
367                                                  List<IIterationInstance> iterationInstances)
368    {
369        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
370        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
371       
372        // merge the lexically different variants of iterated task to a unique list
373        for (IIterationInstance iterationInstance : iterationInstances) {
374            for (ITaskInstance executionVariant : iterationInstance) {
375                ITask candidate = executionVariant.getTask();
376           
377                boolean found = false;
378                for (ITask taskVariant : iteratedTaskVariants) {
379                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
380                        taskBuilder.setTask(executionVariant, taskVariant);
381                        found = true;
382                        break;
383                    }
384                }
385               
386                if (!found) {
387                    iteratedTaskVariants.add(candidate);
388                }
389            }
390        }
391       
392        // if there are more than one lexically different variant of iterated tasks, adapt the
393        // iteration model to be a selection of different variants. In this case also adapt
394        // the generated iteration instances to correctly contain selection instances. If there
395        // is only one variant of an iterated task, simply set this as the marked task of the
396        // iteration. In this case, the instances can be preserved as is
397        if (iteratedTaskVariants.size() > 1) {
398            ISelection selection = taskFactory.createNewSelection();
399           
400            for (ITask variant : iteratedTaskVariants) {
401                taskBuilder.addChild(selection, variant);
402            }
403           
404            taskBuilder.setMarkedTask(iteration, selection);
405           
406            for (IIterationInstance instance : iterationInstances) {
407                for (int i = 0; i < instance.size(); i++) {
408                    ISelectionInstance selectionInstance =
409                        taskFactory.createNewTaskInstance(selection);
410                    taskBuilder.setChild(selectionInstance, instance.get(i));
411                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
412                }
413            }
414        }
415        else {
416            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
417        }
418    }
419
420    /**
421     * TODO go on commenting
422     * @param appData the rule application data combining all data used for applying this rule
423     */
424    private void detectAndReplaceTasks(RuleApplicationData appData) {
425        Console.traceln(Level.FINE, "detecting and replacing tasks");
426        appData.getStopWatch().start("detecting tasks");
427       
428        getSequencesOccuringMostOften(appData);
429
430        appData.getStopWatch().stop("detecting tasks");
431        appData.getStopWatch().start("replacing tasks");
432       
433        replaceSequencesOccurringMostOften(appData);
434       
435        appData.getStopWatch().stop("replacing tasks");
436        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
437                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
438                        " times");
439    }
440
441    /**
442     * @param appData the rule application data combining all data used for applying this rule
443     */
444    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
445        Console.traceln(Level.FINE, "determining most prominent tasks");
446
447        Tasks tasks;
448        boolean createNewTrie = (appData.getLastTrie() == null) ||
449            appData.detectedAndReplacedTasks(); // tree has changed
450       
451        do {
452            if (createNewTrie) {
453                createNewTrie(appData);
454            }
455           
456            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
457            appData.getLastTrie().process(finder);
458       
459            tasks = finder.getFoundTasks();
460           
461            createNewTrie = false;
462           
463            for (List<ITaskInstance> task : tasks) {
464                if (task.size() >= appData.getTrainedSequenceLength()) {
465                    // Trie must be recreated with a longer sequence length to be sure that
466                    // the found sequences occurring most often are found in their whole length
467                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
468                    createNewTrie = true;
469                    break;
470                }
471            }
472        }
473        while (createNewTrie);
474       
475        // create a normal trie for comparison
476        /*System.out.println("creating test trie for comparison");
477       
478        appData.getStopWatch().start("testtrie");
479        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
480       
481        for (IUserSession session : appData.getSessions()) {
482            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
483        }
484       
485        appData.getStopWatch().stop("testtrie");
486       
487        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
488        trie.process(finder);
489
490        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
491       
492        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
493       
494        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
495            boolean foundTask = false;
496            for (List<ITaskInstance> task2 : tasks) {
497                boolean tasksEqual = false;
498                if (task1.size() == task2.size()) {
499                    tasksEqual = true;
500                    for (int i = 0; i < task1.size(); i++) {
501                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
502                        {
503                            tasksEqual = false;
504                        }
505                    }
506                }
507               
508                if (tasksEqual) {
509                    foundTask = true;
510                    break;
511                }
512            }
513           
514            if (!foundTask) {
515                System.out.println("different is " + task1);
516                allTasksEqual = false;
517                break;
518            }
519        }
520       
521        if (!allTasksEqual) {
522            System.out.println(finder.getFoundTasks());
523            System.out.println(tasks);
524           
525            throw new IllegalArgumentException("both tries calculated different subsequences");
526        }
527       
528        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
529        appData.getLastTrie().process(dumper);
530        dumper.dumpCounters();*/
531       
532        appData.setLastFoundTasks(tasks);
533
534        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
535                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
536    }
537
538    /**
539     * @param appData the rule application data combining all data used for applying this rule
540     */
541    private void createNewTrie(RuleApplicationData appData) {
542        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
543                        appData.getTrainedSequenceLength());
544
545        appData.getStopWatch().start("training trie");
546       
547        // we prepared the task instances to refer to unique tasks, if they are treated
548        // as equal. Therefore, we just compare the identity of the tasks of the task
549        // instances
550        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
551   
552        appData.getLastTrie().trainSessions
553            (appData.getSessions(), appData.getTrainedSequenceLength());
554       
555        appData.getStopWatch().stop("training trie");
556    }
557
558    /**
559     * @param appData the rule application data combining all data used for applying this rule
560     */
561    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
562        appData.detectedAndReplacedTasks(false);
563
564        if ((appData.getLastFoundTasks().size() > 0) &&
565            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
566        {
567            Console.traceln(Level.FINER, "replacing tasks occurrences");
568
569            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
570                ISequence sequence = taskFactory.createNewSequence();
571               
572                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
573
574                List<ISequenceInstance> sequenceInstances =
575                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
576               
577                harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size());
578                appData.detectedAndReplacedTasks
579                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
580
581                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
582                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
583                                    sequenceInstances.size() + " times instead of expected " +
584                                    appData.getLastFoundTasks().getOccurrenceCount());
585                }
586            }
587        }
588       
589    }
590
591    /**
592     *
593     */
594    private void harmonizeSequenceInstancesModel(ISequence               sequence,
595                                                 List<ISequenceInstance> sequenceInstances,
596                                                 int                     sequenceLength)
597    {
598        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
599       
600        // ensure for each subtask that lexically different variants are preserved
601        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
602            List<ITask> subTaskVariants = new LinkedList<ITask>();
603           
604            for (ISequenceInstance sequenceInstance : sequenceInstances) {
605                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
606               
607                boolean found = false;
608               
609                for (int i = 0; i < subTaskVariants.size(); i++) {
610                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
611                        taskBuilder.setTask
612                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
613                       
614                        found = true;
615                        break;
616                    }
617                }
618               
619                if (!found) {
620                    subTaskVariants.add(candidate);
621                }
622            }
623           
624            // if there are more than one lexically different variant of the sub task at
625            // the considered position, adapt the sequence model at that position to have
626            // a selection of the different variants. In this case also adapt the
627            // generated sequence instances to correctly contain selection instances. If
628            // there is only one variant of sub tasks at the given position, simply set
629            // this variant as the sub task of the selection. In this case, the instances
630            // can be preserved as is
631            if (subTaskVariants.size() > 1) {
632                ISelection selection = taskFactory.createNewSelection();
633               
634                for (ITask variant : subTaskVariants) {
635                    taskBuilder.addChild(selection, variant);
636                }
637               
638                taskBuilder.addChild(sequence, selection);
639               
640                for (ISequenceInstance instance : sequenceInstances) {
641                    ISelectionInstance selectionInstance =
642                        taskFactory.createNewTaskInstance(selection);
643                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
644                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
645                }
646            }
647            else if (subTaskVariants.size() == 1) {
648                taskBuilder.addChild(sequence, subTaskVariants.get(0));
649            }
650        }
651    }
652
653    /**
654     * @param tree
655     */
656    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
657                                                           List<IUserSession>  sessions,
658                                                           ISequence           temporalTaskModel)
659    {
660        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
661       
662        for (IUserSession session : sessions) {
663            int index = -1;
664               
665            do {
666                index = getSubListIndex(session, task, ++index);
667
668                if (index > -1) {
669                    sequenceInstances.add
670                        (RuleUtils.createNewSubSequenceInRange
671                             (session, index, index + task.size() - 1, temporalTaskModel,
672                              taskFactory, taskBuilder));
673                }
674            }
675            while (index > -1);
676        }
677       
678        return sequenceInstances;
679    }
680
681    /**
682     * @param trie
683     * @param object
684     * @return
685     */
686    private int getSubListIndex(ITaskInstanceList   list,
687                                List<ITaskInstance> subList,
688                                int                 startIndex)
689    {
690        boolean matchFound;
691        int result = -1;
692       
693        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
694            matchFound = true;
695           
696            for (int j = 0; j < subList.size(); j++) {
697                // we prepared the task instances to refer to unique tasks, if they are treated
698                // as equal. Therefore, we just compare the identity of the tasks of the task
699                // instances
700                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
701                    matchFound = false;
702                    break;
703                }
704            }
705           
706            if (matchFound) {
707                result = i;
708                break;
709            }
710        }
711       
712        return result;
713    }
714
715    /**
716     * @param trie
717     * @param object
718     * @return
719     */
720    private int getSubListIndex(List<ITaskInstance> list,
721                                List<ITaskInstance> subList,
722                                int                 startIndex)
723    {
724        boolean matchFound;
725        int result = -1;
726       
727        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
728            matchFound = true;
729           
730            for (int j = 0; j < subList.size(); j++) {
731                // we prepared the task instances to refer to unique tasks, if they are treated
732                // as equal. Therefore, we just compare the identity of the tasks of the task
733                // instances
734                if (list.get(i + j) != subList.get(j)) {
735                    matchFound = false;
736                    break;
737                }
738            }
739           
740            if (matchFound) {
741                result = i;
742                break;
743            }
744        }
745       
746        return result;
747    }
748   
749    /**
750     * @author Patrick Harms
751     */
752    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
753       
754        /**
755         *
756         */
757        private int currentCount;
758       
759        /**
760         *
761         */
762        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
763
764        /**
765         *
766         */
767        public MaxCountAndLongestTasksFinder() {
768            super();
769            this.currentCount = 0;
770        }
771
772        /* (non-Javadoc)
773         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
774         */
775        @Override
776        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
777            if (foundTask.size() < 2) {
778                // ignore single tasks
779                return TrieProcessor.Result.CONTINUE;
780            }
781           
782            if (count < 2) {
783                // ignore singular occurrences
784                return TrieProcessor.Result.SKIP_NODE;
785            }
786
787            if (this.currentCount > count) {
788                // ignore this children of this task, as they may have only smaller counts than
789                // the already found tasks
790                return TrieProcessor.Result.SKIP_NODE;
791            }
792           
793            if (this.currentCount < count) {
794                // the provided task occurs more often that all tasks found so far.
795                // clear all found tasks and use the new count as the one searched for
796                foundTasks.clear();
797                this.currentCount = count;
798            }
799           
800            if (this.currentCount == count) {
801                // the task is of interest. Sort it into the other found tasks so that
802                // the longest come first
803                boolean added = false;
804                for (int i = 0; i < foundTasks.size(); i++) {
805                    if (foundTasks.get(i).size() < foundTask.size()) {
806                        // defensive copy
807                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
808                        added = true;
809                        break;
810                    }
811                }
812               
813                if (!added) {
814                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
815                }
816            }
817           
818            return TrieProcessor.Result.CONTINUE;
819        }
820
821        /**
822         *  @return
823         */
824        public Tasks getFoundTasks() {
825            removePermutationsOfShorterTasks();
826            return new Tasks(currentCount, foundTasks);
827        }
828
829        /**
830         *
831         */
832        private void removePermutationsOfShorterTasks() {
833            // now iterate the sorted list and for each task remove all other tasks that are shorter
834            // (i.e. come later in the sorted list) and that represent a subpart of the task
835            for (int i = 0; i < foundTasks.size(); i++) {
836                for (int j = i + 1; j < foundTasks.size();) {
837                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
838                        // found a task that is a potential subtask. Check for this and remove the
839                        // subtask if needed
840                        List<ITaskInstance> longTask = foundTasks.get(i);
841                        List<ITaskInstance> shortTask = foundTasks.get(j);
842                       
843                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
844                            foundTasks.remove(j);
845                        }
846                        else {
847                            j++;
848                        }
849                    }
850                    else {
851                        j++;
852                    }
853                }
854            }
855        }
856
857    }
858   
859//    /**
860//     * @author Patrick Harms
861//     */
862//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
863//       
864//        /**
865//         *
866//         */
867//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
868//       
869//        /* (non-Javadoc)
870//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
871//         */
872//        @Override
873//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
874//            if (subsequence.size() == 1) {
875//                Integer value = counters.get(count);
876//               
877//                if (value == null) {
878//                    value = 0;
879//                }
880//               
881//                counters.put(count, value + 1);
882//               
883//                return TrieProcessor.Result.CONTINUE;
884//            }
885//            else {
886//                // ignore singular occurrences
887//                return TrieProcessor.Result.SKIP_NODE;
888//            }
889//        }
890//
891//        /**
892//         *  @return
893//         */
894//        public void dumpCounters() {
895//            int dumpedCounters = 0;
896//           
897//            int count = 1;
898//            while (dumpedCounters < counters.size()) {
899//                Integer value = counters.get(count++);
900//                if (value != null) {
901//                    System.out.println(value + " symbols occurred " + count + " times");
902//                    dumpedCounters++;
903//                }
904//            }
905//        }
906//
907//    }
908   
909    /**
910     *
911     */
912    private static class RuleApplicationData {
913       
914        /**
915         *
916         */
917        private List<IUserSession> sessions;
918       
919        /**
920         *
921         */
922        private TaskInstanceTrie lastTrie;
923       
924        /**
925         * default and minimum trained sequence length is 3
926         */
927        private int trainedSequenceLength = 3;
928       
929        /**
930         *
931         */
932        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
933       
934        /**
935         *
936         */
937        private boolean detectedAndReplacedTasks;
938       
939        /**
940         *
941         */
942        private RuleApplicationResult result = new RuleApplicationResult();
943       
944        /**
945         *
946         */
947        private StopWatch stopWatch = new StopWatch();
948       
949        /**
950         *
951         */
952        private RuleApplicationData(List<IUserSession> sessions) {
953            this.sessions = sessions;
954        }
955
956        /**
957         * @return the tree
958         */
959        private List<IUserSession> getSessions() {
960            return sessions;
961        }
962
963        /**
964         * @param lastTrie the lastTrie to set
965         */
966        private void setLastTrie(TaskInstanceTrie lastTrie) {
967            this.lastTrie = lastTrie;
968        }
969
970        /**
971         * @return the lastTrie
972         */
973        private TaskInstanceTrie getLastTrie() {
974            return lastTrie;
975        }
976
977        /**
978         * @param trainedSequenceLength the trainedSequenceLength to set
979         */
980        private void setTrainedSequenceLength(int trainedSequenceLength) {
981            this.trainedSequenceLength = trainedSequenceLength;
982        }
983
984        /**
985         * @return the trainedSequenceLength
986         */
987        private int getTrainedSequenceLength() {
988            return trainedSequenceLength;
989        }
990
991        /**
992         * @param lastFoundSequences the lastFoundSequences to set
993         */
994        private void setLastFoundTasks(Tasks lastFoundSequences) {
995            this.lastFoundTasks = lastFoundSequences;
996        }
997
998        /**
999         * @return the lastFoundSequences
1000         */
1001        private Tasks getLastFoundTasks() {
1002            return lastFoundTasks;
1003        }
1004
1005        /**
1006         *
1007         */
1008        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
1009            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
1010        }
1011
1012        /**
1013         *
1014         */
1015        private boolean detectedAndReplacedTasks() {
1016            return detectedAndReplacedTasks;
1017        }
1018       
1019        /**
1020         * @return the result
1021         */
1022        private RuleApplicationResult getResult() {
1023            return result;
1024        }
1025
1026        /**
1027         * @return the stopWatch
1028         */
1029        private StopWatch getStopWatch() {
1030            return stopWatch;
1031        }
1032
1033    }
1034   
1035
1036    /**
1037     * @author Patrick Harms
1038     */
1039    private static class Tasks implements Iterable<List<ITaskInstance>> {
1040       
1041        /**
1042         *
1043         */
1044        private int occurrenceCount;
1045       
1046        /**
1047         *
1048         */
1049        private List<List<ITaskInstance>> sequences;
1050
1051        /**
1052         * @param occurrenceCount
1053         * @param sequences
1054         */
1055        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1056            super();
1057            this.occurrenceCount = occurrenceCount;
1058            this.sequences = sequences;
1059        }
1060
1061        /**
1062         * @return
1063         */
1064        private int getOccurrenceCount() {
1065            return occurrenceCount;
1066        }
1067
1068        /**
1069         * @return
1070         */
1071        private int size() {
1072            return this.sequences.size();
1073        }
1074
1075        /**
1076         *
1077         */
1078
1079        /* (non-Javadoc)
1080         * @see java.lang.Iterable#iterator()
1081         */
1082        @Override
1083        public Iterator<List<ITaskInstance>> iterator() {
1084            return this.sequences.iterator();
1085        }
1086
1087        /* (non-Javadoc)
1088         * @see java.lang.Object#toString()
1089         */
1090        @Override
1091        public String toString() {
1092            StringBuffer result = new StringBuffer();
1093            result.append(this.occurrenceCount);
1094            result.append(" occurrences:\n");
1095           
1096            for (List<ITaskInstance> task : sequences) {
1097                result.append(task);
1098                result.append("\n");
1099            }
1100           
1101            return result.toString();
1102        }
1103
1104    }
1105   
1106    // methods for internal testing
1107//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1108//                                         List<IUserSession> sessions,
1109//                                         String             when)
1110//    {
1111//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1112//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1113//            System.out.println("################## number of sessions changed after " + when);
1114//        }
1115//        else {
1116//            for (int i = 0; i < flattenedSessions.size(); i++) {
1117//                List<Event> expected = flattenedSessions.get(i);
1118//                List<Event> current = currentFlattenedSessions.get(i);
1119//           
1120//                if (expected.size() != current.size()) {
1121//                    System.out.println
1122//                        ("################## length of session " + i + " changed after " + when);
1123//                }
1124//                else {
1125//                    for (int j = 0; j < expected.size(); j++) {
1126//                        if (!expected.get(j).equals(current.get(j))) {
1127//                            System.out.println("################## event " + j + " of session " +
1128//                                               i + " changed after " + when);
1129//                        }
1130//                    }
1131//                }
1132//            }     
1133//        }
1134//    }
1135//
1136//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1137//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1138//        for (IUserSession session : sessions) {
1139//            List<Event> flattenedUserSession = new ArrayList<Event>();
1140//            flatten(session, flattenedUserSession);
1141//            flattenedSessions.add(flattenedUserSession);
1142//        }
1143//
1144//        return flattenedSessions;
1145//    }
1146//
1147//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1148//        for (ITaskInstance instance : iUserSession) {
1149//            flatten(instance, flattenedUserSession);
1150//        }
1151//    }
1152//
1153//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1154//        if (instance instanceof ITaskInstanceList) {
1155//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1156//                flatten(child, flattenedUserSession);
1157//            }
1158//        }
1159//        else if (instance instanceof ISelectionInstance) {
1160//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1161//        }
1162//        else if (instance instanceof IOptionalInstance) {
1163//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1164//        }
1165//        else if (instance instanceof IEventTaskInstance) {
1166//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1167//        }
1168//    }
1169
1170}
Note: See TracBrowser for help on using the repository browser.