source: branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1558

Last change on this file since 1558 was 1558, checked in by rkrimmel, 10 years ago

new Smith waterman variant

File size: 42.9 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedList;
22import java.util.List;
23import java.util.Map;
24import java.util.Set;
25import java.util.logging.Level;
26
27import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.FengDoolittle;
28import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
29import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWaterman;
30import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWatermanRepeated;
31import de.ugoe.cs.autoquest.tasktrees.alignment.substitution.ObjectDistanceSubstitionMatrix;
32import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
35import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
36import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
37import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
38import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
39import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
40import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
41import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
42import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
43import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
44import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
45import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
46import de.ugoe.cs.autoquest.usageprofiles.Trie;
47import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
48import de.ugoe.cs.util.StopWatch;
49import de.ugoe.cs.util.console.Console;
50
51/**
52 * <p>
53 * This class implements the major rule for creating task trees based on a set of recorded
54 * user sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it
55 * searches the sessions for iterations and replaces them accordingly. Then it searches for sub
56 * sequences being the longest and occurring most often. For each found sub sequence, it replaces
57 * the occurrences by creating appropriate {@link ISequence}s. Afterwards, again searches for
58 * iterations and then again for sub sequences until no more replacements are done.
59 * </p>
60 * <p>
61 * For determining the longest sequence occurring most often, the implementation uses a
62 * {@link Trie}. The depth of the tree is initially 3. If the algorithm has a longest sequence
63 * occurring most often whose length is equal to the depth of the trie, it recalculates the trie
64 * with an increased depth.
65 * </p>
66 *
67 * @author Patrick Harms
68 */
69class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
70   
71    /**
72     * <p>
73     * the task factory to be used for creating substructures for the temporal
74     * relationships identified during rul application
75     * </p>
76     */
77    private ITaskFactory taskFactory;
78    /**
79     * <p>
80     * the task builder to be used for creating substructures for the temporal relationships
81     * identified during rule application
82     * </p>
83     */
84    private ITaskBuilder taskBuilder;
85
86    /**
87     * <p>
88     * the task handling strategy to be used for comparing tasks for preparation, i.e., before
89     * the tasks are harmonized
90     * </p>
91     */
92    private TaskHandlingStrategy preparationTaskHandlingStrategy;
93   
94    /**
95     * <p>
96     * the task handling strategy to be used for comparing tasks during iteration detection an trie
97     * generation, i.e., after the tasks are harmonized
98     * </p>
99     */
100    private TaskHandlingStrategy identityTaskHandlingStrategy;;
101   
102   
103    private ArrayList<NumberSequence> numberseqs;
104   
105    /**
106     * <p>
107     * instantiates the rule and initializes it with a task equality to be considered when
108     * comparing tasks as well as a task factory and builder to be used for creating task
109     * structures.
110     * </p>
111     *
112     * @param minimalTaskEquality the task equality to be considered when comparing tasks
113     * @param taskFactory         the task factory to be used for creating substructures
114     * @param taskBuilder         the task builder to be used for creating substructures
115     */
116   
117   
118    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
119                                 ITaskFactory taskFactory,
120                                 ITaskBuilder taskBuilder)
121    {
122        this.taskFactory = taskFactory;
123        this.taskBuilder = taskBuilder;
124       
125        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
126        this.identityTaskHandlingStrategy = new TaskHandlingStrategy(TaskEquality.IDENTICAL);
127        numberseqs = new ArrayList<NumberSequence>();
128       
129    }
130
131    /* (non-Javadoc)
132     * @see java.lang.Object#toString()
133     */
134    @Override
135    public String toString() {
136        return "SequenceForTaskDetectionRuleAlignment";
137    }
138
139    /* (non-Javadoc)
140     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply(java.util.List)
141     */
142    @Override
143    public RuleApplicationResult apply(List<IUserSession> sessions) {
144        RuleApplicationData appData = new RuleApplicationData(sessions);
145       
146       
147       
148        // this is the real rule application. Loop while something is replaced.
149        SymbolMap<ITaskInstance, ITask> uniqueTasks = harmonizeEventTaskInstancesModel(appData);
150        ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix(uniqueTasks,20);
151        submat.generate();
152     
153       
154       
155        System.out.print("Sequence 1: ");
156        NumberSequence ns1 = numberseqs.get(7);
157        NumberSequence ns2 = numberseqs.get(8);
158        ns1.printSequence();
159        System.out.print("Sequence 2: ");
160        ns2.printSequence();
161        //SmithWatermanRepeated sw = new SmithWatermanRepeated(numberseqs.get(20).getSequence(), numberseqs.get(24).getSequence(), submat,10);
162        SmithWatermanRepeated sw = new SmithWatermanRepeated(ns1.getSequence(), ns2.getSequence(), submat,15);
163        System.out.println("Max Scrore: " + sw.getMaxScore());
164       
165        sw.printDPMatrix();
166   
167       
168       
169
170        do {
171            System.out.println();
172            FengDoolittle fd = new FengDoolittle();
173           
174           
175            //appData.getStopWatch().start("whole loop");
176            //detectAndReplaceIterations(appData);
177
178            //appData.getStopWatch().start("task replacement");
179            //detectAndReplaceTasks(appData);
180            //appData.getStopWatch().stop("task replacement");
181            //appData.getStopWatch().stop("whole loop");
182           
183           
184            //appData.getStopWatch().dumpStatistics(System.out);
185            //appData.getStopWatch().reset();
186           
187        }
188        while (appData.detectedAndReplacedTasks());
189       
190        Console.println
191            ("created " + appData.getResult().getNewlyCreatedTasks().size() +
192             " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
193             " appropriate instances\n");
194       
195        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
196            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
197        {
198            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
199        }
200       
201        return appData.getResult();
202    }
203
204    /**
205     * <p>
206     * harmonizes the event task instances by unifying tasks. This is done, as initially the
207     * event tasks being equal with respect to the considered task equality are distinct objects.
208     * The comparison of these distinct objects is more time consuming than comparing the object
209     * references.
210     * </p>
211     *
212     * @param appData the rule application data combining all data used for applying this rule
213     * @return Returns the unique tasks symbol map
214     */
215    private SymbolMap<ITaskInstance, ITask> harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
216        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
217        appData.getStopWatch().start("harmonizing event tasks");
218
219        SymbolMap<ITaskInstance, ITask> uniqueTasks =
220            preparationTaskHandlingStrategy.createSymbolMap();
221        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
222       
223        int unifiedTasks = 0;
224        ITask task;
225        List<IUserSession> sessions = appData.getSessions();
226        int sessionNo = 0;
227        numberseqs = new ArrayList<NumberSequence>();
228        for (IUserSession session : sessions) {
229            Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". " + session);
230                NumberSequence templist = new NumberSequence(session.size()) ;
231           
232            for (int i = 0; i < session.size(); i++) {
233                ITaskInstance taskInstance = session.get(i);
234                task = uniqueTasks.getValue(taskInstance);
235               
236                if (task == null) {
237                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
238                    templist.getSequence()[i]=taskInstance.getTask().getId();
239                }
240                else {
241                    taskBuilder.setTask(taskInstance, task);
242                        templist.getSequence()[i]=task.getId();
243                    unifiedTasks++;
244                }
245            }
246            numberseqs.add(templist);
247            comparator.clearBuffers();
248        }
249       
250        appData.getStopWatch().stop("harmonizing event tasks");
251        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
252                        uniqueTasks.size() + " different tasks)");
253
254        appData.getStopWatch().dumpStatistics(System.out);
255        appData.getStopWatch().reset();
256                return uniqueTasks;
257    }
258
259    /**
260     * <p>
261     * searches for direct iterations of single tasks in all sequences and replaces them with
262     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of
263     * a task that is iterated somewhen are replaced with iterations to have again an efficient
264     * way for task comparisons.
265     * </p>
266     *
267     * @param appData the rule application data combining all data used for applying this rule
268     */
269    private void detectAndReplaceIterations(RuleApplicationData appData) {
270        Console.traceln(Level.FINE, "detecting iterations");
271        appData.getStopWatch().start("detecting iterations");
272       
273        List<IUserSession> sessions = appData.getSessions();
274       
275        Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
276       
277        if (iteratedTasks.size() > 0) {
278            replaceIterationsOf(iteratedTasks, sessions, appData);
279        }
280       
281        appData.getStopWatch().stop("detecting iterations");
282        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
283    }
284
285    /**
286     * <p>
287     * searches the provided sessions for task iterations. If a task is iterated, it is added
288     * to the returned set.
289     * </p>
290     *
291     * @param the session to search for iterations in
292     *
293     * @return a set of tasks being iterated somewhere
294     */
295    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
296        Set<ITask> iteratedTasks = new HashSet<ITask>();
297        for (IUserSession session : sessions) {
298            for (int i = 0; i < (session.size() - 1); i++) {
299                // we prepared the task instances to refer to unique tasks, if they are treated
300                // as equal. Therefore, we just compare the identity of the tasks of the task
301                // instances
302                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
303                    iteratedTasks.add(session.get(i).getTask());
304                }
305            }
306        }
307       
308        return iteratedTasks;
309    }
310
311    /**
312     * <p>
313     * replaces all occurrences of all tasks provided in the set with iterations
314     * </p>
315     *
316     * @param iteratedTasks the tasks to be replaced with iterations
317     * @param sessions      the sessions in which the tasks are to be replaced
318     * @param appData       the rule application data combining all data used for applying this rule
319     */
320    private void replaceIterationsOf(Set<ITask>          iteratedTasks,
321                                     List<IUserSession>  sessions,
322                                     RuleApplicationData appData)
323    {
324        Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
325        Map<IIteration, List<IIterationInstance>> iterationInstances =
326                new HashMap<IIteration, List<IIterationInstance>>();
327           
328        for (ITask iteratedTask : iteratedTasks) {
329            IIteration iteration = taskFactory.createNewIteration();
330            iterations.put(iteratedTask, iteration);
331            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
332        }
333       
334        IIterationInstance iterationInstance;
335       
336        for (IUserSession session : sessions) {
337            int index = 0;
338            iterationInstance = null;
339
340            while (index < session.size()) {
341                // we prepared the task instances to refer to unique tasks, if they are treated
342                // as equal. Therefore, we just compare the identity of the tasks of the task
343                // instances
344                ITask currentTask = session.get(index).getTask();
345                IIteration iteration = iterations.get(currentTask);
346                if (iteration != null) {
347                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration))
348                    {
349                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
350                        iterationInstances.get(iteration).add(iterationInstance);
351                        taskBuilder.addTaskInstance(session, index, iterationInstance);
352                        index++;
353                    }
354                   
355                    taskBuilder.addChild(iterationInstance, session.get(index));
356                    taskBuilder.removeTaskInstance(session, index);
357                }
358                else {
359                    if (iterationInstance != null) {
360                        iterationInstance = null;
361                    }
362                    index++;
363                }
364            }
365        }
366       
367        for (Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances.entrySet())
368        {
369            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
370        }
371    }
372
373    /**
374     * <p>
375     * TODO clarify why this is done
376     * </p>
377     */
378    private void harmonizeIterationInstancesModel(IIteration               iteration,
379                                                  List<IIterationInstance> iterationInstances)
380    {
381        List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
382        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
383       
384        // merge the lexically different variants of iterated task to a unique list
385        for (IIterationInstance iterationInstance : iterationInstances) {
386            for (ITaskInstance executionVariant : iterationInstance) {
387                ITask candidate = executionVariant.getTask();
388           
389                boolean found = false;
390                for (ITask taskVariant : iteratedTaskVariants) {
391                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
392                        taskBuilder.setTask(executionVariant, taskVariant);
393                        found = true;
394                        break;
395                    }
396                }
397               
398                if (!found) {
399                    iteratedTaskVariants.add(candidate);
400                }
401            }
402        }
403       
404        // if there are more than one lexically different variant of iterated tasks, adapt the
405        // iteration model to be a selection of different variants. In this case also adapt
406        // the generated iteration instances to correctly contain selection instances. If there
407        // is only one variant of an iterated task, simply set this as the marked task of the
408        // iteration. In this case, the instances can be preserved as is
409        if (iteratedTaskVariants.size() > 1) {
410            ISelection selection = taskFactory.createNewSelection();
411           
412            for (ITask variant : iteratedTaskVariants) {
413                taskBuilder.addChild(selection, variant);
414            }
415           
416            taskBuilder.setMarkedTask(iteration, selection);
417           
418            for (IIterationInstance instance : iterationInstances) {
419                for (int i = 0; i < instance.size(); i++) {
420                    ISelectionInstance selectionInstance =
421                        taskFactory.createNewTaskInstance(selection);
422                    taskBuilder.setChild(selectionInstance, instance.get(i));
423                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
424                }
425            }
426        }
427        else {
428            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
429        }
430    }
431
432    /**
433     * TODO go on commenting
434     * @param appData the rule application data combining all data used for applying this rule
435     */
436    private void detectAndReplaceTasks(RuleApplicationData appData) {
437        Console.traceln(Level.FINE, "detecting and replacing tasks");
438        appData.getStopWatch().start("detecting tasks");
439       
440        getSequencesOccuringMostOften(appData);
441
442        appData.getStopWatch().stop("detecting tasks");
443        appData.getStopWatch().start("replacing tasks");
444       
445        replaceSequencesOccurringMostOften(appData);
446       
447        appData.getStopWatch().stop("replacing tasks");
448        Console.traceln(Level.INFO, "detected and replaced " + appData.getLastFoundTasks().size() +
449                        " tasks occuring " + appData.getLastFoundTasks().getOccurrenceCount() +
450                        " times");
451    }
452
453    /**
454     * @param appData the rule application data combining all data used for applying this rule
455     */
456    private void getSequencesOccuringMostOften(RuleApplicationData appData) {
457        Console.traceln(Level.FINE, "determining most prominent tasks");
458
459        Tasks tasks;
460        boolean createNewTrie = (appData.getLastTrie() == null) ||
461            appData.detectedAndReplacedTasks(); // tree has changed
462       
463        do {
464            if (createNewTrie) {
465                createNewTrie(appData);
466            }
467           
468            MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
469            appData.getLastTrie().process(finder);
470       
471            tasks = finder.getFoundTasks();
472           
473            createNewTrie = false;
474           
475            for (List<ITaskInstance> task : tasks) {
476                if (task.size() >= appData.getTrainedSequenceLength()) {
477                    // Trie must be recreated with a longer sequence length to be sure that
478                    // the found sequences occurring most often are found in their whole length
479                    appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1);
480                    createNewTrie = true;
481                    break;
482                }
483            }
484        }
485        while (createNewTrie);
486       
487        // create a normal trie for comparison
488        /*System.out.println("creating test trie for comparison");
489       
490        appData.getStopWatch().start("testtrie");
491        Trie<ITaskInstance> trie = new Trie<ITaskInstance>(identityTaskComparator);
492       
493        for (IUserSession session : appData.getSessions()) {
494            trie.train(session.getExecutedTasks(), appData.getTrainedSequenceLength());
495        }
496       
497        appData.getStopWatch().stop("testtrie");
498       
499        MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
500        trie.process(finder);
501
502        boolean allTasksEqual = finder.getFoundTasks().size() == tasks.size();
503       
504        allTasksEqual &= finder.getFoundTasks().occurrenceCount == tasks.occurrenceCount;
505       
506        for (List<ITaskInstance> task1 : finder.getFoundTasks()) {
507            boolean foundTask = false;
508            for (List<ITaskInstance> task2 : tasks) {
509                boolean tasksEqual = false;
510                if (task1.size() == task2.size()) {
511                    tasksEqual = true;
512                    for (int i = 0; i < task1.size(); i++) {
513                        if (!identityTaskComparator.equals(task1.get(i).getTask(), task2.get(i).getTask()))
514                        {
515                            tasksEqual = false;
516                        }
517                    }
518                }
519               
520                if (tasksEqual) {
521                    foundTask = true;
522                    break;
523                }
524            }
525           
526            if (!foundTask) {
527                System.out.println("different is " + task1);
528                allTasksEqual = false;
529                break;
530            }
531        }
532       
533        if (!allTasksEqual) {
534            System.out.println(finder.getFoundTasks());
535            System.out.println(tasks);
536           
537            throw new IllegalArgumentException("both tries calculated different subsequences");
538        }
539       
540        /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
541        appData.getLastTrie().process(dumper);
542        dumper.dumpCounters();*/
543       
544        appData.setLastFoundTasks(tasks);
545
546        Console.traceln(Level.FINE, "found " + appData.getLastFoundTasks().size() + " tasks " +
547                        "occurring " + appData.getLastFoundTasks().getOccurrenceCount() + " times");
548    }
549
550    /**
551     * @param appData the rule application data combining all data used for applying this rule
552     */
553    private void createNewTrie(RuleApplicationData appData) {
554        Console.traceln(Level.FINER, "training trie with a maximum sequence length of " +
555                        appData.getTrainedSequenceLength());
556
557        appData.getStopWatch().start("training trie");
558       
559        // we prepared the task instances to refer to unique tasks, if they are treated
560        // as equal. Therefore, we just compare the identity of the tasks of the task
561        // instances
562        appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
563   
564        appData.getLastTrie().trainSessions
565            (appData.getSessions(), appData.getTrainedSequenceLength());
566       
567        appData.getStopWatch().stop("training trie");
568    }
569
570    /**
571     * @param appData the rule application data combining all data used for applying this rule
572     */
573    private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
574        appData.detectedAndReplacedTasks(false);
575
576        if ((appData.getLastFoundTasks().size() > 0) &&
577            (appData.getLastFoundTasks().getOccurrenceCount() > 1))
578        {
579            Console.traceln(Level.FINER, "replacing tasks occurrences");
580
581            for (List<ITaskInstance> task : appData.getLastFoundTasks()) {
582                ISequence sequence = taskFactory.createNewSequence();
583               
584                Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " + task);
585
586                List<ISequenceInstance> sequenceInstances =
587                    replaceTaskOccurrences(task, appData.getSessions(), sequence);
588               
589                harmonizeSequenceInstancesModel(sequence, sequenceInstances,task.size());
590                appData.detectedAndReplacedTasks
591                    (appData.detectedAndReplacedTasks() || (sequenceInstances.size() > 0));
592
593                if (sequenceInstances.size() < appData.getLastFoundTasks().getOccurrenceCount()) {
594                    Console.traceln(Level.FINE, sequence.getId() + ": replaced task only " +
595                                    sequenceInstances.size() + " times instead of expected " +
596                                    appData.getLastFoundTasks().getOccurrenceCount());
597                }
598            }
599        }
600       
601    }
602
603    /**
604     *
605     */
606    private void harmonizeSequenceInstancesModel(ISequence               sequence,
607                                                 List<ISequenceInstance> sequenceInstances,
608                                                 int                     sequenceLength)
609    {
610        TaskInstanceComparator comparator = preparationTaskHandlingStrategy.getTaskComparator();
611       
612        // ensure for each subtask that lexically different variants are preserved
613        for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
614            List<ITask> subTaskVariants = new LinkedList<ITask>();
615           
616            for (ISequenceInstance sequenceInstance : sequenceInstances) {
617                ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
618               
619                boolean found = false;
620               
621                for (int i = 0; i < subTaskVariants.size(); i++) {
622                    if (comparator.areLexicallyEqual(subTaskVariants.get(i), candidate)) {
623                        taskBuilder.setTask
624                            (sequenceInstance.get(subTaskIndex), subTaskVariants.get(i));
625                       
626                        found = true;
627                        break;
628                    }
629                }
630               
631                if (!found) {
632                    subTaskVariants.add(candidate);
633                }
634            }
635           
636            // if there are more than one lexically different variant of the sub task at
637            // the considered position, adapt the sequence model at that position to have
638            // a selection of the different variants. In this case also adapt the
639            // generated sequence instances to correctly contain selection instances. If
640            // there is only one variant of sub tasks at the given position, simply set
641            // this variant as the sub task of the selection. In this case, the instances
642            // can be preserved as is
643            if (subTaskVariants.size() > 1) {
644                ISelection selection = taskFactory.createNewSelection();
645               
646                for (ITask variant : subTaskVariants) {
647                    taskBuilder.addChild(selection, variant);
648                }
649               
650                taskBuilder.addChild(sequence, selection);
651               
652                for (ISequenceInstance instance : sequenceInstances) {
653                    ISelectionInstance selectionInstance =
654                        taskFactory.createNewTaskInstance(selection);
655                    taskBuilder.setChild(selectionInstance, instance.get(subTaskIndex));
656                    taskBuilder.setTaskInstance(instance, subTaskIndex, selectionInstance);
657                }
658            }
659            else if (subTaskVariants.size() == 1) {
660                taskBuilder.addChild(sequence, subTaskVariants.get(0));
661            }
662        }
663    }
664
665    /**
666     * @param tree
667     */
668    private List<ISequenceInstance> replaceTaskOccurrences(List<ITaskInstance> task,
669                                                           List<IUserSession>  sessions,
670                                                           ISequence           temporalTaskModel)
671    {
672        List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>();
673       
674        for (IUserSession session : sessions) {
675            int index = -1;
676               
677            do {
678                index = getSubListIndex(session, task, ++index);
679
680                if (index > -1) {
681                    sequenceInstances.add
682                        (RuleUtils.createNewSubSequenceInRange
683                             (session, index, index + task.size() - 1, temporalTaskModel,
684                              taskFactory, taskBuilder));
685                }
686            }
687            while (index > -1);
688        }
689       
690        return sequenceInstances;
691    }
692
693    /**
694     * @param trie
695     * @param object
696     * @return
697     */
698    private int getSubListIndex(ITaskInstanceList   list,
699                                List<ITaskInstance> subList,
700                                int                 startIndex)
701    {
702        boolean matchFound;
703        int result = -1;
704       
705        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
706            matchFound = true;
707           
708            for (int j = 0; j < subList.size(); j++) {
709                // we prepared the task instances to refer to unique tasks, if they are treated
710                // as equal. Therefore, we just compare the identity of the tasks of the task
711                // instances
712                if (list.get(i + j).getTask() != subList.get(j).getTask()) {
713                    matchFound = false;
714                    break;
715                }
716            }
717           
718            if (matchFound) {
719                result = i;
720                break;
721            }
722        }
723       
724        return result;
725    }
726
727    /**
728     * @param trie
729     * @param object
730     * @return
731     */
732    private int getSubListIndex(List<ITaskInstance> list,
733                                List<ITaskInstance> subList,
734                                int                 startIndex)
735    {
736        boolean matchFound;
737        int result = -1;
738       
739        for (int i = startIndex; i <= list.size() - subList.size(); i++) {
740            matchFound = true;
741           
742            for (int j = 0; j < subList.size(); j++) {
743                // we prepared the task instances to refer to unique tasks, if they are treated
744                // as equal. Therefore, we just compare the identity of the tasks of the task
745                // instances
746                if (list.get(i + j) != subList.get(j)) {
747                    matchFound = false;
748                    break;
749                }
750            }
751           
752            if (matchFound) {
753                result = i;
754                break;
755            }
756        }
757       
758        return result;
759    }
760   
761    /**
762     * @author Patrick Harms
763     */
764    private class MaxCountAndLongestTasksFinder implements TrieProcessor<ITaskInstance> {
765       
766        /**
767         *
768         */
769        private int currentCount;
770       
771        /**
772         *
773         */
774        private List<List<ITaskInstance>> foundTasks = new LinkedList<List<ITaskInstance>>();
775
776        /**
777         *
778         */
779        public MaxCountAndLongestTasksFinder() {
780            super();
781            this.currentCount = 0;
782        }
783
784        /* (non-Javadoc)
785         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
786         */
787        @Override
788        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
789            if (foundTask.size() < 2) {
790                // ignore single tasks
791                return TrieProcessor.Result.CONTINUE;
792            }
793           
794            if (count < 2) {
795                // ignore singular occurrences
796                return TrieProcessor.Result.SKIP_NODE;
797            }
798
799            if (this.currentCount > count) {
800                // ignore this children of this task, as they may have only smaller counts than
801                // the already found tasks
802                return TrieProcessor.Result.SKIP_NODE;
803            }
804           
805            if (this.currentCount < count) {
806                // the provided task occurs more often that all tasks found so far.
807                // clear all found tasks and use the new count as the one searched for
808                foundTasks.clear();
809                this.currentCount = count;
810            }
811           
812            if (this.currentCount == count) {
813                // the task is of interest. Sort it into the other found tasks so that
814                // the longest come first
815                boolean added = false;
816                for (int i = 0; i < foundTasks.size(); i++) {
817                    if (foundTasks.get(i).size() < foundTask.size()) {
818                        // defensive copy
819                        foundTasks.add(i, new LinkedList<ITaskInstance>(foundTask)); // defensive copy
820                        added = true;
821                        break;
822                    }
823                }
824               
825                if (!added) {
826                    foundTasks.add(new LinkedList<ITaskInstance>(foundTask)); // defensive copy
827                }
828            }
829           
830            return TrieProcessor.Result.CONTINUE;
831        }
832
833        /**
834         *  @return
835         */
836        public Tasks getFoundTasks() {
837            removePermutationsOfShorterTasks();
838            return new Tasks(currentCount, foundTasks);
839        }
840
841        /**
842         *
843         */
844        private void removePermutationsOfShorterTasks() {
845            // now iterate the sorted list and for each task remove all other tasks that are shorter
846            // (i.e. come later in the sorted list) and that represent a subpart of the task
847            for (int i = 0; i < foundTasks.size(); i++) {
848                for (int j = i + 1; j < foundTasks.size();) {
849                    if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
850                        // found a task that is a potential subtask. Check for this and remove the
851                        // subtask if needed
852                        List<ITaskInstance> longTask = foundTasks.get(i);
853                        List<ITaskInstance> shortTask = foundTasks.get(j);
854                       
855                        if (getSubListIndex(longTask, shortTask, 0) > -1) {
856                            foundTasks.remove(j);
857                        }
858                        else {
859                            j++;
860                        }
861                    }
862                    else {
863                        j++;
864                    }
865                }
866            }
867        }
868
869    }
870   
871//    /**
872//     * @author Patrick Harms
873//     */
874//    private class TrieStatisticsDumper implements TrieProcessor<ITaskInstance> {
875//       
876//        /**
877//         *
878//         */
879//        private Map<Integer, Integer> counters = new HashMap<Integer, Integer>();
880//       
881//        /* (non-Javadoc)
882//         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
883//         */
884//        @Override
885//        public TrieProcessor.Result process(List<ITaskInstance> subsequence, int count) {
886//            if (subsequence.size() == 1) {
887//                Integer value = counters.get(count);
888//               
889//                if (value == null) {
890//                    value = 0;
891//                }
892//               
893//                counters.put(count, value + 1);
894//               
895//                return TrieProcessor.Result.CONTINUE;
896//            }
897//            else {
898//                // ignore singular occurrences
899//                return TrieProcessor.Result.SKIP_NODE;
900//            }
901//        }
902//
903//        /**
904//         *  @return
905//         */
906//        public void dumpCounters() {
907//            int dumpedCounters = 0;
908//           
909//            int count = 1;
910//            while (dumpedCounters < counters.size()) {
911//                Integer value = counters.get(count++);
912//                if (value != null) {
913//                    System.out.println(value + " symbols occurred " + count + " times");
914//                    dumpedCounters++;
915//                }
916//            }
917//        }
918//
919//    }
920   
921    /**
922     *
923     */
924    private static class RuleApplicationData {
925       
926        /**
927         *
928         */
929        private List<IUserSession> sessions;
930       
931        /**
932         *
933         */
934        private TaskInstanceTrie lastTrie;
935       
936        /**
937         * default and minimum trained sequence length is 3
938         */
939        private int trainedSequenceLength = 3;
940       
941        /**
942         *
943         */
944        private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
945       
946        /**
947         *
948         */
949        private boolean detectedAndReplacedTasks;
950       
951        /**
952         *
953         */
954        private RuleApplicationResult result = new RuleApplicationResult();
955       
956        /**
957         *
958         */
959        private StopWatch stopWatch = new StopWatch();
960       
961        /**
962         *
963         */
964        private RuleApplicationData(List<IUserSession> sessions) {
965            this.sessions = sessions;
966        }
967
968        /**
969         * @return the tree
970         */
971        private List<IUserSession> getSessions() {
972            return sessions;
973        }
974
975        /**
976         * @param lastTrie the lastTrie to set
977         */
978        private void setLastTrie(TaskInstanceTrie lastTrie) {
979            this.lastTrie = lastTrie;
980        }
981
982        /**
983         * @return the lastTrie
984         */
985        private TaskInstanceTrie getLastTrie() {
986            return lastTrie;
987        }
988
989        /**
990         * @param trainedSequenceLength the trainedSequenceLength to set
991         */
992        private void setTrainedSequenceLength(int trainedSequenceLength) {
993            this.trainedSequenceLength = trainedSequenceLength;
994        }
995
996        /**
997         * @return the trainedSequenceLength
998         */
999        private int getTrainedSequenceLength() {
1000            return trainedSequenceLength;
1001        }
1002
1003        /**
1004         * @param lastFoundSequences the lastFoundSequences to set
1005         */
1006        private void setLastFoundTasks(Tasks lastFoundSequences) {
1007            this.lastFoundTasks = lastFoundSequences;
1008        }
1009
1010        /**
1011         * @return the lastFoundSequences
1012         */
1013        private Tasks getLastFoundTasks() {
1014            return lastFoundTasks;
1015        }
1016
1017        /**
1018         *
1019         */
1020        private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
1021            this.detectedAndReplacedTasks = detectedAndReplacedTasks;
1022        }
1023
1024        /**
1025         *
1026         */
1027        private boolean detectedAndReplacedTasks() {
1028            return detectedAndReplacedTasks;
1029        }
1030       
1031        /**
1032         * @return the result
1033         */
1034        private RuleApplicationResult getResult() {
1035            return result;
1036        }
1037
1038        /**
1039         * @return the stopWatch
1040         */
1041        private StopWatch getStopWatch() {
1042            return stopWatch;
1043        }
1044
1045    }
1046   
1047
1048    /**
1049     * @author Patrick Harms
1050     */
1051    private static class Tasks implements Iterable<List<ITaskInstance>> {
1052       
1053        /**
1054         *
1055         */
1056        private int occurrenceCount;
1057       
1058        /**
1059         *
1060         */
1061        private List<List<ITaskInstance>> sequences;
1062
1063        /**
1064         * @param occurrenceCount
1065         * @param sequences
1066         */
1067        private Tasks(int occurrenceCount, List<List<ITaskInstance>> sequences) {
1068            super();
1069            this.occurrenceCount = occurrenceCount;
1070            this.sequences = sequences;
1071        }
1072
1073        /**
1074         * @return
1075         */
1076        private int getOccurrenceCount() {
1077            return occurrenceCount;
1078        }
1079
1080        /**
1081         * @return
1082         */
1083        private int size() {
1084            return this.sequences.size();
1085        }
1086
1087        /**
1088         *
1089         */
1090
1091        /* (non-Javadoc)
1092         * @see java.lang.Iterable#iterator()
1093         */
1094        @Override
1095        public Iterator<List<ITaskInstance>> iterator() {
1096            return this.sequences.iterator();
1097        }
1098
1099        /* (non-Javadoc)
1100         * @see java.lang.Object#toString()
1101         */
1102        @Override
1103        public String toString() {
1104            StringBuffer result = new StringBuffer();
1105            result.append(this.occurrenceCount);
1106            result.append(" occurrences:\n");
1107           
1108            for (List<ITaskInstance> task : sequences) {
1109                result.append(task);
1110                result.append("\n");
1111            }
1112           
1113            return result.toString();
1114        }
1115
1116    }
1117   
1118    // methods for internal testing
1119//    private void checkMatchingOfSessions(List<List<Event>>  flattenedSessions,
1120//                                         List<IUserSession> sessions,
1121//                                         String             when)
1122//    {
1123//        List<List<Event>> currentFlattenedSessions = flattenSessions(sessions);
1124//        if (flattenedSessions.size() != currentFlattenedSessions.size()) {
1125//            System.out.println("################## number of sessions changed after " + when);
1126//        }
1127//        else {
1128//            for (int i = 0; i < flattenedSessions.size(); i++) {
1129//                List<Event> expected = flattenedSessions.get(i);
1130//                List<Event> current = currentFlattenedSessions.get(i);
1131//           
1132//                if (expected.size() != current.size()) {
1133//                    System.out.println
1134//                        ("################## length of session " + i + " changed after " + when);
1135//                }
1136//                else {
1137//                    for (int j = 0; j < expected.size(); j++) {
1138//                        if (!expected.get(j).equals(current.get(j))) {
1139//                            System.out.println("################## event " + j + " of session " +
1140//                                               i + " changed after " + when);
1141//                        }
1142//                    }
1143//                }
1144//            }     
1145//        }
1146//    }
1147//
1148//    private List<List<Event>> flattenSessions(List<IUserSession> sessions) {
1149//        List<List<Event>> flattenedSessions = new ArrayList<List<Event>>();
1150//        for (IUserSession session : sessions) {
1151//            List<Event> flattenedUserSession = new ArrayList<Event>();
1152//            flatten(session, flattenedUserSession);
1153//            flattenedSessions.add(flattenedUserSession);
1154//        }
1155//
1156//        return flattenedSessions;
1157//    }
1158//
1159//    private void flatten(IUserSession iUserSession, List<Event> flattenedUserSession) {
1160//        for (ITaskInstance instance : iUserSession) {
1161//            flatten(instance, flattenedUserSession);
1162//        }
1163//    }
1164//
1165//    private void flatten(ITaskInstance instance, List<Event> flattenedUserSession) {
1166//        if (instance instanceof ITaskInstanceList) {
1167//            for (ITaskInstance child : (ITaskInstanceList) instance) {
1168//                flatten(child, flattenedUserSession);
1169//            }
1170//        }
1171//        else if (instance instanceof ISelectionInstance) {
1172//            flatten(((ISelectionInstance) instance).getChild(), flattenedUserSession);
1173//        }
1174//        else if (instance instanceof IOptionalInstance) {
1175//            flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
1176//        }
1177//        else if (instance instanceof IEventTaskInstance) {
1178//            flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
1179//        }
1180//    }
1181
1182}
Note: See TracBrowser for help on using the repository browser.