source: branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1749

Last change on this file since 1749 was 1749, checked in by rkrimmel, 10 years ago

Code cleanup, removed dublicate storage of task items

File size: 48.4 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.io.Serializable;
18import java.util.ArrayList;
19import java.util.Collections;
20import java.util.Comparator;
21import java.util.HashMap;
22import java.util.HashSet;
23import java.util.Iterator;
24import java.util.LinkedList;
25import java.util.List;
26import java.util.Map;
27import java.util.Queue;
28import java.util.Set;
29import java.util.concurrent.ExecutorService;
30import java.util.concurrent.Executors;
31import java.util.concurrent.TimeUnit;
32import java.util.logging.Level;
33
34import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm;
35import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithmFactory;
36import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Match;
37import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.MatchOccurrence;
38import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
39import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.ObjectDistanceSubstitionMatrix;
40import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
41import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
42import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
43import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional;
44import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
45import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
46import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
47import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
48import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
49import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
50import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
51import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
52import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
53import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
54import de.ugoe.cs.util.StopWatch;
55import de.ugoe.cs.util.console.Console;
56
57/**
58 * <p>
59 * This class implements the major rule for creating task trees based on a set of recorded user
60 * sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it searches
61 * the sessions for iterations and replaces them accordingly. Then it searches for sub sequences
62 * using alignment algorithms For each found sub sequence, it replaces the occurrences by creating
63 * appropriate {@link ISequence}s. Afterwards, again searches for iterations and then again for sub
64 * sequences until no more replacements are done.
65 * </p>
66 * <p>
67 *
68 *
69 * @author Ralph Krimmel
70 */
71public class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
72
73    /** The n threads. */
74    public static int nThreads = Runtime.getRuntime().availableProcessors() - 1;
75    //public static int nThreads = 1;
76
77    /** The iteration. */
78    private int iteration = 0;
79
80    private int maxIterations = 10
81            ;
82
83    private static int alignmentThreshold = 9;
84    private static int gapPenalty = -3;
85
86    private static float maxSubstitutionScore = 6;
87
88    /**
89     * <p>
90     * the task factory to be used for creating substructures for the temporal relationships
91     * identified during rul application
92     * </p>
93     * .
94     */
95    private final ITaskFactory taskFactory;
96
97    /**
98     * <p>
99     * the task builder to be used for creating substructures for the temporal relationships
100     * identified during rule application
101     * </p>
102     * .
103     */
104    private final ITaskBuilder taskBuilder;
105
106    /**
107     * <p>
108     * the task handling strategy to be used for comparing tasks for preparation, i.e., before the
109     * tasks are harmonized
110     * </p>
111     */
112    private final TaskHandlingStrategy preparationTaskHandlingStrategy;
113
114    /**
115     * <p>
116     * instantiates the rule and initializes it with a task equality to be considered when comparing
117     * tasks as well as a task factory and builder to be used for creating task structures.
118     * </p>
119     *
120     * @param minimalTaskEquality
121     *            the task equality to be considered when comparing tasks
122     * @param taskFactory
123     *            the task factory to be used for creating substructures
124     * @param taskBuilder
125     *            the task builder to be used for creating substructures
126     */
127
128    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
129                                          ITaskFactory taskFactory,
130                                          ITaskBuilder taskBuilder)
131    {
132        this.taskFactory = taskFactory;
133        this.taskBuilder = taskBuilder;
134
135        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
136    }
137
138    /*
139     * (non-Javadoc)
140     *
141     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply (java.util.List)
142     */
143    @Override
144    public RuleApplicationResult apply(List<IUserSession> sessions) {
145        final RuleApplicationData appData = new RuleApplicationData(sessions);
146
147        harmonizeEventTaskInstancesModel(appData);
148
149        Console.traceln(Level.INFO, "generating substitution matrix from " +
150            appData.getNumber2Task().size() + " unique tasks");
151        appData.getStopWatch().start("substitution matrix");
152        appData.getSubmat().generate(appData.getNumber2Task().values());
153        appData.getStopWatch().stop("substitution matrix");
154
155        Console.traceln(Level.INFO, "Starting main loop");
156        do {
157            Console.traceln(Level.INFO, "Iteration Number: " + iteration);
158            iteration++;
159            appData.detectedAndReplacedTasks = false;
160            appData.getStopWatch().start("whole loop");
161            detectAndReplaceIterations(appData);
162            appData.getStopWatch().start("task replacement");
163            // Just does anything if the substitution matrix is created with the
164            // option to do so
165            appData.updateSubstitutionMatrix();
166            detectAndReplaceTasks(appData); //
167            appData.getStopWatch().stop("task replacement");
168            appData.getStopWatch().stop("whole loop");
169            // appData.getStopWatch().dumpStatistics(System.out);
170            appData.getStopWatch().reset();
171
172            // } while (appData.detectedAndReplacedTasks()||iteration < maxIterations);
173        }
174        while (iteration < maxIterations);
175        Console.println("created " + appData.getResult().getNewlyCreatedTasks().size() +
176            " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
177            " appropriate instances\n");
178
179        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
180            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
181        {
182            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
183        }
184        // new TaskTreeValidator().validate(appData.getSessions());
185        return appData.getResult();
186    }
187
188    /**
189     * Creates the number sequences.
190     *
191     * @param appData
192     *            the app data
193     * @return the array list
194     */
195    private ArrayList<NumberSequence> createNumberSequences(RuleApplicationData appData) {
196        final ArrayList<NumberSequence> result = new ArrayList<NumberSequence>();
197        for (int i = 0; i < appData.getSessions().size(); i++) {
198            final IUserSession session = appData.getSessions().get(i);
199            final NumberSequence templist = new NumberSequence(session.size());
200            for (int j = 0; j < session.size(); j++) {
201                final ITaskInstance taskInstance = session.get(j);
202                templist.getSequence()[j] = taskInstance.getTask().getId();
203            }
204            // Each NumberSequence is identified by its id, beginning to count
205            // at zero
206            templist.setId(i);
207            result.add(templist);
208        }
209        return result;
210    }
211
212    /**
213     * <p>
214     * searches for direct iterations of single tasks in all sequences and replaces them with
215     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of a
216     * task that is iterated somewhen are replaced with iterations to have again an efficient way
217     * for task comparisons.
218     * </p>
219     *
220     * @param appData
221     *            the rule application data combining all data used for applying this rule
222     */
223    private void detectAndReplaceIterations(RuleApplicationData appData) {
224        Console.traceln(Level.FINE, "detecting iterations");
225        appData.getStopWatch().start("detecting iterations");
226
227        final List<IUserSession> sessions = appData.getSessions();
228
229        final Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
230
231        if (iteratedTasks.size() > 0) {
232            replaceIterationsOf(iteratedTasks, sessions, appData);
233        }
234
235        appData.getStopWatch().stop("detecting iterations");
236        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
237    }
238
239    /**
240     * Detect and replace tasks.
241     *
242     * @param appData
243     *            the rule application data combining all data used for applying this rule
244     */
245    private void detectAndReplaceTasks(RuleApplicationData appData) {
246        Console.traceln(Level.FINE, "detecting and replacing tasks");
247        appData.getStopWatch().start("detecting tasks");
248
249        // Create NumberSequences
250        appData.setNumberSequences(this.createNumberSequences(appData));
251
252        // Generate pairwise alignments
253        // appData.setMatchseqs(generatePairwiseAlignments(appData));
254        generatePairwiseAlignments(appData);
255        Console.traceln(Level.FINE, "Found " + appData.getMatchseqs().size() + " results");
256
257        // Nothing found, we can end here
258        if (appData.getMatchseqs().size() == 0) {
259            appData.detectedAndReplacedTasks = false;
260            return;
261        }
262
263        // Searching each match in all other sessions, counting its occurences
264        searchMatchesInAllSessions(appData);
265
266        // Sort results to get the most occurring results
267        Console.traceln(Level.INFO, "sorting " + appData.getMatchseqs().size() + " results");
268        final Comparator<Match> comparator = new Comparator<Match>() {
269            @Override
270            public int compare(Match m1, Match m2) {
271                int cmp = m2.occurenceCount() - m1.occurenceCount();
272                if (cmp != 0) {
273                    return cmp;
274                }
275                else {
276                    cmp = m2.size() - m1.size();
277                    if (cmp != 0) {
278                        return cmp;
279                    }
280                    else {
281                        // This should rarely happen
282                        cmp = m2.ocurrenceIDSum() - m1.ocurrenceIDSum();
283                        if (cmp != 0) {
284                            return cmp;
285                        }
286                        else {
287                            cmp = m2.taskIdSum() - m1.taskIdSum();
288
289                            return cmp;
290                        }
291                    }
292                }
293            }
294        };
295
296        Collections.sort(appData.getMatchseqs(), comparator);
297        appData.getStopWatch().stop("detecting tasks");
298
299        Console.traceln(Level.INFO, "Preparing replacments");
300        prepareReplacements(appData);
301        Console.traceln(Level.INFO, "Replacing matches in sessions");
302        appData.getStopWatch().start("replacing tasks");
303        replaceMatches(appData);
304        appData.getStopWatch().stop("replacing tasks");
305    }
306
307    // TODO: DEBUG METHOD
308    @SuppressWarnings("unused")
309    private void printMatches(RuleApplicationData appData) {
310        LinkedList<Match> matchseqs = appData.getMatchseqs();
311        if (iteration > 1) {
312            System.out.println("PRINTING MATCHES");
313            for (Iterator<Match> it = matchseqs.iterator(); it.hasNext();) {
314                Match m = it.next();
315                m.getFirstSequence().printSequence();
316                m.getSecondSequence().printSequence();
317                for (Iterator<MatchOccurrence> jt = m.getOccurences().iterator(); jt.hasNext();) {
318                    MatchOccurrence mo = jt.next();
319                    System.out.print(mo.getSequenceId() + " ");
320                }
321                System.out.println();
322                System.out.println();
323            }
324        }
325    }
326
327    /**
328     * <p>
329     * harmonizes the event task instances by unifying tasks. This is done, as initially the event
330     * tasks being equal with respect to the considered task equality are distinct objects. The
331     * comparison of these distinct objects is more time consuming than comparing the object
332     * references.
333     * </p>
334     *
335     * @param appData
336     *            the rule application data combining all data used for applying this rule
337     * @return Returns the unique tasks symbol map
338     */
339    private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
340        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
341        appData.getStopWatch().start("harmonizing event tasks");
342        final SymbolMap<ITaskInstance, ITask> uniqueTasks =
343            preparationTaskHandlingStrategy.createSymbolMap();
344
345        final TaskInstanceComparator comparator =
346            preparationTaskHandlingStrategy.getTaskComparator();
347
348        int unifiedTasks = 0;
349        ITask task;
350        final List<IUserSession> sessions = appData.getSessions();
351        for (int j = 0; j < sessions.size(); j++) {
352            final IUserSession session = sessions.get(j);
353
354            for (int i = 0; i < session.size(); i++) {
355                final ITaskInstance taskInstance = session.get(i);
356                task = uniqueTasks.getValue(taskInstance);
357
358                if (task == null) {
359                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
360                    appData.getNumber2Task().put(taskInstance.getTask().getId(),
361                                                 taskInstance.getTask());
362                }
363                else {
364                    taskBuilder.setTask(taskInstance, task);
365                    unifiedTasks++;
366                }
367            }
368            comparator.clearBuffers();
369        }
370
371        appData.getStopWatch().stop("harmonizing event tasks");
372        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
373            uniqueTasks.size() + " different tasks)");
374
375        appData.getStopWatch().dumpStatistics(System.out);
376        appData.getStopWatch().reset();
377    }
378
379    /**
380     * <p>
381     * TODO clarify why this is done (in fact, ask Patrick Harms)
382     * </p>
383     * .
384     *
385     * @param iteration
386     *            the iteration
387     * @param iterationInstances
388     *            the iteration instances
389     */
390    private void harmonizeIterationInstancesModel(IIteration iteration,
391                                                  List<IIterationInstance> iterationInstances)
392    {
393        final List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
394        final TaskInstanceComparator comparator =
395            preparationTaskHandlingStrategy.getTaskComparator();
396
397        // merge the lexically different variants of iterated task to a unique
398        // list
399        for (final IIterationInstance iterationInstance : iterationInstances) {
400            for (final ITaskInstance executionVariant : iterationInstance) {
401                final ITask candidate = executionVariant.getTask();
402
403                boolean found = false;
404                for (final ITask taskVariant : iteratedTaskVariants) {
405                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
406                        taskBuilder.setTask(executionVariant, taskVariant);
407                        found = true;
408                        break;
409                    }
410                }
411
412                if (!found) {
413                    iteratedTaskVariants.add(candidate);
414                }
415            }
416        }
417
418        // if there are more than one lexically different variant of iterated
419        // tasks, adapt the
420        // iteration model to be a selection of different variants. In this case
421        // also adapt
422        // the generated iteration instances to correctly contain selection
423        // instances. If there
424        // is only one variant of an iterated task, simply set this as the
425        // marked task of the
426        // iteration. In this case, the instances can be preserved as is
427        if (iteratedTaskVariants.size() > 1) {
428            final ISelection selection = taskFactory.createNewSelection();
429
430            for (final ITask variant : iteratedTaskVariants) {
431                taskBuilder.addChild(selection, variant);
432            }
433
434            taskBuilder.setMarkedTask(iteration, selection);
435
436            for (final IIterationInstance instance : iterationInstances) {
437                for (int i = 0; i < instance.size(); i++) {
438                    final ISelectionInstance selectionInstance =
439                        taskFactory.createNewTaskInstance(selection);
440                    taskBuilder.setChild(selectionInstance, instance.get(i));
441                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
442                }
443            }
444        }
445        else {
446            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
447        }
448    }
449
450    /**
451     * Match as sequence.
452     *
453     * @param appData
454     *            RuleApplicationData needed to keep track of all created tasks
455     * @param m
456     *            The match to be converted into a Task
457     * @return The task of the match with an ISequence as its root
458     */
459    synchronized public ISequence matchAsSequence(RuleApplicationData appData, Match m) {
460        final ISequence sequence = taskFactory.createNewSequence();
461        appData.newTaskCreated(sequence);
462
463        final int[] first = m.getFirstSequence().getSequence();
464        final int[] second = m.getSecondSequence().getSequence();
465
466        // Both sequences of a match are equally long
467        for (int i = 0; i < m.getFirstSequence().size(); i++) {
468
469            // Two gaps aligned to each other: Have not seen it happening so
470            // far, just to handle it
471            if ((first[i] == -1) && (second[i] == -1)) {
472                // Do nothing here.
473            }
474            // Both events are equal, we can simply add the task referring to
475            // the number
476            else if (first[i] == second[i]) {
477                taskBuilder.addChild(sequence, appData.getNumber2Task().get(first[i]));
478            }
479            // We have a gap in the first sequence, we need to add the task of
480            // the second sequence as optional
481            else if ((first[i] == -1) && (second[i] != -1)) {
482                final IOptional optional = taskFactory.createNewOptional();
483                appData.newTaskCreated(optional);
484                taskBuilder.setMarkedTask(optional, appData.getNumber2Task().get(second[i]));
485                taskBuilder.addChild(sequence, optional);
486            }
487            // We have a gap in the second sequence, we need to add the task of
488            // the first sequence as optional
489            else if ((first[i] != -1) && (second[i] == -1)) {
490                final IOptional optional = taskFactory.createNewOptional();
491                appData.newTaskCreated(optional);
492                taskBuilder.setMarkedTask(optional, appData.getNumber2Task().get(first[i]));
493                taskBuilder.addChild(sequence, optional);
494            }
495            // Both tasks are not equal, we need to insert a selection here.
496            // Now things get complicated. We first need to check
497            // if the next position is not a selection. Then we can just create
498            // a selection
499            // of the both Tasks
500            // In the other case (more than one selection following this
501            // selection), we want to
502            // create a selection of sequences where each sequence gets the
503            // corresponding task of
504            // the its sequence in the pattern.
505            //
506            else if (i < (first.length - 1)) {
507
508                if ((first[i] != second[i]) &&
509                    (((first[i + 1] == second[i + 1]) || (first[i + 1] == -1) || (second[i + 1] == -1))))
510                {
511
512                    final ISelection selection = taskFactory.createNewSelection();
513                    appData.newTaskCreated(selection);
514                    taskBuilder.addChild(selection, appData.getNumber2Task().get(first[i]));
515                    taskBuilder.addChild(selection, appData.getNumber2Task().get(second[i]));
516                    taskBuilder.addChild(sequence, selection);
517                }
518                else {
519                    boolean selectionfound = true;
520                    final ISelection selection = taskFactory.createNewSelection();
521                    appData.newTaskCreated(selection);
522
523                    final ISequence subsequence1 = taskFactory.createNewSequence();
524                    appData.newTaskCreated(subsequence1);
525
526                    final ISequence subsequence2 = taskFactory.createNewSequence();
527                    appData.newTaskCreated(subsequence2);
528
529                    taskBuilder.addChild(selection, subsequence1);
530                    taskBuilder.addChild(selection, subsequence2);
531                    taskBuilder.addChild(sequence, selection);
532                    // TODO: We may not run till the end!
533                    while ((i < (first.length - 1)) && selectionfound) {
534                        selectionfound = false;
535                        taskBuilder.addChild(subsequence1, appData.getNumber2Task().get(first[i]));
536                        taskBuilder.addChild(subsequence2, appData.getNumber2Task().get(second[i]));
537                        if ((first[i + 1] != second[i + 1]) && (first[i + 1] != -1) &&
538                            (second[i + 1] != -1))
539                        {
540                            selectionfound = true;
541                        }
542                        else {
543                            continue;
544                        }
545                        i++;
546                    }
547                    if ((i == (first.length - 1)) && selectionfound) {
548                        taskBuilder.addChild(subsequence1, appData.getNumber2Task().get(first[i]));
549                        taskBuilder.addChild(subsequence2, appData.getNumber2Task().get(second[i]));
550                    }
551                }
552            }
553            else {
554                // i = length-1
555                if ((first[i] != second[i])) {
556
557                    final ISelection selection = taskFactory.createNewSelection();
558                    appData.newTaskCreated(selection);
559                    taskBuilder.addChild(selection, appData.getNumber2Task().get(first[i]));
560                    taskBuilder.addChild(selection, appData.getNumber2Task().get(second[i]));
561                    taskBuilder.addChild(sequence, selection);
562                }
563            }
564
565        }
566        return sequence;
567    }
568
569    /**
570     * <p>
571     * replaces all occurrences of all tasks provided in the set with iterations
572     * </p>
573     * .
574     *
575     * @param iteratedTasks
576     *            the tasks to be replaced with iterations
577     * @param sessions
578     *            the sessions in which the tasks are to be replaced
579     * @param appData
580     *            the rule application data combining all data used for applying this rule
581     */
582    private void replaceIterationsOf(Set<ITask> iteratedTasks,
583                                     List<IUserSession> sessions,
584                                     RuleApplicationData appData)
585    {
586        final Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
587        final Map<IIteration, List<IIterationInstance>> iterationInstances =
588            new HashMap<IIteration, List<IIterationInstance>>();
589
590        for (final ITask iteratedTask : iteratedTasks) {
591
592            final IIteration iteration = taskFactory.createNewIteration();
593            appData.newTaskCreated(iteration);
594            iterations.put(iteratedTask, iteration);
595            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
596        }
597
598        IIterationInstance iterationInstance;
599
600        for (final IUserSession session : sessions) {
601            int index = 0;
602            iterationInstance = null;
603
604            while (index < session.size()) {
605                // we prepared the task instances to refer to unique tasks, if
606                // they are treated
607                // as equal. Therefore, we just compare the identity of the
608                // tasks of the task
609                // instances
610                final ITask currentTask = session.get(index).getTask();
611                final IIteration iteration = iterations.get(currentTask);
612                if (iteration != null) {
613                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration)) {
614                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
615                        iterationInstances.get(iteration).add(iterationInstance);// TODO:: Don't
616                                                                                 // create
617                        // TaskInstances here,
618                        // use a set of tasks
619                        // instead
620                        taskBuilder.addTaskInstance(session, index, iterationInstance);
621                        index++;
622                    }
623
624                    taskBuilder.addChild(iterationInstance, session.get(index));
625                    taskBuilder.removeTaskInstance(session, index);
626                }
627                else {
628                    if (iterationInstance != null) {
629                        iterationInstance = null;
630                    }
631                    index++;
632                }
633            }
634        }
635
636        for (final Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances
637            .entrySet())
638        {
639            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
640        }
641    }
642
643    /**
644     * Prepare replacements.
645     *
646     * @param appData
647     *            the app data
648     */
649    private void prepareReplacements(RuleApplicationData appData) {
650        appData.initializeQueues(appData.getSessions().size());
651        int taskId=0;
652        for (Iterator<Match> it = appData.getMatchseqs().iterator(); it.hasNext();) {
653            Match m = it.next();
654            if (m.getOccurences().size() > 2) {
655                final ISequence task = matchAsSequence(appData, m);
656                appData.getPreparedTasks().add(task);
657               
658                for (Iterator<MatchOccurrence> jt = m.getOccurences().iterator(); jt.hasNext();) {
659                    MatchOccurrence mo = jt.next();
660                    // System.out.println("Found this match in sequence " +mo.getSequenceId());
661                    PlannedReplacement pr = new PlannedReplacement(taskId,mo);
662                    // System.out.println("Adding match to sequence " + mo.getSequenceId());
663                    appData.getPlannedReplacements()[mo.getSequenceId()].add(pr);
664                }
665                taskId++;
666            }
667        }
668    }
669
670    /**
671     * Replace matches.
672     *
673     * @param appData
674     *            the app data
675     */
676    private void replaceMatches(RuleApplicationData appData) {
677
678        final int numberSeqSize = appData.getNumberSequences().size();
679        Console.traceln(Level.INFO, "replacing matches with " + nThreads + " threads");
680        int newThreads = nThreads;
681        if (numberSeqSize < nThreads) {
682            newThreads = numberSeqSize;
683        }
684        final int interval = numberSeqSize / newThreads;
685        int rest = numberSeqSize % newThreads;
686        final ExecutorService executor = Executors.newFixedThreadPool(nThreads);
687        for (int i = 0; i <= (numberSeqSize - interval); i += interval) {
688            int offset = 0;
689            if (rest != 0) {
690                offset = 1;
691                rest--;
692            }
693            final int from = i;
694            final int to = i + interval + offset - 1;
695            Console.traceln(Level.FINE, "Match replacing: Creating thread with matches from " +
696                from + " to " + to);
697            // search each match in every other sequence
698            final ParallelMatchReplacer replacer = new ParallelMatchReplacer(appData, from, to);
699            executor.execute(replacer);
700            i += offset;
701        }
702        executor.shutdown();
703        try {
704            executor.awaitTermination(2, TimeUnit.HOURS);
705        }
706        catch (final InterruptedException e) {
707            e.printStackTrace();
708        }
709        appData.getPreparedTasks().clear();
710    }
711
712    /**
713     * <p>
714     * searches the provided sessions for task iterations. If a task is iterated, it is added to the
715     * returned set.
716     * </p>
717     *
718     * @param sessions
719     *            the sessions
720     * @return a set of tasks being iterated somewhere
721     */
722    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
723        final Set<ITask> iteratedTasks = new HashSet<ITask>();
724        for (final IUserSession session : sessions) {
725            for (int i = 0; i < (session.size() - 1); i++) {
726                // we prepared the task instances to refer to unique tasks, if
727                // they are treated
728                // as equal. Therefore, we just compare the identity of the
729                // tasks of the task
730                // instances
731                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
732                    iteratedTasks.add(session.get(i).getTask());
733                }
734            }
735        }
736        return iteratedTasks;
737    }
738
739    /**
740     * Generate pairwise alignments.
741     *
742     * @param appData
743     *            the app data
744     */
745    private void generatePairwiseAlignments(RuleApplicationData appData) {
746        final int numberSeqSize = appData.getNumberSequences().size();
747        appData.matchseqs = new LinkedList<Match>();
748        Console.traceln(Level.INFO, "generating pairwise alignments from " + numberSeqSize +
749            " sessions with " + nThreads + " threads");
750
751        int newThreads = nThreads;
752        if (numberSeqSize < nThreads) {
753            newThreads = numberSeqSize;
754        }
755
756        final ExecutorService executor = Executors.newFixedThreadPool(newThreads);
757        final int interval = numberSeqSize / newThreads;
758        int rest = numberSeqSize % newThreads;
759        Console.traceln(Level.FINE, "Interval: " + interval + " Rest: " + rest);
760        for (int i = 0; i <= (numberSeqSize - interval); i += interval) {
761            int offset = 0;
762            if (rest != 0) {
763                offset = 1;
764                rest--;
765            }
766
767            final int from = i;
768            final int to = i + interval + offset - 1;
769            Console.traceln(Level.FINER, "Aligning: Creating thread for sessions " + from +
770                " till " + to);
771            final ParallelPairwiseAligner aligner = new ParallelPairwiseAligner(appData, from, to);
772            executor.execute(aligner);
773            i += offset;
774        }
775        executor.shutdown();
776        try {
777            executor.awaitTermination(2, TimeUnit.HOURS);
778        }
779        catch (final InterruptedException e) {
780            e.printStackTrace();
781        }
782    }
783
784    /**
785     * Search matches in all sessions.
786     *
787     * @param appData
788     *            the app data
789     */
790    private void searchMatchesInAllSessions(RuleApplicationData appData) {
791
792        // Prepare parallel search of matchseqs
793        final int matchSeqSize = appData.getMatchseqs().size();
794        Console.traceln(Level.INFO, "searching for patterns (" + matchSeqSize +
795            ") occuring most with " + nThreads + " threads");
796        int newThreads = nThreads;
797        if (matchSeqSize < nThreads) {
798            newThreads = matchSeqSize;
799        }
800        final int interval = matchSeqSize / newThreads;
801        int rest = matchSeqSize % newThreads;
802        final ExecutorService executor = Executors.newFixedThreadPool(nThreads);
803        Console.traceln(Level.FINER, "Interval: " + interval + " Rest: " + rest);
804        for (int i = 0; i <= (matchSeqSize - interval); i += interval) {
805            int offset = 0;
806            if (rest != 0) {
807                offset = 1;
808                rest--;
809            }
810            final int from = i;
811            final int to = i + interval + offset - 1;
812            Console.traceln(Level.FINE, "Match finding: Creating thread with matches from " + from +
813                " to " + to);
814            // search each match in every other sequence
815            final ParallelMatchOcurrencesFinder finder =
816                new ParallelMatchOcurrencesFinder(appData, from, to);
817            executor.execute(finder);
818            i += offset;
819        }
820        executor.shutdown();
821        try {
822            executor.awaitTermination(2, TimeUnit.HOURS);
823        }
824        catch (final InterruptedException e) {
825            e.printStackTrace();
826        }
827
828    }
829
830    /*
831     * (non-Javadoc)
832     *
833     * @see java.lang.Object#toString()
834     */
835    @Override
836    public String toString() {
837        return "SequenceForTaskDetectionRuleAlignment";
838    }
839
840    /**
841     * The Class ParallelMatchOcurrencesFinder.
842     */
843    private class ParallelMatchOcurrencesFinder implements Runnable {
844
845        /** The app data. */
846        private final RuleApplicationData appData;
847
848        /** The from. */
849        private final int from;
850
851        /** The to. */
852        private final int to;
853
854        /**
855         * Instantiates a new parallel match ocurrences finder.
856         *
857         * @param appData
858         *            the app data
859         * @param from
860         *            the from
861         * @param to
862         *            the to
863         */
864        ParallelMatchOcurrencesFinder(RuleApplicationData appData, int from, int to) {
865            this.appData = appData;
866            this.from = from;
867            this.to = to;
868        }
869
870        /*
871         * (non-Javadoc)
872         *
873         * @see java.lang.Runnable#run()
874         */
875        @Override
876        public void run() {
877            int count = 0;
878            final int size = to + 1 - from;
879
880            for (int i = from; i <= to; i++) {
881                final Match pattern = appData.getMatchseqs().get(i);
882                count++;
883                RuleUtils.printProgressPercentage("Match finding progress", count, size);
884                // Skip sequences with more 0 events (scrolls) than other
885                // events.
886                // Both of the pattern sequences are equally long, so the zero
887                // counts just need to be smaller than the length of one
888                // sequence
889                if ((pattern.getFirstSequence().eventCount(0) +
890                    pattern.getSecondSequence().eventCount(0) + 1) > pattern.getFirstSequence()
891                    .size())
892                {
893                    continue;
894                }
895
896                for (int j = 0; j < appData.getNumberSequences().size(); j++) {
897                    final LinkedList<Integer> startpositions =
898                        appData.getNumberSequences().get(j).containsPattern(pattern);
899                    if (startpositions.size() > 0) {
900                        for (final Iterator<Integer> jt = startpositions.iterator(); jt.hasNext();)
901                        {
902                            final int start = jt.next();
903                            pattern.addOccurence(new MatchOccurrence(start, start + pattern.size(),
904                                                                     j));
905                        }
906                    }
907                }
908            }
909        }
910    }
911
912    /**
913     * The Class ParallelPairwiseAligner.
914     */
915    private class ParallelPairwiseAligner implements Runnable {
916
917        /** The app data. */
918        private final RuleApplicationData appData;
919
920        /** The from. */
921        private final int from;
922
923        /** The to. */
924        private final int to;
925
926        /**
927         * Instantiates a new parallel pairwise aligner.
928         *
929         * @param appData
930         *            the app data
931         * @param from
932         *            the from
933         * @param to
934         *            the to
935         */
936        ParallelPairwiseAligner(RuleApplicationData appData, int from, int to) {
937            this.appData = appData;
938            this.from = from;
939            this.to = to;
940        }
941
942        /*
943         * (non-Javadoc)
944         *
945         * @see java.lang.Runnable#run()
946         */
947        @Override
948        public void run() {
949            int count = 0;
950            final int size = to + 1 - from;
951
952            for (int i = from; i <= to; i++) {
953                final NumberSequence ns1 = appData.getNumberSequences().get(i);
954                count++;
955                RuleUtils.printProgressPercentage("Aligning Progress", count, size);
956                for (int j = 0; j < appData.getNumberSequences().size(); j++) {
957                    final NumberSequence ns2 = appData.getNumberSequences().get(j);
958                    if (i != j) {
959                        final AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create();
960                        // threshold was 9
961                        aa.align(ns1, ns2, appData.getSubmat(),
962                                 SequenceForTaskDetectionRuleAlignment.alignmentThreshold);
963                        synchronized (appData.getMatchseqs()) {
964                            appData.getMatchseqs().addAll(aa.getMatches());
965                        }
966                    }
967                }
968            }
969        }
970    }
971
972    /**
973     * The Class ParallelPairwiseAligner.
974     */
975    private class ParallelMatchReplacer implements Runnable {
976
977        /** The app data. */
978        private final RuleApplicationData appData;
979
980        /** The from. */
981        private final int from;
982
983        /** The to. */
984        private final int to;
985
986        /**
987         * Instantiates a new parallel pairwise aligner.
988         *
989         * @param appData
990         *            the app data
991         * @param from
992         *            the from
993         * @param to
994         *            the to
995         */
996        ParallelMatchReplacer(RuleApplicationData appData, int from, int to) {
997            this.appData = appData;
998            this.from = from;
999            this.to = to;
1000        }
1001
1002        /*
1003         * (non-Javadoc)
1004         *
1005         * @see java.lang.Runnable#run()
1006         */
1007        @Override
1008        public void run() {
1009            for (int i = from; i <= to; i++) {
1010                //System.out.println("Replacing in SEQUENCE " + i);
1011                /*
1012                 * HashMap for keeping track in which sequence which replacement has been performed.
1013                 * Neccessary for updating the indices of other occurrences accordingly
1014                 */
1015                LinkedList<MatchOccurrence> replacedOccurrences = new LinkedList<MatchOccurrence>();
1016                invalidOccurence: while (!appData.getPlannedReplacements()[i].isEmpty()) {
1017
1018                    PlannedReplacement pr = appData.getPlannedReplacements()[i].remove();
1019                   
1020                    MatchOccurrence oc = pr.getOccurrence();
1021                    // check if we have any replaced occurrence with
1022                    // indexes
1023                    // smaller than ours. If so, we need to adjust
1024                    // our start and end points of the replacement.
1025                    // Also do a check if we have replaced this
1026                    // specific MatchOccurence in this sequence already.
1027                    // Jump to the next occurrence if this is the case.
1028                    // This is no more necessary once the matches
1029                    // are harmonized.
1030
1031                    for (final Iterator<MatchOccurrence> jt = replacedOccurrences.iterator(); jt
1032                        .hasNext();)
1033                    {
1034                        final MatchOccurrence tmpOC = jt.next();
1035
1036                        if ((oc.getStartindex() >= tmpOC.getStartindex()) &&
1037                            (oc.getStartindex() <= tmpOC.getEndindex()))
1038                        {
1039                            continue invalidOccurence;
1040                        }
1041                        if (oc.getEndindex() >= tmpOC.getStartindex()) {
1042                            continue invalidOccurence;
1043
1044                        }
1045                        else if (oc.getStartindex() > tmpOC.getEndindex()) {
1046                            final int diff = tmpOC.getEndindex() - tmpOC.getStartindex();
1047                            // Just to be sure.
1048                            if (diff > 0) {
1049                                oc.setStartindex((oc.getStartindex() - diff) + 1);
1050                                oc.setEndindex((oc.getEndindex() - diff) + 1);
1051                            }
1052                            else {
1053                                Console
1054                                    .traceln(Level.WARNING,
1055                                             "End index of a Match before start. This should never happen");
1056                            }
1057                        }
1058                    }
1059                    synchronized (appData) {
1060                        appData.detectedAndReplacedTasks = true;
1061                    }
1062                   
1063
1064                    //System.out.println("Replacing in Sequence " + oc.getSequenceId() +
1065                    //    " at position " + oc.getStartindex() + " till " + oc.getEndindex());
1066                    final ISequenceInstance sequenceInstances =
1067                        RuleUtils.createNewSubSequenceInRange(appData.getSessions()
1068                                                                  .get(oc.getSequenceId()), oc
1069                                                                  .getStartindex(), oc
1070                                                                  .getEndindex(), (ISequence) appData.getPreparedTasks().get(pr.getMatchId()),
1071                                                              taskFactory, taskBuilder);
1072
1073                    // Adjust the length of the match regarding to the
1074                    // length of
1075                    // instance. (OptionalInstances may be shorter)
1076                    oc.setEndindex((oc.getStartindex() + sequenceInstances.size()) -
1077                        RuleUtils.missedOptionals);
1078
1079                    replacedOccurrences.add(oc);
1080
1081                }
1082            }
1083        }
1084    }
1085   
1086    private static class PlannedReplacement {
1087        private int matchId;
1088        private int getMatchId() {
1089            return matchId;
1090        }
1091
1092        private MatchOccurrence getOccurrence() {
1093            return occurrence;
1094        }
1095
1096        private MatchOccurrence occurrence;
1097       
1098        private PlannedReplacement(int matchId,MatchOccurrence occurrence) {
1099            this.matchId = matchId;
1100            this.occurrence = occurrence;
1101        }
1102    }
1103
1104    /**
1105     * The Class RuleApplicationData.
1106     */
1107    private static class RuleApplicationData implements Serializable {
1108
1109        /** The Constant serialVersionUID. */
1110        private static final long serialVersionUID = -7559657686755522960L;
1111
1112        /**
1113         * The number2task HashMap. Since we align the tasks just by their integer id, we need this
1114         * to convert them back to Tasks again
1115         */
1116        private final HashMap<Integer, ITask> number2task;
1117
1118
1119        /**
1120         * The substitution matrix used by the alignment algorithm to be able to compare distances
1121         * of tasks
1122         */
1123        private final ObjectDistanceSubstitionMatrix submat;
1124
1125        private LinkedList<PlannedReplacement>[] plannedReplacements;
1126
1127        /** The list of all found matches */
1128        private LinkedList<Match> matchseqs;
1129       
1130        private ArrayList<ITask> preparedTasks;
1131
1132        public ArrayList<ITask> getPreparedTasks() {
1133            return preparedTasks;
1134        }
1135
1136        /**
1137         * The generated NumberSequences. This is the integer representation of the user sessions
1138         */
1139        private ArrayList<NumberSequence> numberseqs;
1140
1141        /** The list of newly created tasks (of one iteration). */
1142        private final LinkedList<ITask> newTasks;
1143
1144        /** The user sessions containing all EventTasks/Instances */
1145        private final List<IUserSession> sessions;
1146
1147        /** True if we replaced something in the user sessions in one iteration. */
1148        private boolean detectedAndReplacedTasks;
1149
1150        /** The result we return from this rule */
1151        private final RuleApplicationResult result;
1152
1153        /** Stop Watch to measure performance */
1154        private final StopWatch stopWatch;
1155
1156        /**
1157         * Instantiates a new rule application data.
1158         *
1159         * @param sessions
1160         *            The user sessions
1161         */
1162        private RuleApplicationData(List<IUserSession> sessions) {
1163            this.sessions = sessions;
1164            numberseqs = new ArrayList<NumberSequence>();
1165            number2task = new HashMap<Integer, ITask>();
1166            stopWatch = new StopWatch();
1167            result = new RuleApplicationResult();
1168            preparedTasks = new ArrayList<ITask>();
1169            submat = new ObjectDistanceSubstitionMatrix(maxSubstitutionScore, gapPenalty, false);
1170            newTasks = new LinkedList<ITask>();
1171            this.detectedAndReplacedTasks = true;
1172        }
1173
1174        private void initializeQueues(int size) {
1175            plannedReplacements = new LinkedList[size];
1176            for (int i = 0; i < size; i++) {
1177                plannedReplacements[i] = new LinkedList<PlannedReplacement>();
1178            }
1179        }
1180
1181        public Queue<PlannedReplacement>[] getPlannedReplacements() {
1182            return plannedReplacements;
1183        }
1184
1185        /**
1186         * Detected and replaced tasks.
1187         *
1188         * @return true, if successful
1189         */
1190        private boolean detectedAndReplacedTasks() {
1191            return detectedAndReplacedTasks;
1192        }
1193
1194        /**
1195         * Gets the match sequences.
1196         *
1197         * @return the match sequences
1198         */
1199        public LinkedList<Match> getMatchseqs() {
1200            return matchseqs;
1201        }
1202
1203        /**
1204         * Gets the new tasks.
1205         *
1206         * @return the new tasks
1207         */
1208        public LinkedList<ITask> getNewTasks() {
1209            return newTasks;
1210        }
1211
1212        /**
1213         * Gets the number2task.
1214         *
1215         * @return the number2task HashMap
1216         */
1217        private HashMap<Integer, ITask> getNumber2Task() {
1218            return number2task;
1219        }
1220
1221        /**
1222         * Gets the number sequences.
1223         *
1224         * @return the number sequences
1225         */
1226        private ArrayList<NumberSequence> getNumberSequences() {
1227            return numberseqs;
1228        }
1229
1230        /**
1231         * Gets the result.
1232         *
1233         * @return the result
1234         */
1235        private RuleApplicationResult getResult() {
1236            return result;
1237        }
1238
1239        /**
1240         * Gets the sessions.
1241         *
1242         * @return the UserSessions as List.
1243         */
1244        private List<IUserSession> getSessions() {
1245            return sessions;
1246        }
1247
1248        /**
1249         * Gets the stop watch.
1250         *
1251         * @return the stopWatch
1252         */
1253        private StopWatch getStopWatch() {
1254            return stopWatch;
1255        }
1256
1257        /**
1258         * Gets the substitution matrix.
1259         *
1260         * @return the substitution matrix
1261         */
1262        private ObjectDistanceSubstitionMatrix getSubmat() {
1263            return submat;
1264        }
1265
1266        /**
1267         * New task created.
1268         *
1269         * @param task
1270         *            can be called when new tasks are created to keep track of all newly created
1271         *            tasks
1272         */
1273        private void newTaskCreated(ITask task) {
1274            number2task.put(task.getId(), task);
1275            newTasks.add(task);
1276        }
1277
1278        /**
1279         * Reset newly created tasks.
1280         */
1281        synchronized private void resetNewlyCreatedTasks() {
1282            newTasks.clear();
1283        }
1284
1285        /**
1286         * Sets the number sequences.
1287         *
1288         * @param numberseqs
1289         *            the new number sequences
1290         */
1291        private void setNumberSequences(ArrayList<NumberSequence> numberseqs) {
1292            this.numberseqs = numberseqs;
1293        }
1294
1295        /**
1296         * Update substitution matrix.
1297         */
1298        private void updateSubstitutionMatrix() {
1299            submat.update(getNewTasks());
1300            resetNewlyCreatedTasks();
1301        }
1302
1303    }
1304
1305}
Note: See TracBrowser for help on using the repository browser.