source: branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java @ 1760

Last change on this file since 1760 was 1760, checked in by rkrimmel, 10 years ago

Memory improvement and bugs

File size: 48.4 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.io.Serializable;
18import java.util.ArrayDeque;
19import java.util.ArrayList;
20import java.util.Collections;
21import java.util.Comparator;
22import java.util.HashMap;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.LinkedList;
26import java.util.List;
27import java.util.Map;
28import java.util.Queue;
29import java.util.Set;
30import java.util.concurrent.ExecutorService;
31import java.util.concurrent.Executors;
32import java.util.concurrent.TimeUnit;
33import java.util.logging.Level;
34
35import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm;
36import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithmFactory;
37import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Match;
38import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.MatchOccurrence;
39import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
40import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.ObjectDistanceSubstitionMatrix;
41import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
42import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
43import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
44import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional;
45import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
46import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
47import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
48import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
49import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
50import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
51import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
52import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
53import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
54import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
55import de.ugoe.cs.util.StopWatch;
56import de.ugoe.cs.util.console.Console;
57
58/**
59 * <p>
60 * This class implements the major rule for creating task trees based on a set of recorded user
61 * sessions. For this, it first harmonizes all tasks. This eases later comparison. Then it searches
62 * the sessions for iterations and replaces them accordingly. Then it searches for sub sequences
63 * using alignment algorithms For each found sub sequence, it replaces the occurrences by creating
64 * appropriate {@link ISequence}s. Afterwards, again searches for iterations and then again for sub
65 * sequences until no more replacements are done.
66 * </p>
67 * <p>
68 *
69 *
70 * @author Ralph Krimmel
71 */
72public class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
73
74    /** The n threads. */
75    public static int nThreads = Runtime.getRuntime().availableProcessors() - 1;
76    //public static int nThreads = 1;
77
78    /** The iteration. */
79    private int iteration = 0;
80
81    private int maxIterations = 10
82            ;
83
84    private static int alignmentThreshold = 9;
85    private static int gapPenalty = -3;
86
87    private static float maxSubstitutionScore = 6;
88
89    /**
90     * <p>
91     * the task factory to be used for creating substructures for the temporal relationships
92     * identified during rul application
93     * </p>
94     * .
95     */
96    private final ITaskFactory taskFactory;
97
98    /**
99     * <p>
100     * the task builder to be used for creating substructures for the temporal relationships
101     * identified during rule application
102     * </p>
103     * .
104     */
105    private final ITaskBuilder taskBuilder;
106
107    /**
108     * <p>
109     * the task handling strategy to be used for comparing tasks for preparation, i.e., before the
110     * tasks are harmonized
111     * </p>
112     */
113    private final TaskHandlingStrategy preparationTaskHandlingStrategy;
114
115    /**
116     * <p>
117     * instantiates the rule and initializes it with a task equality to be considered when comparing
118     * tasks as well as a task factory and builder to be used for creating task structures.
119     * </p>
120     *
121     * @param minimalTaskEquality
122     *            the task equality to be considered when comparing tasks
123     * @param taskFactory
124     *            the task factory to be used for creating substructures
125     * @param taskBuilder
126     *            the task builder to be used for creating substructures
127     */
128
129    SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
130                                          ITaskFactory taskFactory,
131                                          ITaskBuilder taskBuilder)
132    {
133        this.taskFactory = taskFactory;
134        this.taskBuilder = taskBuilder;
135
136        this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(minimalTaskEquality);
137    }
138
139    /*
140     * (non-Javadoc)
141     *
142     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply (java.util.List)
143     */
144    @Override
145    public RuleApplicationResult apply(List<IUserSession> sessions) {
146        final RuleApplicationData appData = new RuleApplicationData(sessions);
147
148        harmonizeEventTaskInstancesModel(appData);
149
150        Console.traceln(Level.INFO, "generating substitution matrix from " +
151            appData.getNumber2Task().size() + " unique tasks");
152        appData.getStopWatch().start("substitution matrix");
153        appData.getSubmat().generate(appData.getNumber2Task().values());
154        appData.getStopWatch().stop("substitution matrix");
155
156        Console.traceln(Level.INFO, "Starting main loop");
157        do {
158            Console.traceln(Level.INFO, "Iteration Number: " + iteration);
159            iteration++;
160            appData.detectedAndReplacedTasks = false;
161            appData.getStopWatch().start("whole loop");
162            detectAndReplaceIterations(appData);
163            appData.getStopWatch().start("task replacement");
164            // Just does anything if the substitution matrix is created with the
165            // option to do so
166            appData.updateSubstitutionMatrix();
167            detectAndReplaceTasks(appData); //
168            appData.getStopWatch().stop("task replacement");
169            appData.getStopWatch().stop("whole loop");
170            appData.getStopWatch().dumpStatistics(System.out);
171            appData.getStopWatch().reset();
172
173        } while (appData.detectedAndReplacedTasks());
174        //} while (iteration < maxIterations);
175        Console.println("created " + appData.getResult().getNewlyCreatedTasks().size() +
176            " new tasks and " + appData.getResult().getNewlyCreatedTaskInstances().size() +
177            " appropriate instances\n");
178
179        if ((appData.getResult().getNewlyCreatedTasks().size() > 0) ||
180            (appData.getResult().getNewlyCreatedTaskInstances().size() > 0))
181        {
182            appData.getResult().setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
183        }
184        // new TaskTreeValidator().validate(appData.getSessions());
185        return appData.getResult();
186    }
187
188    /**
189     * Creates the number sequences.
190     *
191     * @param appData
192     *            the app data
193     * @return the array list
194     */
195    private ArrayList<NumberSequence> createNumberSequences(RuleApplicationData appData) {
196        final ArrayList<NumberSequence> result = new ArrayList<NumberSequence>();
197        for (int i = 0; i < appData.getSessions().size(); i++) {
198            final IUserSession session = appData.getSessions().get(i);
199            final NumberSequence templist = new NumberSequence(session.size());
200            for (int j = 0; j < session.size(); j++) {
201                final ITaskInstance taskInstance = session.get(j);
202                templist.getSequence()[j] = taskInstance.getTask().getId();
203            }
204            // Each NumberSequence is identified by its id, beginning to count
205            // at zero
206            templist.setId(i);
207            result.add(templist);
208        }
209        return result;
210    }
211
212    /**
213     * <p>
214     * searches for direct iterations of single tasks in all sequences and replaces them with
215     * {@link IIteration}s, respectively appropriate instances. Also all single occurrences of a
216     * task that is iterated somewhen are replaced with iterations to have again an efficient way
217     * for task comparisons.
218     * </p>
219     *
220     * @param appData
221     *            the rule application data combining all data used for applying this rule
222     */
223    private void detectAndReplaceIterations(RuleApplicationData appData) {
224        Console.traceln(Level.FINE, "detecting iterations");
225        appData.getStopWatch().start("detecting iterations");
226
227        final List<IUserSession> sessions = appData.getSessions();
228
229        final Set<ITask> iteratedTasks = searchIteratedTasks(sessions);
230
231        if (iteratedTasks.size() > 0) {
232            replaceIterationsOf(iteratedTasks, sessions, appData);
233        }
234
235        appData.getStopWatch().stop("detecting iterations");
236        Console.traceln(Level.INFO, "replaced " + iteratedTasks.size() + " iterated tasks");
237    }
238
239    /**
240     * Detect and replace tasks.
241     *
242     * @param appData
243     *            the rule application data combining all data used for applying this rule
244     */
245    private void detectAndReplaceTasks(RuleApplicationData appData) {
246        Console.traceln(Level.FINE, "detecting and replacing tasks");
247        appData.getStopWatch().start("detecting tasks");
248
249        // Create NumberSequences
250        appData.setNumberSequences(this.createNumberSequences(appData));
251
252        // Generate pairwise alignments
253        // appData.setMatchseqs(generatePairwiseAlignments(appData));
254        generatePairwiseAlignments(appData);
255        Console.traceln(Level.FINE, "Found " + appData.getMatchseqs().size() + " results");
256
257        // Nothing found, we can end here
258        if (appData.getMatchseqs().size() == 0) {
259            appData.detectedAndReplacedTasks = false;
260            return;
261        }
262
263        // Searching each match in all other sessions, counting its occurences
264        searchMatchesInAllSessions(appData);
265
266        // Sort results to get the most occurring results
267        Console.traceln(Level.INFO, "sorting " + appData.getMatchseqs().size() + " results");
268        final Comparator<Match> comparator = new Comparator<Match>() {
269            @Override
270            public int compare(Match m1, Match m2) {
271                int cmp = m2.occurenceCount() - m1.occurenceCount();
272                if (cmp != 0) {
273                    return cmp;
274                }
275                else {
276                    cmp = m2.size() - m1.size();
277                    if (cmp != 0) {
278                        return cmp;
279                    }
280                    else {
281                        // This should rarely happen
282                        cmp = m2.ocurrenceIDSum() - m1.ocurrenceIDSum();
283                        if (cmp != 0) {
284                            return cmp;
285                        }
286                        else {
287                            cmp = m2.taskIdSum() - m1.taskIdSum();
288
289                            return cmp;
290                        }
291                    }
292                }
293            }
294        };
295
296        Collections.sort(appData.getMatchseqs(), comparator);
297        appData.getStopWatch().stop("detecting tasks");
298
299        Console.traceln(Level.INFO, "Preparing replacments");
300        prepareReplacements(appData);
301        Console.traceln(Level.INFO, "Replacing matches in sessions");
302        appData.getStopWatch().start("replacing tasks");
303        replaceMatches(appData);
304        appData.getStopWatch().stop("replacing tasks");
305    }
306
307    // TODO: DEBUG METHOD
308    @SuppressWarnings("unused")
309    private void printMatches(RuleApplicationData appData) {
310        LinkedList<Match> matchseqs = appData.getMatchseqs();
311        if (iteration > 1) {
312            System.out.println("PRINTING MATCHES");
313            for (Iterator<Match> it = matchseqs.iterator(); it.hasNext();) {
314                Match m = it.next();
315                m.getFirstSequence().printSequence();
316                m.getSecondSequence().printSequence();
317                for (Iterator<MatchOccurrence> jt = m.getOccurences().iterator(); jt.hasNext();) {
318                    MatchOccurrence mo = jt.next();
319                    System.out.print(mo.getSequenceId() + " ");
320                }
321                System.out.println();
322                System.out.println();
323            }
324        }
325    }
326
327    /**
328     * <p>
329     * harmonizes the event task instances by unifying tasks. This is done, as initially the event
330     * tasks being equal with respect to the considered task equality are distinct objects. The
331     * comparison of these distinct objects is more time consuming than comparing the object
332     * references.
333     * </p>
334     *
335     * @param appData
336     *            the rule application data combining all data used for applying this rule
337     * @return Returns the unique tasks symbol map
338     */
339    private void harmonizeEventTaskInstancesModel(RuleApplicationData appData) {
340        Console.traceln(Level.INFO, "harmonizing task model of event task instances");
341        appData.getStopWatch().start("harmonizing event tasks");
342        final SymbolMap<ITaskInstance, ITask> uniqueTasks =
343            preparationTaskHandlingStrategy.createSymbolMap();
344
345        final TaskInstanceComparator comparator =
346            preparationTaskHandlingStrategy.getTaskComparator();
347
348        int unifiedTasks = 0;
349        ITask task;
350        final List<IUserSession> sessions = appData.getSessions();
351        for (int j = 0; j < sessions.size(); j++) {
352            final IUserSession session = sessions.get(j);
353
354            for (int i = 0; i < session.size(); i++) {
355                final ITaskInstance taskInstance = session.get(i);
356                task = uniqueTasks.getValue(taskInstance);
357
358                if (task == null) {
359                    uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
360                    appData.getNumber2Task().put(taskInstance.getTask().getId(),
361                                                 taskInstance.getTask());
362                }
363                else {
364                    taskBuilder.setTask(taskInstance, task);
365                    unifiedTasks++;
366                }
367            }
368            comparator.clearBuffers();
369        }
370
371        appData.getStopWatch().stop("harmonizing event tasks");
372        Console.traceln(Level.INFO, "harmonized " + unifiedTasks + " task occurrences (still " +
373            uniqueTasks.size() + " different tasks)");
374
375        appData.getStopWatch().dumpStatistics(System.out);
376        appData.getStopWatch().reset();
377    }
378
379    /**
380     * <p>
381     * TODO clarify why this is done (in fact, ask Patrick Harms)
382     * </p>
383     * .
384     *
385     * @param iteration
386     *            the iteration
387     * @param iterationInstances
388     *            the iteration instances
389     */
390    private void harmonizeIterationInstancesModel(IIteration iteration,
391                                                  List<IIterationInstance> iterationInstances)
392    {
393        final List<ITask> iteratedTaskVariants = new LinkedList<ITask>();
394        final TaskInstanceComparator comparator =
395            preparationTaskHandlingStrategy.getTaskComparator();
396
397        // merge the lexically different variants of iterated task to a unique
398        // list
399        for (final IIterationInstance iterationInstance : iterationInstances) {
400            for (final ITaskInstance executionVariant : iterationInstance) {
401                final ITask candidate = executionVariant.getTask();
402
403                boolean found = false;
404                for (final ITask taskVariant : iteratedTaskVariants) {
405                    if (comparator.areLexicallyEqual(taskVariant, candidate)) {
406                        taskBuilder.setTask(executionVariant, taskVariant);
407                        found = true;
408                        break;
409                    }
410                }
411
412                if (!found) {
413                    iteratedTaskVariants.add(candidate);
414                }
415            }
416        }
417
418        // if there are more than one lexically different variant of iterated
419        // tasks, adapt the
420        // iteration model to be a selection of different variants. In this case
421        // also adapt
422        // the generated iteration instances to correctly contain selection
423        // instances. If there
424        // is only one variant of an iterated task, simply set this as the
425        // marked task of the
426        // iteration. In this case, the instances can be preserved as is
427        if (iteratedTaskVariants.size() > 1) {
428            final ISelection selection = taskFactory.createNewSelection();
429
430            for (final ITask variant : iteratedTaskVariants) {
431                taskBuilder.addChild(selection, variant);
432            }
433
434            taskBuilder.setMarkedTask(iteration, selection);
435
436            for (final IIterationInstance instance : iterationInstances) {
437                for (int i = 0; i < instance.size(); i++) {
438                    final ISelectionInstance selectionInstance =
439                        taskFactory.createNewTaskInstance(selection);
440                    taskBuilder.setChild(selectionInstance, instance.get(i));
441                    taskBuilder.setTaskInstance(instance, i, selectionInstance);
442                }
443            }
444        }
445        else {
446            taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
447        }
448    }
449
450    /**
451     * Match as sequence.
452     *
453     * @param appData
454     *            RuleApplicationData needed to keep track of all created tasks
455     * @param m
456     *            The match to be converted into a Task
457     * @return The task of the match with an ISequence as its root
458     */
459    synchronized public ISequence matchAsSequence(RuleApplicationData appData, Match m) {
460        final ISequence sequence = taskFactory.createNewSequence();
461        appData.newTaskCreated(sequence);
462
463        final int[] first = m.getFirstSequence().getSequence();
464        final int[] second = m.getSecondSequence().getSequence();
465
466        // Both sequences of a match are equally long
467        for (int i = 0; i < m.getFirstSequence().size(); i++) {
468
469            // Two gaps aligned to each other: Have not seen it happening so
470            // far, just to handle it
471            if ((first[i] == -1) && (second[i] == -1)) {
472                // Do nothing here.
473            }
474            // Both events are equal, we can simply add the task referring to
475            // the number
476            else if (first[i] == second[i]) {
477                taskBuilder.addChild(sequence, appData.getNumber2Task().get(first[i]));
478            }
479            // We have a gap in the first sequence, we need to add the task of
480            // the second sequence as optional
481            else if ((first[i] == -1) && (second[i] != -1)) {
482                final IOptional optional = taskFactory.createNewOptional();
483                appData.newTaskCreated(optional);
484                taskBuilder.setMarkedTask(optional, appData.getNumber2Task().get(second[i]));
485                taskBuilder.addChild(sequence, optional);
486            }
487            // We have a gap in the second sequence, we need to add the task of
488            // the first sequence as optional
489            else if ((first[i] != -1) && (second[i] == -1)) {
490                final IOptional optional = taskFactory.createNewOptional();
491                appData.newTaskCreated(optional);
492                taskBuilder.setMarkedTask(optional, appData.getNumber2Task().get(first[i]));
493                taskBuilder.addChild(sequence, optional);
494            }
495            // Both tasks are not equal, we need to insert a selection here.
496            // Now things get complicated. We first need to check
497            // if the next position is not a selection. Then we can just create
498            // a selection
499            // of the both Tasks
500            // In the other case (more than one selection following this
501            // selection), we want to
502            // create a selection of sequences where each sequence gets the
503            // corresponding task of
504            // the its sequence in the pattern.
505            //
506            else if (i < (first.length - 1)) {
507
508                if ((first[i] != second[i]) &&
509                    (((first[i + 1] == second[i + 1]) || (first[i + 1] == -1) || (second[i + 1] == -1))))
510                {
511
512                    final ISelection selection = taskFactory.createNewSelection();
513                    appData.newTaskCreated(selection);
514                    taskBuilder.addChild(selection, appData.getNumber2Task().get(first[i]));
515                    taskBuilder.addChild(selection, appData.getNumber2Task().get(second[i]));
516                    taskBuilder.addChild(sequence, selection);
517                }
518                else {
519                    boolean selectionfound = true;
520                    final ISelection selection = taskFactory.createNewSelection();
521                    appData.newTaskCreated(selection);
522
523                    final ISequence subsequence1 = taskFactory.createNewSequence();
524                    appData.newTaskCreated(subsequence1);
525
526                    final ISequence subsequence2 = taskFactory.createNewSequence();
527                    appData.newTaskCreated(subsequence2);
528
529                    taskBuilder.addChild(selection, subsequence1);
530                    taskBuilder.addChild(selection, subsequence2);
531                    taskBuilder.addChild(sequence, selection);
532                    // TODO: We may not run till the end!
533                    while ((i < (first.length - 1)) && selectionfound) {
534                        selectionfound = false;
535                        taskBuilder.addChild(subsequence1, appData.getNumber2Task().get(first[i]));
536                        taskBuilder.addChild(subsequence2, appData.getNumber2Task().get(second[i]));
537                        if ((first[i + 1] != second[i + 1]) && (first[i + 1] != -1) &&
538                            (second[i + 1] != -1))
539                        {
540                            selectionfound = true;
541                        }
542                        else {
543                            continue;
544                        }
545                        i++;
546                    }
547                    if ((i == (first.length - 1)) && selectionfound) {
548                        taskBuilder.addChild(subsequence1, appData.getNumber2Task().get(first[i]));
549                        taskBuilder.addChild(subsequence2, appData.getNumber2Task().get(second[i]));
550                    }
551                }
552            }
553            else {
554                // i = length-1
555                if ((first[i] != second[i])) {
556
557                    final ISelection selection = taskFactory.createNewSelection();
558                    appData.newTaskCreated(selection);
559                    taskBuilder.addChild(selection, appData.getNumber2Task().get(first[i]));
560                    taskBuilder.addChild(selection, appData.getNumber2Task().get(second[i]));
561                    taskBuilder.addChild(sequence, selection);
562                }
563            }
564
565        }
566        return sequence;
567    }
568
569    /**
570     * <p>
571     * replaces all occurrences of all tasks provided in the set with iterations
572     * </p>
573     * .
574     *
575     * @param iteratedTasks
576     *            the tasks to be replaced with iterations
577     * @param sessions
578     *            the sessions in which the tasks are to be replaced
579     * @param appData
580     *            the rule application data combining all data used for applying this rule
581     */
582    private void replaceIterationsOf(Set<ITask> iteratedTasks,
583                                     List<IUserSession> sessions,
584                                     RuleApplicationData appData)
585    {
586        final Map<ITask, IIteration> iterations = new HashMap<ITask, IIteration>();
587        final Map<IIteration, List<IIterationInstance>> iterationInstances =
588            new HashMap<IIteration, List<IIterationInstance>>();
589
590        for (final ITask iteratedTask : iteratedTasks) {
591
592            final IIteration iteration = taskFactory.createNewIteration();
593            appData.newTaskCreated(iteration);
594            iterations.put(iteratedTask, iteration);
595            iterationInstances.put(iteration, new LinkedList<IIterationInstance>());
596        }
597
598        IIterationInstance iterationInstance;
599
600        for (final IUserSession session : sessions) {
601            int index = 0;
602            iterationInstance = null;
603
604            while (index < session.size()) {
605                // we prepared the task instances to refer to unique tasks, if
606                // they are treated
607                // as equal. Therefore, we just compare the identity of the
608                // tasks of the task
609                // instances
610                final ITask currentTask = session.get(index).getTask();
611                final IIteration iteration = iterations.get(currentTask);
612                if (iteration != null) {
613                    if ((iterationInstance == null) || (iterationInstance.getTask() != iteration)) {
614                        iterationInstance = taskFactory.createNewTaskInstance(iteration);
615                        iterationInstances.get(iteration).add(iterationInstance);// TODO:: Don't
616                                                                                 // create
617                        // TaskInstances here,
618                        // use a set of tasks
619                        // instead
620                        taskBuilder.addTaskInstance(session, index, iterationInstance);
621                        index++;
622                    }
623
624                    taskBuilder.addChild(iterationInstance, session.get(index));
625                    taskBuilder.removeTaskInstance(session, index);
626                }
627                else {
628                    if (iterationInstance != null) {
629                        iterationInstance = null;
630                    }
631                    index++;
632                }
633            }
634        }
635
636        for (final Map.Entry<IIteration, List<IIterationInstance>> entry : iterationInstances
637            .entrySet())
638        {
639            harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
640        }
641    }
642
643    /**
644     * Prepare replacements.
645     *
646     * @param appData
647     *            the app data
648     */
649    private void prepareReplacements(RuleApplicationData appData) {
650        appData.initializeQueues(appData.getSessions().size());
651        int taskId=0;
652        for (Iterator<Match> it = appData.getMatchseqs().iterator(); it.hasNext();) {
653            Match m = it.next();
654            if (m.getOccurences().size() > 2) {
655                final ISequence task = matchAsSequence(appData, m);
656                appData.getPreparedTasks().add(task);
657               
658                for (Iterator<MatchOccurrence> jt = m.getOccurences().iterator(); jt.hasNext();) {
659                    MatchOccurrence mo = jt.next();
660                    // System.out.println("Found this match in sequence " +mo.getSequenceId());
661                    PlannedReplacement pr = new PlannedReplacement(taskId,mo);
662                    // System.out.println("Adding match to sequence " + mo.getSequenceId());
663                    appData.getPlannedReplacements()[mo.getSequenceId()].add(pr);
664                }
665                taskId++;
666            }
667        }
668        appData.matchseqs =null;
669    }
670
671    /**
672     * Replace matches.
673     *
674     * @param appData
675     *            the app data
676     */
677    private void replaceMatches(RuleApplicationData appData) {
678
679        final int numberSeqSize = appData.getNumberSequences().size();
680        Console.traceln(Level.INFO, "replacing matches with " + nThreads + " threads");
681        int newThreads = nThreads;
682        if (numberSeqSize < nThreads) {
683            newThreads = numberSeqSize;
684        }
685        final int interval = numberSeqSize / newThreads;
686        int rest = numberSeqSize % newThreads;
687        final ExecutorService executor = Executors.newFixedThreadPool(nThreads);
688        for (int i = 0; i <= (numberSeqSize - interval); i += interval) {
689            int offset = 0;
690            if (rest != 0) {
691                offset = 1;
692                rest--;
693            }
694            final int from = i;
695            final int to = i + interval + offset - 1;
696            Console.traceln(Level.FINE, "Match replacing: Creating thread with matches from " +
697                from + " to " + to);
698            // search each match in every other sequence
699            final ParallelMatchReplacer replacer = new ParallelMatchReplacer(appData, from, to);
700            executor.execute(replacer);
701            i += offset;
702        }
703        executor.shutdown();
704        try {
705            executor.awaitTermination(2, TimeUnit.HOURS);
706        }
707        catch (final InterruptedException e) {
708            e.printStackTrace();
709        }
710        appData.getPreparedTasks().clear();
711    }
712
713    /**
714     * <p>
715     * searches the provided sessions for task iterations. If a task is iterated, it is added to the
716     * returned set.
717     * </p>
718     *
719     * @param sessions
720     *            the sessions
721     * @return a set of tasks being iterated somewhere
722     */
723    private Set<ITask> searchIteratedTasks(List<IUserSession> sessions) {
724        final Set<ITask> iteratedTasks = new HashSet<ITask>();
725        for (final IUserSession session : sessions) {
726            for (int i = 0; i < (session.size() - 1); i++) {
727                // we prepared the task instances to refer to unique tasks, if
728                // they are treated
729                // as equal. Therefore, we just compare the identity of the
730                // tasks of the task
731                // instances
732                if (session.get(i).getTask() == session.get(i + 1).getTask()) {
733                    iteratedTasks.add(session.get(i).getTask());
734                }
735            }
736        }
737        return iteratedTasks;
738    }
739
740    /**
741     * Generate pairwise alignments.
742     *
743     * @param appData
744     *            the app data
745     */
746    private void generatePairwiseAlignments(RuleApplicationData appData) {
747        final int numberSeqSize = appData.getNumberSequences().size();
748        appData.matchseqs = new LinkedList<Match>();
749        Console.traceln(Level.INFO, "generating pairwise alignments from " + numberSeqSize +
750            " sessions with " + nThreads + " threads");
751
752        int newThreads = nThreads;
753        if (numberSeqSize < nThreads) {
754            newThreads = numberSeqSize;
755        }
756
757        final ExecutorService executor = Executors.newFixedThreadPool(newThreads);
758        final int interval = numberSeqSize / newThreads;
759        int rest = numberSeqSize % newThreads;
760        Console.traceln(Level.FINE, "Interval: " + interval + " Rest: " + rest);
761        for (int i = 0; i <= (numberSeqSize - interval); i += interval) {
762            int offset = 0;
763            if (rest != 0) {
764                offset = 1;
765                rest--;
766            }
767
768            final int from = i;
769            final int to = i + interval + offset - 1;
770            Console.traceln(Level.FINER, "Aligning: Creating thread for sessions " + from +
771                " till " + to);
772            final ParallelPairwiseAligner aligner = new ParallelPairwiseAligner(appData, from, to);
773            executor.execute(aligner);
774            i += offset;
775        }
776        executor.shutdown();
777        try {
778            executor.awaitTermination(2, TimeUnit.HOURS);
779        }
780        catch (final InterruptedException e) {
781            e.printStackTrace();
782        }
783    }
784
785    /**
786     * Search matches in all sessions.
787     *
788     * @param appData
789     *            the app data
790     */
791    private void searchMatchesInAllSessions(RuleApplicationData appData) {
792
793        // Prepare parallel search of matchseqs
794        final int matchSeqSize = appData.getMatchseqs().size();
795        Console.traceln(Level.INFO, "searching for patterns (" + matchSeqSize +
796            ") occuring most with " + nThreads + " threads");
797        int newThreads = nThreads;
798        if (matchSeqSize < nThreads) {
799            newThreads = matchSeqSize;
800        }
801        final int interval = matchSeqSize / newThreads;
802        int rest = matchSeqSize % newThreads;
803        final ExecutorService executor = Executors.newFixedThreadPool(nThreads);
804        Console.traceln(Level.FINER, "Interval: " + interval + " Rest: " + rest);
805        for (int i = 0; i <= (matchSeqSize - interval); i += interval) {
806            int offset = 0;
807            if (rest != 0) {
808                offset = 1;
809                rest--;
810            }
811            final int from = i;
812            final int to = i + interval + offset - 1;
813            Console.traceln(Level.FINE, "Match finding: Creating thread with matches from " + from +
814                " to " + to);
815            // search each match in every other sequence
816            final ParallelMatchOcurrencesFinder finder =
817                new ParallelMatchOcurrencesFinder(appData, from, to);
818            executor.execute(finder);
819            i += offset;
820        }
821        executor.shutdown();
822        try {
823            executor.awaitTermination(2, TimeUnit.HOURS);
824        }
825        catch (final InterruptedException e) {
826            e.printStackTrace();
827        }
828
829    }
830
831    /*
832     * (non-Javadoc)
833     *
834     * @see java.lang.Object#toString()
835     */
836    @Override
837    public String toString() {
838        return "SequenceForTaskDetectionRuleAlignment";
839    }
840
841    /**
842     * The Class ParallelMatchOcurrencesFinder.
843     */
844    private class ParallelMatchOcurrencesFinder implements Runnable {
845
846        /** The app data. */
847        private final RuleApplicationData appData;
848
849        /** The from. */
850        private final int from;
851
852        /** The to. */
853        private final int to;
854
855        /**
856         * Instantiates a new parallel match ocurrences finder.
857         *
858         * @param appData
859         *            the app data
860         * @param from
861         *            the from
862         * @param to
863         *            the to
864         */
865        ParallelMatchOcurrencesFinder(RuleApplicationData appData, int from, int to) {
866            this.appData = appData;
867            this.from = from;
868            this.to = to;
869        }
870
871        /*
872         * (non-Javadoc)
873         *
874         * @see java.lang.Runnable#run()
875         */
876        @Override
877        public void run() {
878            int count = 0;
879            final int size = to + 1 - from;
880
881            for (int i = from; i <= to; i++) {
882                final Match pattern = appData.getMatchseqs().get(i);
883                count++;
884                RuleUtils.printProgressPercentage("Match finding progress", count, size);
885                // Skip sequences with more 0 events (scrolls) than other
886                // events.
887                // Both of the pattern sequences are equally long, so the zero
888                // counts just need to be smaller than the length of one
889                // sequence
890                if ((pattern.getFirstSequence().eventCount(0) +
891                    pattern.getSecondSequence().eventCount(0) + 1) > pattern.getFirstSequence()
892                    .size())
893                {
894                    continue;
895                }
896
897                for (int j = 0; j < appData.getNumberSequences().size(); j++) {
898                    final LinkedList<Integer> startpositions =
899                        appData.getNumberSequences().get(j).containsPattern(pattern);
900                    if (startpositions.size() > 0) {
901                        for (final Iterator<Integer> jt = startpositions.iterator(); jt.hasNext();)
902                        {
903                            final int start = jt.next();
904                            pattern.addOccurence(new MatchOccurrence(start, start + pattern.size(),
905                                                                     j));
906                        }
907                    }
908                }
909            }
910        }
911    }
912
913    /**
914     * The Class ParallelPairwiseAligner.
915     */
916    private class ParallelPairwiseAligner implements Runnable {
917
918        /** The app data. */
919        private final RuleApplicationData appData;
920
921        /** The from. */
922        private final int from;
923
924        /** The to. */
925        private final int to;
926
927        /**
928         * Instantiates a new parallel pairwise aligner.
929         *
930         * @param appData
931         *            the app data
932         * @param from
933         *            the from
934         * @param to
935         *            the to
936         */
937        ParallelPairwiseAligner(RuleApplicationData appData, int from, int to) {
938            this.appData = appData;
939            this.from = from;
940            this.to = to;
941        }
942
943        /*
944         * (non-Javadoc)
945         *
946         * @see java.lang.Runnable#run()
947         */
948        @Override
949        public void run() {
950            int count = 0;
951            final int size = to + 1 - from;
952
953            for (int i = from; i <= to; i++) {
954                final NumberSequence ns1 = appData.getNumberSequences().get(i);
955                count++;
956                RuleUtils.printProgressPercentage("Aligning Progress", count, size);
957                for (int j = 0; j < appData.getNumberSequences().size(); j++) {
958                    final NumberSequence ns2 = appData.getNumberSequences().get(j);
959                    if (i != j) {
960                        final AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create();
961                        // threshold was 9
962                        aa.align(ns1, ns2, appData.getSubmat(),
963                                 SequenceForTaskDetectionRuleAlignment.alignmentThreshold);
964                        synchronized (appData.getMatchseqs()) {
965                            appData.getMatchseqs().addAll(aa.getMatches());
966                        }
967                    }
968                }
969            }
970        }
971    }
972
973    /**
974     * The Class ParallelPairwiseAligner.
975     */
976    private class ParallelMatchReplacer implements Runnable {
977
978        /** The app data. */
979        private final RuleApplicationData appData;
980
981        /** The from. */
982        private final int from;
983
984        /** The to. */
985        private final int to;
986
987        /**
988         * Instantiates a new parallel pairwise aligner.
989         *
990         * @param appData
991         *            the app data
992         * @param from
993         *            the from
994         * @param to
995         *            the to
996         */
997        ParallelMatchReplacer(RuleApplicationData appData, int from, int to) {
998            this.appData = appData;
999            this.from = from;
1000            this.to = to;
1001        }
1002
1003        /*
1004         * (non-Javadoc)
1005         *
1006         * @see java.lang.Runnable#run()
1007         */
1008        @Override
1009        public void run() {
1010            for (int i = from; i <= to; i++) {
1011                //System.out.println("Replacing in SEQUENCE " + i);
1012                /*
1013                 * HashMap for keeping track in which sequence which replacement has been performed.
1014                 * Neccessary for updating the indices of other occurrences accordingly
1015                 */
1016                LinkedList<MatchOccurrence> replacedOccurrences = new LinkedList<MatchOccurrence>();
1017                invalidOccurence: while (!appData.getPlannedReplacements()[i].isEmpty()) {
1018
1019                    PlannedReplacement pr = appData.getPlannedReplacements()[i].remove();
1020                   
1021                    MatchOccurrence oc = pr.getOccurrence();
1022                    // check if we have any replaced occurrence with
1023                    // indexes
1024                    // smaller than ours. If so, we need to adjust
1025                    // our start and end points of the replacement.
1026                    // Also do a check if we have replaced this
1027                    // specific MatchOccurence in this sequence already.
1028                    // Jump to the next occurrence if this is the case.
1029                    // This is no more necessary once the matches
1030                    // are harmonized.
1031
1032                    for (final Iterator<MatchOccurrence> jt = replacedOccurrences.iterator(); jt
1033                        .hasNext();)
1034                    {
1035                        final MatchOccurrence tmpOC = jt.next();
1036
1037                        if ((oc.getStartindex() >= tmpOC.getStartindex()) &&
1038                            (oc.getStartindex() <= tmpOC.getEndindex()))
1039                        {
1040                            continue invalidOccurence;
1041                        }
1042                        if (oc.getEndindex() >= tmpOC.getStartindex()) {
1043                            continue invalidOccurence;
1044
1045                        }
1046                        else if (oc.getStartindex() > tmpOC.getEndindex()) {
1047                            final int diff = tmpOC.getEndindex() - tmpOC.getStartindex();
1048                            // Just to be sure.
1049                            if (diff > 0) {
1050                                oc.setStartindex((oc.getStartindex() - diff) + 1);
1051                                oc.setEndindex((oc.getEndindex() - diff) + 1);
1052                            }
1053                            else {
1054                                Console
1055                                    .traceln(Level.WARNING,
1056                                             "End index of a Match before start. This should never happen");
1057                            }
1058                        }
1059                    }
1060                    synchronized (appData) {
1061                        appData.detectedAndReplacedTasks = true;
1062                    }
1063                   
1064
1065                    //System.out.println("Replacing in Sequence " + oc.getSequenceId() +
1066                    //    " at position " + oc.getStartindex() + " till " + oc.getEndindex());
1067                    final ISequenceInstance sequenceInstances =
1068                        RuleUtils.createNewSubSequenceInRange(appData.getSessions()
1069                                                                  .get(oc.getSequenceId()), oc
1070                                                                  .getStartindex(), oc
1071                                                                  .getEndindex(), (ISequence) appData.getPreparedTasks().get(pr.getMatchId()),
1072                                                              taskFactory, taskBuilder);
1073
1074                    // Adjust the length of the match regarding to the
1075                    // length of
1076                    // instance. (OptionalInstances may be shorter)
1077                    oc.setEndindex((oc.getStartindex() + sequenceInstances.size()) -
1078                        RuleUtils.missedOptionals);
1079
1080                    replacedOccurrences.add(oc);
1081
1082                }
1083            }
1084        }
1085    }
1086   
1087    private static class PlannedReplacement {
1088        private int matchId;
1089        private int getMatchId() {
1090            return matchId;
1091        }
1092
1093        private MatchOccurrence getOccurrence() {
1094            return occurrence;
1095        }
1096
1097        private MatchOccurrence occurrence;
1098       
1099        private PlannedReplacement(int matchId,MatchOccurrence occurrence) {
1100            this.matchId = matchId;
1101            this.occurrence = occurrence;
1102        }
1103    }
1104
1105    /**
1106     * The Class RuleApplicationData.
1107     */
1108    private static class RuleApplicationData implements Serializable {
1109
1110        /** The Constant serialVersionUID. */
1111        private static final long serialVersionUID = -7559657686755522960L;
1112
1113        /**
1114         * The number2task HashMap. Since we align the tasks just by their integer id, we need this
1115         * to convert them back to Tasks again
1116         */
1117        private final HashMap<Integer, ITask> number2task;
1118
1119
1120        /**
1121         * The substitution matrix used by the alignment algorithm to be able to compare distances
1122         * of tasks
1123         */
1124        private final ObjectDistanceSubstitionMatrix submat;
1125
1126        private ArrayDeque<PlannedReplacement>[] plannedReplacements;
1127
1128        /** The list of all found matches */
1129        private LinkedList<Match> matchseqs;
1130       
1131        private ArrayList<ITask> preparedTasks;
1132
1133        public ArrayList<ITask> getPreparedTasks() {
1134            return preparedTasks;
1135        }
1136
1137        /**
1138         * The generated NumberSequences. This is the integer representation of the user sessions
1139         */
1140        private ArrayList<NumberSequence> numberseqs;
1141
1142        /** The list of newly created tasks (of one iteration). */
1143        private final LinkedList<ITask> newTasks;
1144
1145        /** The user sessions containing all EventTasks/Instances */
1146        private final List<IUserSession> sessions;
1147
1148        /** True if we replaced something in the user sessions in one iteration. */
1149        private boolean detectedAndReplacedTasks;
1150
1151        /** The result we return from this rule */
1152        private final RuleApplicationResult result;
1153
1154        /** Stop Watch to measure performance */
1155        private final StopWatch stopWatch;
1156
1157        /**
1158         * Instantiates a new rule application data.
1159         *
1160         * @param sessions
1161         *            The user sessions
1162         */
1163        private RuleApplicationData(List<IUserSession> sessions) {
1164            this.sessions = sessions;
1165            numberseqs = new ArrayList<NumberSequence>();
1166            number2task = new HashMap<Integer, ITask>();
1167            stopWatch = new StopWatch();
1168            result = new RuleApplicationResult();
1169            preparedTasks = new ArrayList<ITask>();
1170            submat = new ObjectDistanceSubstitionMatrix(maxSubstitutionScore, gapPenalty, false);
1171            newTasks = new LinkedList<ITask>();
1172            this.detectedAndReplacedTasks = true;
1173        }
1174
1175        private void initializeQueues(int size) {
1176            plannedReplacements = new ArrayDeque[size];
1177            for (int i = 0; i < size; i++) {
1178                plannedReplacements[i] = new ArrayDeque<PlannedReplacement>(30);
1179            }
1180        }
1181
1182        public ArrayDeque<PlannedReplacement>[] getPlannedReplacements() {
1183            return plannedReplacements;
1184        }
1185
1186        /**
1187         * Detected and replaced tasks.
1188         *
1189         * @return true, if successful
1190         */
1191        private boolean detectedAndReplacedTasks() {
1192            return detectedAndReplacedTasks;
1193        }
1194
1195        /**
1196         * Gets the match sequences.
1197         *
1198         * @return the match sequences
1199         */
1200        public LinkedList<Match> getMatchseqs() {
1201            return matchseqs;
1202        }
1203
1204        /**
1205         * Gets the new tasks.
1206         *
1207         * @return the new tasks
1208         */
1209        public LinkedList<ITask> getNewTasks() {
1210            return newTasks;
1211        }
1212
1213        /**
1214         * Gets the number2task.
1215         *
1216         * @return the number2task HashMap
1217         */
1218        private HashMap<Integer, ITask> getNumber2Task() {
1219            return number2task;
1220        }
1221
1222        /**
1223         * Gets the number sequences.
1224         *
1225         * @return the number sequences
1226         */
1227        private ArrayList<NumberSequence> getNumberSequences() {
1228            return numberseqs;
1229        }
1230
1231        /**
1232         * Gets the result.
1233         *
1234         * @return the result
1235         */
1236        private RuleApplicationResult getResult() {
1237            return result;
1238        }
1239
1240        /**
1241         * Gets the sessions.
1242         *
1243         * @return the UserSessions as List.
1244         */
1245        private List<IUserSession> getSessions() {
1246            return sessions;
1247        }
1248
1249        /**
1250         * Gets the stop watch.
1251         *
1252         * @return the stopWatch
1253         */
1254        private StopWatch getStopWatch() {
1255            return stopWatch;
1256        }
1257
1258        /**
1259         * Gets the substitution matrix.
1260         *
1261         * @return the substitution matrix
1262         */
1263        private ObjectDistanceSubstitionMatrix getSubmat() {
1264            return submat;
1265        }
1266
1267        /**
1268         * New task created.
1269         *
1270         * @param task
1271         *            can be called when new tasks are created to keep track of all newly created
1272         *            tasks
1273         */
1274        private void newTaskCreated(ITask task) {
1275            number2task.put(task.getId(), task);
1276            newTasks.add(task);
1277        }
1278
1279        /**
1280         * Reset newly created tasks.
1281         */
1282        synchronized private void resetNewlyCreatedTasks() {
1283            newTasks.clear();
1284        }
1285
1286        /**
1287         * Sets the number sequences.
1288         *
1289         * @param numberseqs
1290         *            the new number sequences
1291         */
1292        private void setNumberSequences(ArrayList<NumberSequence> numberseqs) {
1293            this.numberseqs = numberseqs;
1294        }
1295
1296        /**
1297         * Update substitution matrix.
1298         */
1299        private void updateSubstitutionMatrix() {
1300            submat.update(getNewTasks());
1301            resetNewlyCreatedTasks();
1302        }
1303
1304    }
1305
1306}
Note: See TracBrowser for help on using the repository browser.