source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/utils/MostSimilarTaskDeterminer.java @ 1767

Last change on this file since 1767 was 1767, checked in by pharms, 10 years ago
  • first version of merging similar task trees
File size: 20.2 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation.utils;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.HashSet;
20import java.util.Iterator;
21import java.util.LinkedList;
22import java.util.List;
23import java.util.Map;
24import java.util.Set;
25import java.util.logging.Level;
26
27import de.ugoe.cs.autoquest.tasktrees.temporalrelation.TaskInstanceComparator;
28import de.ugoe.cs.autoquest.tasktrees.treeifc.DefaultTaskTraversingVisitor;
29import de.ugoe.cs.autoquest.tasktrees.treeifc.IStructuringTemporalRelationship;
30import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
31import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInfo;
32import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskModel;
33import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskMetric;
34import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskTreeUtils;
35import de.ugoe.cs.util.console.Console;
36
37/**
38 * <p>
39 * This class is a utility class for detecting similar tasks. It compares all tasks in a given
40 * list with each other and then chooses the pair with the highest level of similarity. For
41 * comparison, the tasks are traversed and only the traversals are compared with each other.
42 * </p>
43 * <p>
44 * Several task pairs may have the same similarity level. In this case, the class performs a
45 * filtering of the pairs to result in a merging order for the tasks that is always the same.
46 * </p>
47 * <p>
48 * If provided with many tasks, this class starts several threads to let the comparisons run in
49 * parallel.
50 * </p>
51 *
52 * @author Patrick Harms
53 */
54public class MostSimilarTaskDeterminer {
55
56    /**
57     * If this similarity level is exceeded, two tasks are not considered similar anymore. 33 means
58     * that at most 33% of the elements of both task traversals are not the same.
59     */
60    private static int MAX_DIFF_LEVEL = 33;
61   
62    /**
63     * default indicator for unequal tasks
64     */
65    private static final SimilarTasks UNEQUAL_TASKS = new SimilarTasks(null, null, null, null, null);
66   
67    /**
68     * task comparator used internally
69     */
70    private TaskInstanceComparator comparator;
71   
72    /**
73     * for performance reasons, some task comparisons can be excluded beforehand and the exclusions
74     * are stored in this list
75     */
76    private Set<Long> comparisonsToSkip = new HashSet<>();
77
78    /**
79     * Initialize instances of this class with the task comparator to be used
80     */
81    public MostSimilarTaskDeterminer(TaskInstanceComparator comparator) {
82        this.comparator = comparator;
83    }
84
85    /**
86     * add a pair of comparisons that is not done to increase performance
87     */
88    public void addComparisonToSkip(ITask task1, ITask task2) {
89        comparisonsToSkip.add(getMapId(task1, task2));
90    }
91
92    /**
93     * returns a list of most similar tasks. Independent of the order of the provided tasks, the
94     * returned list will always be the same for the same input elements.
95     */
96    public List<SimilarTasks> getMostSimilarTasks(List<ITask> tasks, ITaskModel taskModel) {
97        Console.println("comparing " + tasks.size() + " tasks with each other");
98
99        LinkedList<SimilarTasks> mostSimilarTasksList = performComparisons(tasks);
100       
101        applyFilterForSmallestDiffLevel(mostSimilarTasksList);
102        applyFilterForParents(mostSimilarTasksList);
103        applyFilterForMostCoveredEvents(mostSimilarTasksList, taskModel);
104       
105        Console.traceln
106            (Level.FINER, "calculated " + mostSimilarTasksList.size() + " most similar tasks");
107
108        return mostSimilarTasksList;
109    }
110
111    /**
112     * filters the given list of similar tasks for those having the smalled diff level, which is
113     * in turn the highest similarity
114     */
115    private void applyFilterForSmallestDiffLevel(LinkedList<SimilarTasks> mostSimilarTasksList) {
116        // determine the smallest diff level
117        int smallestDiffLevel = Integer.MAX_VALUE;
118       
119        for (SimilarTasks candidate : mostSimilarTasksList) {
120            if (candidate.getDiffLevel() < smallestDiffLevel) {
121                smallestDiffLevel = candidate.getDiffLevel();
122            }
123        }
124       
125        if (smallestDiffLevel <= MAX_DIFF_LEVEL) {
126            // remove all entries with a higher diff level
127            Iterator<SimilarTasks> listIterator = mostSimilarTasksList.iterator();
128       
129            while (listIterator.hasNext()) {
130                if (listIterator.next().getDiffLevel() > smallestDiffLevel) {
131                    listIterator.remove();
132                }
133            }
134        }
135        else {
136            mostSimilarTasksList.clear();
137        }
138    }
139   
140    /**
141     * ensures that the given list of similar tasks does not contain two pairs, where one refers
142     * to a child task of another pair.
143     */
144    private void applyFilterForParents(LinkedList<SimilarTasks> mostSimilarTasksList) {
145       
146        // remove all entries being parents of another entry or where both tasks are
147        // generated through this rule
148        Iterator<SimilarTasks> listIterator = mostSimilarTasksList.iterator();
149   
150        while (listIterator.hasNext()) {
151            SimilarTasks candidate = listIterator.next();
152           
153            ITask task1 = candidate.getLeftHandSide();
154            ITask task2 = candidate.getRightHandSide();
155            for (SimilarTasks potentialChild : mostSimilarTasksList) {
156                ITask task3 = potentialChild.getLeftHandSide();
157                ITask task4 = potentialChild.getRightHandSide();
158                if (TaskTreeUtils.isChild(task3, task1) ||
159                    TaskTreeUtils.isChild(task3, task2) ||
160                    TaskTreeUtils.isChild(task4, task1) ||
161                    TaskTreeUtils.isChild(task4, task2))
162                {
163                    listIterator.remove();
164                    break;
165                }
166            }
167        }
168    }
169
170    /**
171     * performs a filtering of the detected similar tasks which ensures, that the list does not
172     * contain two pairs referring to the same task and that in such cases always the same pair
173     * will remain.
174     */
175    private void applyFilterForMostCoveredEvents(LinkedList<SimilarTasks> mostSimilarTasksList,
176                                                 ITaskModel               taskModel)
177    {
178        // check, if several remaining similar tasks refer to the same task
179        Map<ITask, LinkedList<SimilarTasks>> referredTasks =
180            new HashMap<ITask, LinkedList<SimilarTasks>>();
181       
182        for (SimilarTasks similarTasks : mostSimilarTasksList) {
183            ensureMapping(similarTasks.getLeftHandSide(), similarTasks, referredTasks);
184            ensureMapping(similarTasks.getRightHandSide(), similarTasks, referredTasks);
185        }
186       
187        // remove all entries of tasks occurring only once
188        List<ITask> tasksOccuringOnce = new LinkedList<ITask>();
189
190        for (Map.Entry<ITask, LinkedList<SimilarTasks>> entry : referredTasks.entrySet()) {
191            if (entry.getValue().size() <= 1) {
192                tasksOccuringOnce.add(entry.getKey());
193            }
194        }
195
196        for (ITask taskToRemove : tasksOccuringOnce) {
197            referredTasks.remove(taskToRemove);
198        }
199
200        // if there are remaining tasks occurring several times, try to extract one similar tasks
201        // object, that should be merged first
202
203        if (referredTasks.size() > 0) {
204            mostSimilarTasksList.clear();
205
206            System.out.println("several comparisons for the same task exist with same diff level " +
207                               "--> filtering for pair to be merged first");
208
209            SimilarTasks firstToMerge = null;
210
211            for (LinkedList<SimilarTasks> pairs : referredTasks.values()) {
212                for (SimilarTasks current : pairs) {
213                    if (firstToMerge == null) {
214                        firstToMerge = current;
215                    }
216                    else if (firstToMerge != current) {
217                        firstToMerge = getFirstToMerge(firstToMerge, current, taskModel);
218                    }
219                }
220            }
221           
222            if (firstToMerge != null) {
223                mostSimilarTasksList.add(firstToMerge);
224            }
225        }
226       
227    }
228
229    /**
230     * <p>
231     * compares two similar tasks and decides which of them is to be merged first.
232     * </p>
233     */
234    private SimilarTasks getFirstToMerge(SimilarTasks first,
235                                         SimilarTasks second,
236                                         ITaskModel   taskModel)
237    {
238       
239        int valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE, taskModel);
240        int valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE, taskModel);
241
242        if (valSecond > valFirst) {
243            return second;
244        }
245        else if (valSecond < valFirst) {
246            return first;
247        }
248
249        // no of covered events is equal, try to distinguish by count
250
251        valFirst = getTaskMetric(first, TaskMetric.COUNT, taskModel);
252        valSecond = getTaskMetric(second, TaskMetric.COUNT, taskModel);
253
254        if (valSecond > valFirst) {
255            return second;
256        }
257        else if (valSecond < valFirst) {
258            return first;
259        }
260
261        // count is equal, try to distinguish by depth
262
263        valFirst = getTaskMetric(first, TaskMetric.DEPTH, taskModel);
264        valSecond = getTaskMetric(second, TaskMetric.DEPTH, taskModel);
265
266        if (valSecond < valFirst) {
267            return second;
268        }
269        else if (valSecond > valFirst) {
270            return first;
271        }
272
273        // no of covered events is equal, try to distinguish by count
274
275        valFirst = cumulateTaskMetric(first, TaskMetric.COUNT, taskModel);
276        valSecond = cumulateTaskMetric(second, TaskMetric.COUNT, taskModel);
277
278        if (valSecond > valFirst) {
279            return second;
280        }
281        else if (valSecond < valFirst) {
282            return first;
283        }
284
285        // depth is equal. Calculate for both the similarity
286        // based on which the merging will take place
287        valFirst =
288            SimilarTasks.getMergableLevelOfSimilarity(first, comparator).getDiffLevel();
289        valSecond =
290            SimilarTasks.getMergableLevelOfSimilarity(second, comparator).getDiffLevel();
291
292        if (valSecond > valFirst) {
293            return second;
294        }
295        else if (valSecond < valFirst) {
296            return first;
297        }
298
299        first.dump(System.out);
300        second.dump(System.out);
301
302        throw new RuntimeException
303            ("several tasks are similar so that it is undecidable which to merge first");
304    }
305
306    /**
307     *
308     */
309    private void ensureMapping(ITask                                task,
310                               SimilarTasks                         similarTasks,
311                               Map<ITask, LinkedList<SimilarTasks>> map)
312    {
313        LinkedList<SimilarTasks> value = map.get(task);
314       
315        if (value == null) {
316            value = new LinkedList<SimilarTasks>();
317            map.put(task, value);
318        }
319       
320        value.add(similarTasks);
321    }
322
323
324    /**
325     * starts several threads performing the task comparisons
326     */
327    private LinkedList<SimilarTasks> performComparisons(List<ITask> tasks) {
328        LinkedList<SimilarTasks> mostSimilarTasks = new LinkedList<SimilarTasks>();
329        List<Runnable> startedRunnables = new LinkedList<Runnable>();
330        List<Runnable> runnables = createRunnables(tasks, mostSimilarTasks, startedRunnables);
331
332        // create Runnables for all buckets
333        int noOfParallelThreads = Math.min(2, runnables.size());
334       
335        Console.traceln(Level.FINER, "will start " + runnables.size() + " threads");
336       
337        // start the Threads, wait for their completion, and start the next ones if there are
338        // remaining
339        synchronized (startedRunnables) {
340            while ((runnables.size() > 0) || (startedRunnables.size() > 0)) {
341                while ((startedRunnables.size() < noOfParallelThreads) && (runnables.size() > 0)) {
342                    Runnable runnable = runnables.remove(0);
343                    startedRunnables.add(runnable);
344                    new Thread(runnable).start();
345                    Console.traceln(Level.FINEST, "started next thread");
346                }
347           
348                try {
349                    startedRunnables.wait();
350                }
351                catch (InterruptedException e) {
352                    // should not happen
353                    Console.logException(e);
354                }
355
356                Console.traceln(Level.FINEST, "thread finished (" + runnables.size() + ", " +
357                                startedRunnables.size() + ")");
358            }
359        }
360        Console.traceln(Level.FINER, "all threads finished");
361       
362        return mostSimilarTasks;
363    }
364
365    /**
366     * creates runnables for comparing tasks by subdiving the input set of comparisons into
367     * several subsets where each is compared in a corresponding runnable. The subsets are
368     * at most 150000 comparisons large.
369     */
370    private List<Runnable> createRunnables(List<ITask>              tasks,
371                                           LinkedList<SimilarTasks> mostSimilarTasksList,
372                                           List<Runnable>           startedRunnables)
373    {
374        // subdivide comparisons into buckets to be spread to different threads
375        int noOfComparisons = 0;
376        int noOfScheduledComparisons = 0;
377        List<Integer> bucketIndexes = new ArrayList<Integer>();
378       
379        bucketIndexes.add(0);
380       
381        for (int i = 0; i < tasks.size(); i++) {
382            noOfComparisons += i;
383           
384            if ((noOfComparisons - noOfScheduledComparisons) > 150000) {
385                bucketIndexes.add(i);
386                noOfScheduledComparisons = noOfComparisons;
387            }
388        }
389       
390        bucketIndexes.add(tasks.size());
391       
392        Console.traceln(Level.FINER, "scheduling " + noOfComparisons + " comparisons");
393       
394        List<Runnable> runnables = new LinkedList<Runnable>();
395       
396        for (int i = 1; i < bucketIndexes.size(); i++) {
397            CompareRunnable runnable = new CompareRunnable
398                (tasks, bucketIndexes.get(i - 1), bucketIndexes.get(i), mostSimilarTasksList,
399                 startedRunnables);
400           
401            runnables.add(runnable);
402        }
403       
404        return runnables;
405    }
406
407    /**
408     * convenience method to get the value of a task metric
409     */
410    private int getTaskMetric(SimilarTasks similarTasks, TaskMetric metric, ITaskModel taskModel) {
411        if (similarTasks == null) {
412            return 0;
413        }
414       
415        ITaskInfo info1 = taskModel.getTaskInfo(similarTasks.getLeftHandSide());
416        ITaskInfo info2 = taskModel.getTaskInfo(similarTasks.getRightHandSide());
417       
418        return info1.getMeasureValue(metric) + info2.getMeasureValue(metric);
419    }
420   
421    /**
422     * convenience method to get the cumulative value of a task metric
423     */
424    private int cumulateTaskMetric(SimilarTasks     similarTasks,
425                                   final TaskMetric metric,
426                                   final ITaskModel taskModel)
427    {
428        final int[] value = new int[1];
429        value[0] = 0;
430       
431        DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() {
432            @Override
433            public void visit(IStructuringTemporalRelationship relationship) {
434                value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric);
435                super.visit(relationship);
436            }
437        };
438       
439        similarTasks.getLeftHandSide().accept(visitor);
440        similarTasks.getRightHandSide().accept(visitor);
441       
442        return value[0];
443    }
444
445    /**
446     * convenience method to check if a specific comparison shall be skipped
447     */
448    private boolean isComparisonToSkip(ITask task1, ITask task2) {
449        return comparisonsToSkip.contains(getMapId(task1, task2));
450    }
451
452    /**
453     * convenience method to get a unique id for representing a pair of two tasks
454     */
455    private long getMapId(ITask task1, ITask task2) {
456        if (task1.getId() < task2.getId()) {
457            return (((long) task1.getId()) << 32) + task2.getId();
458        }
459        else {
460            return (((long) task2.getId()) << 32) + task1.getId();
461        }
462    }
463
464    /**
465     * Runnable performing a subset of task comparisons in a dedicated thread
466     */
467    public class CompareRunnable implements Runnable {
468
469        /** */
470        private List<ITask> taskList;
471       
472        /** */
473        private int startIndex;
474       
475        /** */
476        private int endIndex;
477
478        /** */
479        private List<SimilarTasks> mostSimilarTasksList;
480
481        /** */
482        private List<Runnable> unfinishedRunnables;
483       
484        /**
485         *
486         */
487        public CompareRunnable(List<ITask>         taskList,
488                               int                 startIndex,
489                               int                 endIndex,
490                               List<SimilarTasks>  mostSimilarTasksList,
491                               List<Runnable>      unfinishedRunnables)
492        {
493            this.taskList = taskList;
494            this.startIndex = startIndex;
495            this.endIndex = endIndex;
496            this.mostSimilarTasksList = mostSimilarTasksList;
497            this.unfinishedRunnables = unfinishedRunnables;
498        }
499
500        /* (non-Javadoc)
501         * @see java.lang.Runnable#run()
502         */
503        @Override
504        public void run() {
505            int noOfComparisons = 0;
506           
507            for (int i = startIndex; i < endIndex; i++) {
508                noOfComparisons += i;
509            }
510
511            System.out.println("performing " + noOfComparisons + " comparisons");
512           
513            SimilarTasks mostSimilarTasks = UNEQUAL_TASKS;
514            List<SimilarTasks> allMostSimilarTasks = new LinkedList<SimilarTasks>();
515           
516            for (int i = startIndex + 1; i < endIndex; i++) {
517                /*if (appData.isSelfCreatedTask(taskList.get(i))) {
518                    continue;
519                }*/
520               
521                for (int j = startIndex; j < i; j++) {
522                    /*if (appData.isSelfCreatedTask(taskList.get(j))) {
523                        continue;
524                    }*/
525                   
526                    if (isComparisonToSkip(taskList.get(i), taskList.get(j))) {
527                        continue;
528                    }
529                   
530                    SimilarTasks similarTasks = SimilarTasks.compareTasks
531                        (taskList.get(i), taskList.get(j), comparator);
532                   
533                    if (similarTasks.isInBetweenDifference()) {
534                        if (similarTasks.getDiffLevel() < mostSimilarTasks.getDiffLevel()) {
535                            mostSimilarTasks = similarTasks;
536                            allMostSimilarTasks.clear();
537                            allMostSimilarTasks.add(similarTasks);
538                        }
539                        else if (similarTasks.getDiffLevel() == mostSimilarTasks.getDiffLevel()) {
540                            allMostSimilarTasks.add(similarTasks);
541                        }
542                    }
543                }
544            }
545           
546            synchronized (unfinishedRunnables) {
547                mostSimilarTasksList.addAll(allMostSimilarTasks);
548               
549                //System.out.println("notifying finish");
550                for (int i = 0; i < unfinishedRunnables.size(); i++) {
551                    if (unfinishedRunnables.get(i) == this) {
552                        unfinishedRunnables.remove(i);
553                        unfinishedRunnables.notify();
554                    }
555                }
556            }
557        }
558
559    }
560}
Note: See TracBrowser for help on using the repository browser.