// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation.utils; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import de.ugoe.cs.autoquest.tasktrees.temporalrelation.TaskInstanceComparator; import de.ugoe.cs.autoquest.tasktrees.treeifc.DefaultTaskTraversingVisitor; import de.ugoe.cs.autoquest.tasktrees.treeifc.IStructuringTemporalRelationship; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInfo; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskModel; import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskMetric; import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskTreeUtils; import de.ugoe.cs.util.console.Console; /** *

* This class is a utility class for detecting similar tasks. It compares all tasks in a given * list with each other and then chooses the pair with the highest level of similarity. For * comparison, the tasks are traversed and only the traversals are compared with each other. *

*

* Several task pairs may have the same similarity level. In this case, the class performs a * filtering of the pairs to result in a merging order for the tasks that is always the same. *

*

* If provided with many tasks, this class starts several threads to let the comparisons run in * parallel. *

* * @author Patrick Harms */ public class MostSimilarTaskDeterminer { /** * If this similarity level is exceeded, two tasks are not considered similar anymore. 33 means * that at most 33% of the elements of both task traversals are not the same. */ private static int MAX_DIFF_LEVEL = 33; /** * default indicator for unequal tasks */ private static final SimilarTasks UNEQUAL_TASKS = new SimilarTasks(null, null, null, null, null); /** * task comparator used internally */ private TaskInstanceComparator comparator; /** * for performance reasons, some task comparisons can be excluded beforehand and the exclusions * are stored in this list */ private Set comparisonsToSkip = new HashSet<>(); /** * Initialize instances of this class with the task comparator to be used */ public MostSimilarTaskDeterminer(TaskInstanceComparator comparator) { this.comparator = comparator; } /** * add a pair of comparisons that is not done to increase performance */ public void addComparisonToSkip(ITask task1, ITask task2) { comparisonsToSkip.add(getMapId(task1, task2)); } /** * returns a list of most similar tasks. Independent of the order of the provided tasks, the * returned list will always be the same for the same input elements. */ public List getMostSimilarTasks(List tasks, ITaskModel taskModel) { Console.println("comparing " + tasks.size() + " tasks with each other"); LinkedList mostSimilarTasksList = performComparisons(tasks); applyFilterForSmallestDiffLevel(mostSimilarTasksList); applyFilterForParents(mostSimilarTasksList); applyFilterForMostCoveredEvents(mostSimilarTasksList, taskModel); Console.traceln (Level.FINER, "calculated " + mostSimilarTasksList.size() + " most similar tasks"); return mostSimilarTasksList; } /** * filters the given list of similar tasks for those having the smalled diff level, which is * in turn the highest similarity */ private void applyFilterForSmallestDiffLevel(LinkedList mostSimilarTasksList) { // determine the smallest diff level int smallestDiffLevel = Integer.MAX_VALUE; for (SimilarTasks candidate : mostSimilarTasksList) { if (candidate.getDiffLevel() < smallestDiffLevel) { smallestDiffLevel = candidate.getDiffLevel(); } } if (smallestDiffLevel <= MAX_DIFF_LEVEL) { // remove all entries with a higher diff level Iterator listIterator = mostSimilarTasksList.iterator(); while (listIterator.hasNext()) { if (listIterator.next().getDiffLevel() > smallestDiffLevel) { listIterator.remove(); } } } else { mostSimilarTasksList.clear(); } } /** * ensures that the given list of similar tasks does not contain two pairs, where one refers * to a child task of another pair. */ private void applyFilterForParents(LinkedList mostSimilarTasksList) { // remove all entries being parents of another entry or where both tasks are // generated through this rule Iterator listIterator = mostSimilarTasksList.iterator(); while (listIterator.hasNext()) { SimilarTasks candidate = listIterator.next(); ITask task1 = candidate.getLeftHandSide(); ITask task2 = candidate.getRightHandSide(); for (SimilarTasks potentialChild : mostSimilarTasksList) { ITask task3 = potentialChild.getLeftHandSide(); ITask task4 = potentialChild.getRightHandSide(); if (TaskTreeUtils.isChild(task3, task1) || TaskTreeUtils.isChild(task3, task2) || TaskTreeUtils.isChild(task4, task1) || TaskTreeUtils.isChild(task4, task2)) { listIterator.remove(); break; } } } } /** * performs a filtering of the detected similar tasks which ensures, that the list does not * contain two pairs referring to the same task and that in such cases always the same pair * will remain. */ private void applyFilterForMostCoveredEvents(LinkedList mostSimilarTasksList, ITaskModel taskModel) { // check, if several remaining similar tasks refer to the same task Map> referredTasks = new HashMap>(); for (SimilarTasks similarTasks : mostSimilarTasksList) { ensureMapping(similarTasks.getLeftHandSide(), similarTasks, referredTasks); ensureMapping(similarTasks.getRightHandSide(), similarTasks, referredTasks); } // remove all entries of tasks occurring only once List tasksOccuringOnce = new LinkedList(); for (Map.Entry> entry : referredTasks.entrySet()) { if (entry.getValue().size() <= 1) { tasksOccuringOnce.add(entry.getKey()); } } for (ITask taskToRemove : tasksOccuringOnce) { referredTasks.remove(taskToRemove); } // if there are remaining tasks occurring several times, try to extract one similar tasks // object, that should be merged first if (referredTasks.size() > 0) { mostSimilarTasksList.clear(); System.out.println("several comparisons for the same task exist with same diff level " + "--> filtering for pair to be merged first"); SimilarTasks firstToMerge = null; for (LinkedList pairs : referredTasks.values()) { for (SimilarTasks current : pairs) { if (firstToMerge == null) { firstToMerge = current; } else if (firstToMerge != current) { firstToMerge = getFirstToMerge(firstToMerge, current, taskModel); } } } if (firstToMerge != null) { mostSimilarTasksList.add(firstToMerge); } } } /** *

* compares two similar tasks and decides which of them is to be merged first. *

*/ private SimilarTasks getFirstToMerge(SimilarTasks first, SimilarTasks second, ITaskModel taskModel) { int valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE, taskModel); int valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // no of covered events is equal, try to distinguish by count valFirst = getTaskMetric(first, TaskMetric.COUNT, taskModel); valSecond = getTaskMetric(second, TaskMetric.COUNT, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // count is equal, try to distinguish by depth valFirst = getTaskMetric(first, TaskMetric.DEPTH, taskModel); valSecond = getTaskMetric(second, TaskMetric.DEPTH, taskModel); if (valSecond < valFirst) { return second; } else if (valSecond > valFirst) { return first; } // no of covered events is equal, try to distinguish by count valFirst = cumulateTaskMetric(first, TaskMetric.COUNT, taskModel); valSecond = cumulateTaskMetric(second, TaskMetric.COUNT, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // depth is equal. Calculate for both the similarity // based on which the merging will take place valFirst = SimilarTasks.getMergableLevelOfSimilarity(first, comparator).getDiffLevel(); valSecond = SimilarTasks.getMergableLevelOfSimilarity(second, comparator).getDiffLevel(); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } first.dump(System.out); second.dump(System.out); throw new RuntimeException ("several tasks are similar so that it is undecidable which to merge first"); } /** * */ private void ensureMapping(ITask task, SimilarTasks similarTasks, Map> map) { LinkedList value = map.get(task); if (value == null) { value = new LinkedList(); map.put(task, value); } value.add(similarTasks); } /** * starts several threads performing the task comparisons */ private LinkedList performComparisons(List tasks) { LinkedList mostSimilarTasks = new LinkedList(); List startedRunnables = new LinkedList(); List runnables = createRunnables(tasks, mostSimilarTasks, startedRunnables); // create Runnables for all buckets int noOfParallelThreads = Math.min(2, runnables.size()); Console.traceln(Level.FINER, "will start " + runnables.size() + " threads"); // start the Threads, wait for their completion, and start the next ones if there are // remaining synchronized (startedRunnables) { while ((runnables.size() > 0) || (startedRunnables.size() > 0)) { while ((startedRunnables.size() < noOfParallelThreads) && (runnables.size() > 0)) { Runnable runnable = runnables.remove(0); startedRunnables.add(runnable); new Thread(runnable).start(); Console.traceln(Level.FINEST, "started next thread"); } try { startedRunnables.wait(); } catch (InterruptedException e) { // should not happen Console.logException(e); } Console.traceln(Level.FINEST, "thread finished (" + runnables.size() + ", " + startedRunnables.size() + ")"); } } Console.traceln(Level.FINER, "all threads finished"); return mostSimilarTasks; } /** * creates runnables for comparing tasks by subdiving the input set of comparisons into * several subsets where each is compared in a corresponding runnable. The subsets are * at most 150000 comparisons large. */ private List createRunnables(List tasks, LinkedList mostSimilarTasksList, List startedRunnables) { // subdivide comparisons into buckets to be spread to different threads int noOfComparisons = 0; int noOfScheduledComparisons = 0; List bucketIndexes = new ArrayList(); bucketIndexes.add(0); for (int i = 0; i < tasks.size(); i++) { noOfComparisons += i; if ((noOfComparisons - noOfScheduledComparisons) > 150000) { bucketIndexes.add(i); noOfScheduledComparisons = noOfComparisons; } } bucketIndexes.add(tasks.size()); Console.traceln(Level.FINER, "scheduling " + noOfComparisons + " comparisons"); List runnables = new LinkedList(); for (int i = 1; i < bucketIndexes.size(); i++) { CompareRunnable runnable = new CompareRunnable (tasks, bucketIndexes.get(i - 1), bucketIndexes.get(i), mostSimilarTasksList, startedRunnables); runnables.add(runnable); } return runnables; } /** * convenience method to get the value of a task metric */ private int getTaskMetric(SimilarTasks similarTasks, TaskMetric metric, ITaskModel taskModel) { if (similarTasks == null) { return 0; } ITaskInfo info1 = taskModel.getTaskInfo(similarTasks.getLeftHandSide()); ITaskInfo info2 = taskModel.getTaskInfo(similarTasks.getRightHandSide()); return info1.getMeasureValue(metric) + info2.getMeasureValue(metric); } /** * convenience method to get the cumulative value of a task metric */ private int cumulateTaskMetric(SimilarTasks similarTasks, final TaskMetric metric, final ITaskModel taskModel) { final int[] value = new int[1]; value[0] = 0; DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() { @Override public void visit(IStructuringTemporalRelationship relationship) { value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric); super.visit(relationship); } }; similarTasks.getLeftHandSide().accept(visitor); similarTasks.getRightHandSide().accept(visitor); return value[0]; } /** * convenience method to check if a specific comparison shall be skipped */ private boolean isComparisonToSkip(ITask task1, ITask task2) { return comparisonsToSkip.contains(getMapId(task1, task2)); } /** * convenience method to get a unique id for representing a pair of two tasks */ private long getMapId(ITask task1, ITask task2) { if (task1.getId() < task2.getId()) { return (((long) task1.getId()) << 32) + task2.getId(); } else { return (((long) task2.getId()) << 32) + task1.getId(); } } /** * Runnable performing a subset of task comparisons in a dedicated thread */ public class CompareRunnable implements Runnable { /** */ private List taskList; /** */ private int startIndex; /** */ private int endIndex; /** */ private List mostSimilarTasksList; /** */ private List unfinishedRunnables; /** * */ public CompareRunnable(List taskList, int startIndex, int endIndex, List mostSimilarTasksList, List unfinishedRunnables) { this.taskList = taskList; this.startIndex = startIndex; this.endIndex = endIndex; this.mostSimilarTasksList = mostSimilarTasksList; this.unfinishedRunnables = unfinishedRunnables; } /* (non-Javadoc) * @see java.lang.Runnable#run() */ @Override public void run() { int noOfComparisons = 0; for (int i = startIndex; i < endIndex; i++) { noOfComparisons += i; } System.out.println("performing " + noOfComparisons + " comparisons"); SimilarTasks mostSimilarTasks = UNEQUAL_TASKS; List allMostSimilarTasks = new LinkedList(); for (int i = startIndex + 1; i < endIndex; i++) { /*if (appData.isSelfCreatedTask(taskList.get(i))) { continue; }*/ for (int j = startIndex; j < i; j++) { /*if (appData.isSelfCreatedTask(taskList.get(j))) { continue; }*/ if (isComparisonToSkip(taskList.get(i), taskList.get(j))) { continue; } SimilarTasks similarTasks = SimilarTasks.compareTasks (taskList.get(i), taskList.get(j), comparator); if (similarTasks.isInBetweenDifference()) { if (similarTasks.getDiffLevel() < mostSimilarTasks.getDiffLevel()) { mostSimilarTasks = similarTasks; allMostSimilarTasks.clear(); allMostSimilarTasks.add(similarTasks); } else if (similarTasks.getDiffLevel() == mostSimilarTasks.getDiffLevel()) { allMostSimilarTasks.add(similarTasks); } } } } synchronized (unfinishedRunnables) { mostSimilarTasksList.addAll(allMostSimilarTasks); //System.out.println("notifying finish"); for (int i = 0; i < unfinishedRunnables.size(); i++) { if (unfinishedRunnables.get(i) == this) { unfinishedRunnables.remove(i); unfinishedRunnables.notify(); } } } } } }