// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.tasktrees.temporalrelation.utils; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import de.ugoe.cs.autoquest.tasktrees.temporalrelation.TaskComparator; import de.ugoe.cs.autoquest.tasktrees.treeifc.DefaultTaskTraversingVisitor; import de.ugoe.cs.autoquest.tasktrees.treeifc.IEventTaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.IStructuringTemporalRelationship; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInfo; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList; import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskModel; import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskMetric; import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskTreeUtils; import de.ugoe.cs.util.console.Console; /** *

* This class is a utility class for detecting similar tasks. It compares all tasks in a given * list with each other and then chooses the pair with the highest level of similarity. For * comparison, the tasks are traversed and only the traversals are compared with each other. *

*

* Several task pairs may have the same similarity level. In this case, the class performs a * filtering of the pairs to result in a merging order for the tasks that is always the same. *

*

* If provided with many tasks, this class starts several threads to let the comparisons run in * parallel. *

* * @author Patrick Harms */ public class MostSimilarTaskDeterminer { /** * If this similarity level is exceeded, two tasks are not considered similar anymore. 33 means * that at most 33% of the elements of both task traversals are not the same. */ private static int MAX_DIFF_LEVEL = 33; /** * task comparator used internally */ private TaskComparator comparator; /** * for performance reasons, some task comparisons can be excluded beforehand and the exclusions * are stored in this list */ private Set comparisonsToSkip = new HashSet<>(); /** TODO comment */ private long comparisonCounter = 0; /** * Initialize instances of this class with the task comparator to be used */ public MostSimilarTaskDeterminer(TaskComparator comparator) { this.comparator = comparator; } /** * add a pair of comparisons that is not done to increase performance */ public void addComparisonToSkip(ITask task1, ITask task2) { comparisonsToSkip.add(getMapId(task1, task2)); } /** * returns a list of most similar tasks. Independent of the order of the provided tasks, the * returned list will always be the same for the same input elements. */ public List getMostSimilarTasks(List tasks, ITaskModel taskModel) { Console.println("comparing " + tasks.size() + " sequences with each other"); LinkedList mostSimilarTasksList = performComparisons(taskModel, tasks); Console.println("initially found " + mostSimilarTasksList.size() + " similar sequences"); applyFilterForSmallestDiffLevel(mostSimilarTasksList); Console.println("only " + mostSimilarTasksList.size() + " have the smallest diff level"); applyFilterForParents(mostSimilarTasksList); Console.println(mostSimilarTasksList.size() + " remain after filtering for parents"); applyFilterForMostCoveredEvents(mostSimilarTasksList, taskModel); Console.println("calculated " + mostSimilarTasksList.size() + " most similar sequences"); return mostSimilarTasksList; } /** * filters the given list of similar tasks for those having the smalled diff level, which is * in turn the highest similarity */ private void applyFilterForSmallestDiffLevel(LinkedList mostSimilarTasksList) { // determine the smallest diff level int smallestDiffLevel = Integer.MAX_VALUE; for (SimilarTasks candidate : mostSimilarTasksList) { if (candidate.getDiffLevel() < smallestDiffLevel) { smallestDiffLevel = candidate.getDiffLevel(); } } if (smallestDiffLevel <= MAX_DIFF_LEVEL) { // remove all entries with a higher diff level Iterator listIterator = mostSimilarTasksList.iterator(); while (listIterator.hasNext()) { if (listIterator.next().getDiffLevel() > smallestDiffLevel) { listIterator.remove(); } } } else { mostSimilarTasksList.clear(); } } /** * ensures that the given list of similar tasks does not contain two pairs, where one refers * to a child task of another pair. */ private void applyFilterForParents(LinkedList mostSimilarTasksList) { // remove all entries being parents of another entry or where both tasks are // generated through this rule Iterator listIterator = mostSimilarTasksList.iterator(); List similarTasksToRemove = new LinkedList(); while (listIterator.hasNext()) { SimilarTasks candidate = listIterator.next(); ITask task1 = candidate.getLeftHandSide(); ITask task2 = candidate.getRightHandSide(); for (SimilarTasks potentialChild : mostSimilarTasksList) { ITask task3 = potentialChild.getLeftHandSide(); ITask task4 = potentialChild.getRightHandSide(); if (TaskTreeUtils.isChild(task3, task1) || TaskTreeUtils.isChild(task3, task2) || TaskTreeUtils.isChild(task4, task1) || TaskTreeUtils.isChild(task4, task2)) { similarTasksToRemove.add(candidate); break; } } } listIterator = mostSimilarTasksList.iterator(); while (listIterator.hasNext()) { SimilarTasks candidate = listIterator.next(); for (SimilarTasks toRemove : similarTasksToRemove) { if (candidate == toRemove) { listIterator.remove(); } } } } /** * performs a filtering of the detected similar tasks which ensures, that the list does not * contain two pairs referring to the same task and that in such cases always the same pair * will remain. */ private void applyFilterForMostCoveredEvents(LinkedList mostSimilarTasksList, ITaskModel taskModel) { // check, if several remaining similar tasks refer to the same task Map> referredTasks = new HashMap>(); for (SimilarTasks similarTasks : mostSimilarTasksList) { ensureMapping(similarTasks.getLeftHandSide(), similarTasks, referredTasks); ensureMapping(similarTasks.getRightHandSide(), similarTasks, referredTasks); } // remove all entries of tasks occurring only once List tasksOccuringOnce = new LinkedList(); for (Map.Entry> entry : referredTasks.entrySet()) { if (entry.getValue().size() <= 1) { tasksOccuringOnce.add(entry.getKey()); } } for (ITask taskToRemove : tasksOccuringOnce) { referredTasks.remove(taskToRemove); } // if there are remaining tasks occurring several times, try to extract one similar tasks // object, that should be merged first if (referredTasks.size() > 0) { mostSimilarTasksList.clear(); Console.traceln(Level.FINEST, "several comparisons for the same task exist with " + "same diff level --> filtering for pair to be merged first"); SimilarTasks firstToMerge = null; for (LinkedList pairs : referredTasks.values()) { for (SimilarTasks current : pairs) { if (firstToMerge == null) { firstToMerge = current; } else if (firstToMerge != current) { firstToMerge = getFirstToMerge(firstToMerge, current, taskModel); } } } if (firstToMerge != null) { mostSimilarTasksList.add(firstToMerge); } } } /** *

* compares two similar tasks and decides which of them is to be merged first. *

*/ private SimilarTasks getFirstToMerge(SimilarTasks first, SimilarTasks second, ITaskModel taskModel) { int valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE, taskModel); int valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // no of covered events is equal, try to distinguish by count valFirst = getTaskMetric(first, TaskMetric.COUNT, taskModel); valSecond = getTaskMetric(second, TaskMetric.COUNT, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // count is equal, try to distinguish by depth valFirst = getTaskMetric(first, TaskMetric.DEPTH, taskModel); valSecond = getTaskMetric(second, TaskMetric.DEPTH, taskModel); if (valSecond < valFirst) { return second; } else if (valSecond > valFirst) { return first; } // no of covered events is equal, try to distinguish by count valFirst = cumulateTaskMetric(first, TaskMetric.COUNT, taskModel); valSecond = cumulateTaskMetric(second, TaskMetric.COUNT, taskModel); if (valSecond > valFirst) { return second; } else if (valSecond < valFirst) { return first; } // depth is equal. Calculate for both the similarity // based on which the merging will take place SimilarTasks tmp = SimilarTasks.getMergableLevelOfSimilarity(first, comparator); valFirst = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE; tmp = SimilarTasks.getMergableLevelOfSimilarity(second, comparator); valSecond = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE; if (valSecond < valFirst) { return second; } else if (valSecond > valFirst) { return first; } first.dump(System.out); second.dump(System.out); throw new RuntimeException ("several tasks are similar so that it is undecidable which to merge first"); } /** * */ private void ensureMapping(ITask task, SimilarTasks similarTasks, Map> map) { LinkedList value = map.get(task); if (value == null) { value = new LinkedList(); map.put(task, value); } value.add(similarTasks); } /** * starts several threads performing the task comparisons */ private LinkedList performComparisons(ITaskModel taskModel, List tasks) { LinkedList mostSimilarTasks = new LinkedList(); List startedRunnables = new LinkedList(); comparisonCounter = 0; // groups size is minimal 100 or maximal 1000 int groupSize = Math.max (100, Math.min(3000, 1 + (tasks.size() / Runtime.getRuntime().availableProcessors()))); synchronized (startedRunnables) { int start1 = 0; int end1; int start2; int end2; do { end1 = Math.min(start1 + groupSize, tasks.size()); TaskTraversal[] leftHandTraversals = new TaskTraversal[end1 - start1]; Console.traceln(Level.FINE, "calculating left hand traversals from " + start1 + " to " + end1); int index = 0; for (int j = start1; j < end1; j++) { leftHandTraversals[index++] = TaskTraversal.getTraversal(tasks.get(j), null); } start2 = 0; do { end2 = Math.min(start2 + groupSize, end1 - 1); TaskTraversal[] rightHandTraversals = new TaskTraversal[end2 - start2]; if (end2 <= start1) { Console.traceln(Level.FINE, "calculating right hand traversals from " + start2 + " to " + end2); // traversals need to be created index = 0; for (int j = start2; j < end2; j++) { rightHandTraversals[index++] = TaskTraversal.getTraversal(tasks.get(j), null); } } else { // traversals can be reused Console.traceln(Level.FINE, "reusing traversals for right hand from " + start2 + " to " + end2); rightHandTraversals = leftHandTraversals; } Runnable runnable = new CompareRunnable(taskModel, leftHandTraversals, rightHandTraversals, mostSimilarTasks, startedRunnables); while (startedRunnables.size() >= Math.max(1, Runtime.getRuntime().availableProcessors())) { try { Console.traceln(Level.FINER, "waiting for next thread to finish"); startedRunnables.wait(); Console.traceln(Level.FINER, "thread finished"); } catch (InterruptedException e) { // should not happen Console.logException(e); } } Console.traceln(Level.FINER, "starting next thread"); startedRunnables.add(runnable); new Thread(runnable).start(); Console.traceln(Level.FINER, "started next thread " + runnable); start2 = end2; } while (end2 < (end1 - 1)); start1 = end1; } while (end1 < tasks.size()); while (startedRunnables.size() > 0) { try { Console.traceln(Level.FINER, "waiting for next thread to finish"); startedRunnables.wait(); Console.traceln(Level.FINER, "thread finished"); } catch (InterruptedException e) { // should not happen Console.logException(e); } } } Console.traceln (Level.FINER, "all threads finished, " + comparisonCounter + " comparisons done"); if (comparisonCounter != (((tasks.size() - 1) * tasks.size()) / 2)) { throw new RuntimeException(comparisonCounter + " " + (((tasks.size() - 1) * tasks.size()) / 2)); } return mostSimilarTasks; } /** * convenience method to get the value of a task metric */ private int getTaskMetric(SimilarTasks similarTasks, TaskMetric metric, ITaskModel taskModel) { if (similarTasks == null) { return 0; } ITaskInfo info1 = taskModel.getTaskInfo(similarTasks.getLeftHandSide()); ITaskInfo info2 = taskModel.getTaskInfo(similarTasks.getRightHandSide()); return info1.getMeasureValue(metric) + info2.getMeasureValue(metric); } /** * convenience method to get the cumulative value of a task metric */ private int cumulateTaskMetric(SimilarTasks similarTasks, final TaskMetric metric, final ITaskModel taskModel) { final int[] value = new int[1]; value[0] = 0; DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() { @Override public void visit(IStructuringTemporalRelationship relationship) { value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric); super.visit(relationship); } }; similarTasks.getLeftHandSide().accept(visitor); similarTasks.getRightHandSide().accept(visitor); return value[0]; } /** * convenience method to check if a specific comparison shall be skipped */ private boolean isComparisonToSkip(ITask task1, ITask task2) { return comparisonsToSkip.contains(getMapId(task1, task2)); } /** * convenience method to get a unique id for representing a pair of two tasks */ private long getMapId(ITask task1, ITask task2) { if (task1.getId() < task2.getId()) { return (((long) task1.getId()) << 32) + task2.getId(); } else { return (((long) task2.getId()) << 32) + task1.getId(); } } /** * Runnable performing a subset of task comparisons in a dedicated thread */ public class CompareRunnable implements Runnable { /** */ private ITaskModel taskModel; /** */ private TaskTraversal[] leftHandTraversals; /** */ private TaskTraversal[] rightHandTraversals; /** */ private List mostSimilarTasksList; /** */ private List unfinishedRunnables; /** * */ public CompareRunnable(ITaskModel taskModel, TaskTraversal[] leftHandTraversals, TaskTraversal[] rightHandTraversals, List mostSimilarTasksList, List unfinishedRunnables) { this.taskModel = taskModel; this.leftHandTraversals = leftHandTraversals; this.rightHandTraversals = rightHandTraversals; this.mostSimilarTasksList = mostSimilarTasksList; this.unfinishedRunnables = unfinishedRunnables; } /* (non-Javadoc) * @see java.lang.Runnable#run() */ @Override public void run() { SimilarTasks mostSimilarTasks = SimilarTasks.UNEQUAL_TASKS; int mostSimilarDiffLevel = MAX_DIFF_LEVEL; List allMostSimilarTasks = new LinkedList(); int counter = 0; LEFT_HAND_TRAVERSAL: for (int i = 0; i < leftHandTraversals.length; i++) { ITask leftHandTask = leftHandTraversals[i].getTask(); RIGHT_HAND_TRAVERSAL: for (int j = 0; j < rightHandTraversals.length; j++) { ITask rightHandTask = rightHandTraversals[j].getTask(); try { if (leftHandTask == rightHandTask) { continue LEFT_HAND_TRAVERSAL; } counter++; if (isComparisonToSkip(leftHandTask, rightHandTask)) { continue RIGHT_HAND_TRAVERSAL; } if (!isBasicallySimilar(leftHandTraversals[i], rightHandTraversals[j])) { continue RIGHT_HAND_TRAVERSAL; } SimilarTasks similarTasks1 = SimilarTasks.compareTraversals (leftHandTraversals[i], rightHandTraversals[j], comparator); SimilarTasks similarTasks2 = SimilarTasks.compareTraversals (rightHandTraversals[j], leftHandTraversals[i], comparator); if ((similarTasks1.getDiffLevel() <= mostSimilarDiffLevel) || (similarTasks2.getDiffLevel() <= mostSimilarDiffLevel)) { SimilarTasks similarTasks = getSimilarTasksToPrefer(similarTasks1, similarTasks2); if (similarTasks.isInBetweenDifference()) { if (similarTasks.getDiffLevel() < mostSimilarDiffLevel) { mostSimilarTasks = similarTasks; mostSimilarDiffLevel = mostSimilarTasks.getDiffLevel(); allMostSimilarTasks.clear(); allMostSimilarTasks.add(similarTasks); } else if (similarTasks.getDiffLevel() == mostSimilarDiffLevel) { allMostSimilarTasks.add(similarTasks); } } } } catch (Exception e) { e.printStackTrace(); SimilarTasks similarTasks1 = SimilarTasks.compareTraversals (leftHandTraversals[i], rightHandTraversals[j], comparator); SimilarTasks similarTasks2 = SimilarTasks.compareTraversals (rightHandTraversals[j], leftHandTraversals[i], comparator); similarTasks1.dump(System.err); similarTasks2.dump(System.err); } } } synchronized (unfinishedRunnables) { mostSimilarTasksList.addAll(allMostSimilarTasks); comparisonCounter += counter; for (int i = 0; i < unfinishedRunnables.size(); i++) { if (unfinishedRunnables.get(i) == this) { unfinishedRunnables.remove(i); unfinishedRunnables.notify(); } } } } /** * */ private boolean isBasicallySimilar(TaskTraversal traversal1, TaskTraversal traversal2) { int length1 = traversal1.size(); int length2 = traversal2.size(); int maxLength = Math.max(length1, length2); int lengthDiff = 100 * Math.abs(length1 - length2) / maxLength; if (lengthDiff > MAX_DIFF_LEVEL) { return false; } else { return true; } } /** *

* TODO: comment *

* * @param similarTasks1 * @param similarTasks2 * @return */ private SimilarTasks getSimilarTasksToPrefer(SimilarTasks similarTasks1, SimilarTasks similarTasks2) { if (similarTasks2.getDiffLevel() > similarTasks1.getDiffLevel()) { return similarTasks2; } else if (similarTasks1.getDiffLevel() > similarTasks2.getDiffLevel()) { return similarTasks1; } else if (similarTasks1.getDiffLevel() == similarTasks2.getDiffLevel()) { ITask first = similarTasks1.getLeftHandSide(); ITask second = similarTasks2.getLeftHandSide(); long valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE); long valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE); if (valSecond > valFirst) { return similarTasks2; } else if (valSecond < valFirst) { return similarTasks1; } // no of covered events is equal, try to distinguish by count valFirst = getTaskMetric(first, TaskMetric.COUNT); valSecond = getTaskMetric(second, TaskMetric.COUNT); if (valSecond > valFirst) { return similarTasks2; } else if (valSecond < valFirst) { return similarTasks1; } // count is equal, try to distinguish by depth valFirst = getTaskMetric(first, TaskMetric.DEPTH); valSecond = getTaskMetric(second, TaskMetric.DEPTH); if (valSecond < valFirst) { return similarTasks2; } else if (valSecond > valFirst) { return similarTasks1; } // no of covered events is equal, try to distinguish by count valFirst = cumulateTaskMetric(first, TaskMetric.COUNT); valSecond = cumulateTaskMetric(second, TaskMetric.COUNT); if (valSecond > valFirst) { return similarTasks2; } else if (valSecond < valFirst) { return similarTasks1; } // depth is equal. Calculate for both the similarity // based on which the merging will take place SimilarTasks tmp = SimilarTasks.getMergableLevelOfSimilarity(similarTasks1, comparator); valFirst = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE; tmp = SimilarTasks.getMergableLevelOfSimilarity(similarTasks2, comparator); valSecond = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE; if (valSecond < valFirst) { return similarTasks2; } else if (valSecond > valFirst) { return similarTasks1; } valFirst = getFirstTimestamp(first); valSecond = getFirstTimestamp(second); if (valSecond > valFirst) { return similarTasks1; } else if (valSecond < valFirst) { return similarTasks2; } similarTasks1.dump(System.out); similarTasks2.dump(System.out); throw new RuntimeException ("several tasks are similar so that it is undecidable which to merge first"); } return null; } /** *

* TODO: comment *

* * @param first * @param eventCoverage * @param taskModel2 * @return */ private int getTaskMetric(ITask task, TaskMetric metric) { ITaskInfo info = taskModel.getTaskInfo(task); return info.getMeasureValue(metric); } /** * convenience method to get the cumulative value of a task metric */ private int cumulateTaskMetric(ITask task, final TaskMetric metric) { final int[] value = new int[1]; value[0] = 0; DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() { @Override public void visit(IStructuringTemporalRelationship relationship) { value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric); super.visit(relationship); } }; task.accept(visitor); return value[0]; } /** *

* TODO: comment *

* * @param first * @return */ private long getFirstTimestamp(ITask task) { long timestamp = Long.MAX_VALUE; for (ITaskInstance instance : task.getInstances()) { ITaskInstance eventTaskInstance = instance; do { if (eventTaskInstance instanceof ITaskInstanceList) { eventTaskInstance = ((ITaskInstanceList) eventTaskInstance).get(0); } else if (eventTaskInstance instanceof ISelectionInstance) { eventTaskInstance = ((ISelectionInstance) eventTaskInstance).getChild(); } } while (!(eventTaskInstance instanceof IEventTaskInstance)); if (eventTaskInstance != null) { long newTimestamp = ((IEventTaskInstance) eventTaskInstance).getEvent().getTimestamp(); if (timestamp > newTimestamp) { timestamp = newTimestamp; } } } return timestamp; } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return "CompareThread(" + leftHandTraversals.length + ", " + rightHandTraversals.length + ")"; } } }