// Copyright 2012 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.autoquest.tasktrees.temporalrelation.utils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import de.ugoe.cs.autoquest.tasktrees.temporalrelation.TaskComparator;
import de.ugoe.cs.autoquest.tasktrees.treeifc.DefaultTaskTraversingVisitor;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IEventTaskInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IStructuringTemporalRelationship;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInfo;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskModel;
import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskMetric;
import de.ugoe.cs.autoquest.tasktrees.treeifc.TaskTreeUtils;
import de.ugoe.cs.util.console.Console;
/**
*
* This class is a utility class for detecting similar tasks. It compares all tasks in a given
* list with each other and then chooses the pair with the highest level of similarity. For
* comparison, the tasks are traversed and only the traversals are compared with each other.
*
*
* Several task pairs may have the same similarity level. In this case, the class performs a
* filtering of the pairs to result in a merging order for the tasks that is always the same.
*
*
* If provided with many tasks, this class starts several threads to let the comparisons run in
* parallel.
*
*
* @author Patrick Harms
*/
public class MostSimilarTaskDeterminer {
/**
* If this similarity level is exceeded, two tasks are not considered similar anymore. 33 means
* that at most 33% of the elements of both task traversals are not the same.
*/
private static int MAX_DIFF_LEVEL = 33;
/**
* task comparator used internally
*/
private TaskComparator comparator;
/**
* for performance reasons, some task comparisons can be excluded beforehand and the exclusions
* are stored in this list
*/
private Set comparisonsToSkip = new HashSet<>();
/** TODO comment */
private long comparisonCounter = 0;
/**
* Initialize instances of this class with the task comparator to be used
*/
public MostSimilarTaskDeterminer(TaskComparator comparator) {
this.comparator = comparator;
}
/**
* add a pair of comparisons that is not done to increase performance
*/
public void addComparisonToSkip(ITask task1, ITask task2) {
comparisonsToSkip.add(getMapId(task1, task2));
}
/**
* returns a list of most similar tasks. Independent of the order of the provided tasks, the
* returned list will always be the same for the same input elements.
*/
public List getMostSimilarTasks(List tasks, ITaskModel taskModel) {
Console.println("comparing " + tasks.size() + " sequences with each other");
LinkedList mostSimilarTasksList = performComparisons(taskModel, tasks);
Console.println("initially found " + mostSimilarTasksList.size() + " similar sequences");
applyFilterForSmallestDiffLevel(mostSimilarTasksList);
Console.println("only " + mostSimilarTasksList.size() + " have the smallest diff level");
applyFilterForParents(mostSimilarTasksList);
Console.println(mostSimilarTasksList.size() + " remain after filtering for parents");
applyFilterForMostCoveredEvents(mostSimilarTasksList, taskModel);
Console.println("calculated " + mostSimilarTasksList.size() + " most similar sequences");
return mostSimilarTasksList;
}
/**
* filters the given list of similar tasks for those having the smallest diff level, which is
* in turn the highest similarity
*/
private void applyFilterForSmallestDiffLevel(LinkedList mostSimilarTasksList) {
// determine the smallest diff level
int smallestDiffLevel = Integer.MAX_VALUE;
for (SimilarTasks candidate : mostSimilarTasksList) {
if (candidate.getDiffLevel() < smallestDiffLevel) {
smallestDiffLevel = candidate.getDiffLevel();
}
}
if (smallestDiffLevel <= MAX_DIFF_LEVEL) {
// remove all entries with a higher diff level
Iterator listIterator = mostSimilarTasksList.iterator();
while (listIterator.hasNext()) {
if (listIterator.next().getDiffLevel() > smallestDiffLevel) {
listIterator.remove();
}
}
Console.println("smallest diff level is " + smallestDiffLevel);
}
else {
mostSimilarTasksList.clear();
}
}
/**
* ensures that the given list of similar tasks does not contain two pairs, where one refers
* to a child task of another pair.
*/
private void applyFilterForParents(LinkedList mostSimilarTasksList) {
// remove all entries being parents of another entry or where both tasks are
// generated through this rule
Iterator listIterator = mostSimilarTasksList.iterator();
List similarTasksToRemove = new LinkedList();
while (listIterator.hasNext()) {
SimilarTasks candidate = listIterator.next();
ITask task1 = candidate.getLeftHandSide();
ITask task2 = candidate.getRightHandSide();
for (SimilarTasks potentialChild : mostSimilarTasksList) {
ITask task3 = potentialChild.getLeftHandSide();
ITask task4 = potentialChild.getRightHandSide();
if (TaskTreeUtils.isChild(task3, task1) ||
TaskTreeUtils.isChild(task3, task2) ||
TaskTreeUtils.isChild(task4, task1) ||
TaskTreeUtils.isChild(task4, task2))
{
similarTasksToRemove.add(candidate);
break;
}
}
}
listIterator = mostSimilarTasksList.iterator();
while (listIterator.hasNext()) {
SimilarTasks candidate = listIterator.next();
for (SimilarTasks toRemove : similarTasksToRemove) {
if (candidate == toRemove) {
listIterator.remove();
}
}
}
}
/**
* performs a filtering of the detected similar tasks which ensures, that the list does not
* contain two pairs referring to the same task and that in such cases always the same pair
* will remain.
*/
private void applyFilterForMostCoveredEvents(LinkedList mostSimilarTasksList,
ITaskModel taskModel)
{
for (SimilarTasks similarTasks : mostSimilarTasksList) {
System.out.println(similarTasks.getLeftHandSide() + " " + similarTasks.getRightHandSide());
}
// check, if several remaining similar tasks refer to the same task
Map> referredTasks =
new HashMap>();
for (SimilarTasks similarTasks : mostSimilarTasksList) {
ensureMapping(similarTasks.getLeftHandSide(), similarTasks, referredTasks);
ensureMapping(similarTasks.getRightHandSide(), similarTasks, referredTasks);
}
// remove all entries of tasks occurring only once
List tasksOccuringOnce = new LinkedList();
for (Map.Entry> entry : referredTasks.entrySet()) {
if (entry.getValue().size() <= 1) {
tasksOccuringOnce.add(entry.getKey());
}
}
for (ITask taskToRemove : tasksOccuringOnce) {
referredTasks.remove(taskToRemove);
}
// if there are remaining tasks occurring several times, try to extract one similar tasks
// object, that should be merged first
if (referredTasks.size() > 0) {
mostSimilarTasksList.clear();
Console.traceln(Level.FINEST, "several comparisons for the same task exist with " +
"same diff level --> filtering for pair to be merged first");
SimilarTasks firstToMerge = null;
for (LinkedList pairs : referredTasks.values()) {
for (SimilarTasks current : pairs) {
if (firstToMerge == null) {
firstToMerge = current;
}
else if (firstToMerge != current) {
firstToMerge = getFirstToMerge(firstToMerge, current, taskModel);
}
}
}
if (firstToMerge != null) {
mostSimilarTasksList.add(firstToMerge);
}
}
}
/**
*
* compares two similar tasks and decides which of them is to be merged first.
*
*/
private SimilarTasks getFirstToMerge(SimilarTasks first,
SimilarTasks second,
ITaskModel taskModel)
{
int valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE, taskModel);
int valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE, taskModel);
if (valSecond > valFirst) {
return second;
}
else if (valSecond < valFirst) {
return first;
}
// no of covered events is equal, try to distinguish by count
valFirst = getTaskMetric(first, TaskMetric.COUNT, taskModel);
valSecond = getTaskMetric(second, TaskMetric.COUNT, taskModel);
if (valSecond > valFirst) {
return second;
}
else if (valSecond < valFirst) {
return first;
}
// count is equal, try to distinguish by depth
valFirst = getTaskMetric(first, TaskMetric.DEPTH, taskModel);
valSecond = getTaskMetric(second, TaskMetric.DEPTH, taskModel);
if (valSecond < valFirst) {
return second;
}
else if (valSecond > valFirst) {
return first;
}
// no of covered events is equal, try to distinguish by count
valFirst = cumulateTaskMetric(first, TaskMetric.COUNT, taskModel);
valSecond = cumulateTaskMetric(second, TaskMetric.COUNT, taskModel);
if (valSecond > valFirst) {
return second;
}
else if (valSecond < valFirst) {
return first;
}
// depth is equal. Calculate for both the similarity
// based on which the merging will take place
SimilarTasks tmp = SimilarTasks.getMergableLevelOfSimilarity(first, comparator);
valFirst = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE;
tmp = SimilarTasks.getMergableLevelOfSimilarity(second, comparator);
valSecond = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE;
if (valSecond < valFirst) {
return second;
}
else if (valSecond > valFirst) {
return first;
}
first.dump(System.out);
second.dump(System.out);
throw new RuntimeException
("several tasks are similar so that it is undecidable which to merge first");
}
/**
*
*/
private void ensureMapping(ITask task,
SimilarTasks similarTasks,
Map> map)
{
LinkedList value = map.get(task);
if (value == null) {
value = new LinkedList();
map.put(task, value);
}
value.add(similarTasks);
}
/**
* starts several threads performing the task comparisons
*/
private LinkedList performComparisons(ITaskModel taskModel, List tasks) {
LinkedList mostSimilarTasks = new LinkedList();
List startedRunnables = new LinkedList();
comparisonCounter = 0;
// groups size is minimal 100 or maximal 1000
int groupSize = Math.max
(100, Math.min(3000, 1 + (tasks.size() / Runtime.getRuntime().availableProcessors())));
synchronized (startedRunnables) {
int start1 = 0;
int end1;
int start2;
int end2;
do {
end1 = Math.min(start1 + groupSize, tasks.size());
TaskTraversal[] leftHandTraversals = new TaskTraversal[end1 - start1];
Console.traceln(Level.FINE, "calculating left hand traversals from " + start1 +
" to " + end1);
int index = 0;
for (int j = start1; j < end1; j++) {
leftHandTraversals[index++] =
TaskTraversal.getTraversal(tasks.get(j), null);
}
start2 = 0;
do {
end2 = Math.min(start2 + groupSize, end1 - 1);
TaskTraversal[] rightHandTraversals = new TaskTraversal[end2 - start2];
if (end2 <= start1) {
Console.traceln(Level.FINE, "calculating right hand traversals from " +
start2 + " to " + end2);
// traversals need to be created
index = 0;
for (int j = start2; j < end2; j++) {
rightHandTraversals[index++] =
TaskTraversal.getTraversal(tasks.get(j), null);
}
}
else {
// traversals can be reused
Console.traceln(Level.FINE, "reusing traversals for right hand from " +
start2 + " to " + end2);
rightHandTraversals = leftHandTraversals;
}
Runnable runnable = new CompareRunnable(taskModel, leftHandTraversals,
rightHandTraversals,
mostSimilarTasks, startedRunnables);
while (startedRunnables.size() >=
Math.max(1, Runtime.getRuntime().availableProcessors()))
{
try {
Console.traceln(Level.FINER, "waiting for next thread to finish");
startedRunnables.wait();
Console.traceln(Level.FINER, "thread finished");
}
catch (InterruptedException e) {
// should not happen
Console.logException(e);
}
}
Console.traceln(Level.FINER, "starting next thread");
startedRunnables.add(runnable);
new Thread(runnable).start();
Console.traceln(Level.FINER, "started next thread " + runnable);
start2 = end2;
}
while (end2 < (end1 - 1));
start1 = end1;
}
while (end1 < tasks.size());
while (startedRunnables.size() > 0) {
try {
Console.traceln(Level.FINER, "waiting for next thread to finish");
startedRunnables.wait();
Console.traceln(Level.FINER, "thread finished");
}
catch (InterruptedException e) {
// should not happen
Console.logException(e);
}
}
}
Console.traceln
(Level.FINER, "all threads finished, " + comparisonCounter + " comparisons done");
if (comparisonCounter != (((tasks.size() - 1) * tasks.size()) / 2)) {
throw new RuntimeException(comparisonCounter + " " +
(((tasks.size() - 1) * tasks.size()) / 2));
}
return mostSimilarTasks;
}
/**
* convenience method to get the value of a task metric
*/
private int getTaskMetric(SimilarTasks similarTasks, TaskMetric metric, ITaskModel taskModel) {
if (similarTasks == null) {
return 0;
}
ITaskInfo info1 = taskModel.getTaskInfo(similarTasks.getLeftHandSide());
ITaskInfo info2 = taskModel.getTaskInfo(similarTasks.getRightHandSide());
return info1.getMeasureValue(metric) + info2.getMeasureValue(metric);
}
/**
* convenience method to get the cumulative value of a task metric
*/
private int cumulateTaskMetric(SimilarTasks similarTasks,
final TaskMetric metric,
final ITaskModel taskModel)
{
final int[] value = new int[1];
value[0] = 0;
DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() {
@Override
public void visit(IStructuringTemporalRelationship relationship) {
value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric);
super.visit(relationship);
}
};
similarTasks.getLeftHandSide().accept(visitor);
similarTasks.getRightHandSide().accept(visitor);
return value[0];
}
/**
* convenience method to check if a specific comparison shall be skipped
*/
private boolean isComparisonToSkip(ITask task1, ITask task2) {
return comparisonsToSkip.contains(getMapId(task1, task2));
}
/**
* convenience method to get a unique id for representing a pair of two tasks
*/
private long getMapId(ITask task1, ITask task2) {
if (task1.getId() < task2.getId()) {
return (((long) task1.getId()) << 32) + task2.getId();
}
else {
return (((long) task2.getId()) << 32) + task1.getId();
}
}
/**
* Runnable performing a subset of task comparisons in a dedicated thread
*/
public class CompareRunnable implements Runnable {
/** */
private ITaskModel taskModel;
/** */
private TaskTraversal[] leftHandTraversals;
/** */
private TaskTraversal[] rightHandTraversals;
/** */
private List mostSimilarTasksList;
/** */
private List unfinishedRunnables;
/**
*
*/
public CompareRunnable(ITaskModel taskModel,
TaskTraversal[] leftHandTraversals,
TaskTraversal[] rightHandTraversals,
List mostSimilarTasksList,
List unfinishedRunnables)
{
this.taskModel = taskModel;
this.leftHandTraversals = leftHandTraversals;
this.rightHandTraversals = rightHandTraversals;
this.mostSimilarTasksList = mostSimilarTasksList;
this.unfinishedRunnables = unfinishedRunnables;
}
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
@Override
public void run() {
SimilarTasks mostSimilarTasks = SimilarTasks.UNEQUAL_TASKS;
int mostSimilarDiffLevel = MAX_DIFF_LEVEL;
List allMostSimilarTasks = new LinkedList();
int counter = 0;
LEFT_HAND_TRAVERSAL:
for (int i = 0; i < leftHandTraversals.length; i++) {
ITask leftHandTask = leftHandTraversals[i].getTask();
RIGHT_HAND_TRAVERSAL:
for (int j = 0; j < rightHandTraversals.length; j++) {
ITask rightHandTask = rightHandTraversals[j].getTask();
try {
if (leftHandTask == rightHandTask) {
continue LEFT_HAND_TRAVERSAL;
}
counter++;
if (isComparisonToSkip(leftHandTask, rightHandTask)) {
continue RIGHT_HAND_TRAVERSAL;
}
if (!isBasicallySimilar(leftHandTraversals[i], rightHandTraversals[j])) {
continue RIGHT_HAND_TRAVERSAL;
}
SimilarTasks similarTasks1 = SimilarTasks.compareTraversals
(leftHandTraversals[i], rightHandTraversals[j], comparator);
SimilarTasks similarTasks2 = SimilarTasks.compareTraversals
(rightHandTraversals[j], leftHandTraversals[i], comparator);
if ((similarTasks1.getDiffLevel() <= mostSimilarDiffLevel) ||
(similarTasks2.getDiffLevel() <= mostSimilarDiffLevel))
{
SimilarTasks similarTasks =
getSimilarTasksToPrefer(similarTasks1, similarTasks2);
if (similarTasks.isInBetweenDifference() ||
(similarTasks.getPatch().getDeltas().size() == 0))
{
if (similarTasks.getDiffLevel() < mostSimilarDiffLevel) {
mostSimilarTasks = similarTasks;
mostSimilarDiffLevel = mostSimilarTasks.getDiffLevel();
allMostSimilarTasks.clear();
allMostSimilarTasks.add(similarTasks);
}
else if (similarTasks.getDiffLevel() == mostSimilarDiffLevel) {
allMostSimilarTasks.add(similarTasks);
}
}
}
}
catch (Exception e) {
e.printStackTrace();
SimilarTasks similarTasks1 = SimilarTasks.compareTraversals
(leftHandTraversals[i], rightHandTraversals[j], comparator);
SimilarTasks similarTasks2 = SimilarTasks.compareTraversals
(rightHandTraversals[j], leftHandTraversals[i], comparator);
similarTasks1.dump(System.err);
similarTasks2.dump(System.err);
}
}
}
synchronized (unfinishedRunnables) {
mostSimilarTasksList.addAll(allMostSimilarTasks);
comparisonCounter += counter;
for (int i = 0; i < unfinishedRunnables.size(); i++) {
if (unfinishedRunnables.get(i) == this) {
unfinishedRunnables.remove(i);
unfinishedRunnables.notify();
}
}
}
}
/**
*
*/
private boolean isBasicallySimilar(TaskTraversal traversal1, TaskTraversal traversal2) {
int length1 = traversal1.size();
int length2 = traversal2.size();
int maxLength = Math.max(length1, length2);
int lengthDiff = 100 * Math.abs(length1 - length2) / maxLength;
if (lengthDiff > MAX_DIFF_LEVEL) {
return false;
}
else {
return true;
}
}
/**
*
* TODO: comment
*
*
* @param similarTasks1
* @param similarTasks2
* @return
*/
private SimilarTasks getSimilarTasksToPrefer(SimilarTasks similarTasks1,
SimilarTasks similarTasks2)
{
if (similarTasks2.getDiffLevel() > similarTasks1.getDiffLevel()) {
return similarTasks2;
}
else if (similarTasks1.getDiffLevel() > similarTasks2.getDiffLevel()) {
return similarTasks1;
}
else if (similarTasks1.getDiffLevel() == similarTasks2.getDiffLevel()) {
ITask first = similarTasks1.getLeftHandSide();
ITask second = similarTasks2.getLeftHandSide();
long valFirst = getTaskMetric(first, TaskMetric.EVENT_COVERAGE);
long valSecond = getTaskMetric(second, TaskMetric.EVENT_COVERAGE);
if (valSecond > valFirst) {
return similarTasks2;
}
else if (valSecond < valFirst) {
return similarTasks1;
}
// no of covered events is equal, try to distinguish by count
valFirst = getTaskMetric(first, TaskMetric.COUNT);
valSecond = getTaskMetric(second, TaskMetric.COUNT);
if (valSecond > valFirst) {
return similarTasks2;
}
else if (valSecond < valFirst) {
return similarTasks1;
}
// count is equal, try to distinguish by depth
valFirst = getTaskMetric(first, TaskMetric.DEPTH);
valSecond = getTaskMetric(second, TaskMetric.DEPTH);
if (valSecond < valFirst) {
return similarTasks2;
}
else if (valSecond > valFirst) {
return similarTasks1;
}
// no of covered events is equal, try to distinguish by count
valFirst = cumulateTaskMetric(first, TaskMetric.COUNT);
valSecond = cumulateTaskMetric(second, TaskMetric.COUNT);
if (valSecond > valFirst) {
return similarTasks2;
}
else if (valSecond < valFirst) {
return similarTasks1;
}
// depth is equal. Calculate for both the similarity
// based on which the merging will take place
SimilarTasks tmp =
SimilarTasks.getMergableLevelOfSimilarity(similarTasks1, comparator);
valFirst = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE;
tmp = SimilarTasks.getMergableLevelOfSimilarity(similarTasks2, comparator);
valSecond = tmp != null ? tmp.getDiffLevel() : Integer.MAX_VALUE;
if (valSecond < valFirst) {
return similarTasks2;
}
else if (valSecond > valFirst) {
return similarTasks1;
}
valFirst = getFirstTimestamp(first);
valSecond = getFirstTimestamp(second);
if (valSecond > valFirst) {
return similarTasks1;
}
else if (valSecond < valFirst) {
return similarTasks2;
}
similarTasks1.dump(System.out);
similarTasks2.dump(System.out);
throw new RuntimeException
("several tasks are similar so that it is undecidable which to merge first");
}
return null;
}
/**
*
* TODO: comment
*
*
* @param first
* @param eventCoverage
* @param taskModel2
* @return
*/
private int getTaskMetric(ITask task, TaskMetric metric) {
ITaskInfo info = taskModel.getTaskInfo(task);
return info.getMeasureValue(metric);
}
/**
* convenience method to get the cumulative value of a task metric
*/
private int cumulateTaskMetric(ITask task,
final TaskMetric metric)
{
final int[] value = new int[1];
value[0] = 0;
DefaultTaskTraversingVisitor visitor = new DefaultTaskTraversingVisitor() {
@Override
public void visit(IStructuringTemporalRelationship relationship) {
value[0] += taskModel.getTaskInfo(relationship).getMeasureValue(metric);
super.visit(relationship);
}
};
task.accept(visitor);
return value[0];
}
/**
*
* TODO: comment
*
*
* @param first
* @return
*/
private long getFirstTimestamp(ITask task) {
long timestamp = Long.MAX_VALUE;
for (ITaskInstance instance : task.getInstances()) {
ITaskInstance eventTaskInstance = instance;
do {
if (eventTaskInstance instanceof ITaskInstanceList) {
eventTaskInstance = ((ITaskInstanceList) eventTaskInstance).get(0);
}
else if (eventTaskInstance instanceof ISelectionInstance) {
eventTaskInstance = ((ISelectionInstance) eventTaskInstance).getChild();
}
}
while (!(eventTaskInstance instanceof IEventTaskInstance));
if (eventTaskInstance != null) {
long newTimestamp =
((IEventTaskInstance) eventTaskInstance).getEvent().getTimestamp();
if (timestamp > newTimestamp) {
timestamp = newTimestamp;
}
}
}
return timestamp;
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "CompareThread(" + leftHandTraversals.length + ", " +
rightHandTraversals.length + ")";
}
}
}