// Copyright 2012 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Match;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.MatchOccurence;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.PairwiseAlignmentGenerator;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.PairwiseAlignmentStorage;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.ObjectDistanceSubstitionMatrix;
import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IOptional;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
import de.ugoe.cs.util.StopWatch;
import de.ugoe.cs.util.console.Console;
/**
*
* This class implements the major rule for creating task trees based on a set
* of recorded user sessions. For this, it first harmonizes all tasks. This
* eases later comparison. Then it searches the sessions for iterations and
* replaces them accordingly. Then it searches for sub sequences being the
* longest and occurring most often. For each found sub sequence, it replaces
* the occurrences by creating appropriate {@link ISequence}s. Afterwards, again
* searches for iterations and then again for sub sequences until no more
* replacements are done.
*
*
*
*
* @author Patrick Harms
*/
class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
/**
*
* the task factory to be used for creating substructures for the temporal
* relationships identified during rul application
*
*/
private ITaskFactory taskFactory;
/**
*
* the task builder to be used for creating substructures for the temporal
* relationships identified during rule application
*
*/
private ITaskBuilder taskBuilder;
/**
*
* the task handling strategy to be used for comparing tasks for
* preparation, i.e., before the tasks are harmonized
*
*/
private TaskHandlingStrategy preparationTaskHandlingStrategy;
/**
*
* the task handling strategy to be used for comparing tasks during
* iteration detection i.e., after the tasks are
* harmonized
*
*/
private TaskHandlingStrategy identityTaskHandlingStrategy;
/**
*
* instantiates the rule and initializes it with a task equality to be
* considered when comparing tasks as well as a task factory and builder to
* be used for creating task structures.
*
*
* @param minimalTaskEquality
* the task equality to be considered when comparing tasks
* @param taskFactory
* the task factory to be used for creating substructures
* @param taskBuilder
* the task builder to be used for creating substructures
*/
SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
ITaskFactory taskFactory, ITaskBuilder taskBuilder) {
this.taskFactory = taskFactory;
this.taskBuilder = taskBuilder;
this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(
minimalTaskEquality);
this.identityTaskHandlingStrategy = new TaskHandlingStrategy(
TaskEquality.IDENTICAL);
}
/*
* (non-Javadoc)
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "SequenceForTaskDetectionRuleAlignment";
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply
* (java.util.List)
*/
@Override
public RuleApplicationResult apply(List sessions) {
RuleApplicationData appData = new RuleApplicationData(sessions);
// this is the real rule application. Loop while something is replaced.
SymbolMap uniqueTasks = harmonizeEventTaskInstancesModel(appData);
// Generate a substitution matrix between all occurring events.
Console.traceln(Level.INFO, "generating substitution matrix");
ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix(
uniqueTasks, 6, -3);
submat.generate();
// Generate pairwise alignments
Console.traceln(Level.INFO, "generating pairwise alignments");
LinkedList matchseqs = new LinkedList();
PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator
.generate(appData.getNumberSequences(), submat, 9);
// Retrieve all matches reached a specific threshold
Console.traceln(Level.INFO, "retrieving significant sequence pieces");
for (int i = 0; i < appData.getNumberSequences().size(); i++) {
Console.traceln(
Level.FINEST,
"retrieving significant sequence pieces: "
+ Math.round((float) i / (float) appData.getNumberSequences().size()
* 100) + "%");
for (int j = 0; j < appData.getNumberSequences().size(); j++) {
if (i != j) {
matchseqs.addAll(alignments.get(i, j).getMatches());
}
}
}
Console.traceln(Level.FINEST,
"retrieving significant sequence pieces: 100%");
Console.traceln(Level.INFO, "searching for patterns occuring most");
// search each match in every other sequence
for (Iterator it = matchseqs.iterator(); it.hasNext();) {
Match pattern = it.next();
// Skip sequences with more 0 events (scrolls) than other events.
// Both of the pattern sequences are equally long, so the added 0
// counts
// just need to be smaller than the length of one sequence
if (pattern.getFirstSequence().eventCount(0)
+ pattern.getSecondSequence().eventCount(0) + 1 > pattern
.getFirstSequence().size())
continue;
for (int j = 0; j < appData.getNumberSequences().size(); j++) {
LinkedList startpositions = appData.getNumberSequences().get(j)
.containsPattern(pattern);
if (startpositions.size() > 0) {
for (Iterator jt = startpositions.iterator(); jt
.hasNext();) {
pattern.addOccurence(
new MatchOccurence(jt.next(), j));
}
}
}
}
Console.traceln(Level.INFO, "sorting results");
// Sort results to get the most occurring results
Comparator comparator = new Comparator() {
public int compare(Match m1, Match m2) {
return m2.occurenceCount() - m1.occurenceCount(); // use your
// logic
}
};
Collections.sort(matchseqs, comparator);
// TODO: Harmonize matches: finding double matches and merge them
/*
Console.traceln(Level.INFO, "harmonizing matches");
int i=0;
while(i 2) {
ITask task = matchAsTask(appData,matchseqs.get(i));
System.out.println(task);
//matchseqs.get(i).getFirstSequence().printSequence();
//matchseqs.get(i).getSecondSequence().printSequence();
//System.out.println("Found pattern "
// + matchseqs.get(i).occurenceCount() + " times");
System.out.println();
}
}
alignments = null;
do {
appData.getStopWatch().start("whole loop"); //
detectAndReplaceIterations(appData);
appData.getStopWatch().start("task replacement"); //
//detectAndReplaceTasks(appData); //
appData.getStopWatch().stop("task replacement"); //
appData.getStopWatch().stop("whole loop");
appData.getStopWatch().dumpStatistics(System.out); //
appData.getStopWatch().reset();
} while (appData.detectedAndReplacedTasks());
Console.println("created "
+ appData.getResult().getNewlyCreatedTasks().size()
+ " new tasks and "
+ appData.getResult().getNewlyCreatedTaskInstances().size()
+ " appropriate instances\n");
if ((appData.getResult().getNewlyCreatedTasks().size() > 0)
|| (appData.getResult().getNewlyCreatedTaskInstances().size() > 0)) {
appData.getResult().setRuleApplicationStatus(
RuleApplicationStatus.FINISHED);
}
return appData.getResult();
}
/**
*
* harmonizes the event task instances by unifying tasks. This is done, as
* initially the event tasks being equal with respect to the considered task
* equality are distinct objects. The comparison of these distinct objects
* is more time consuming than comparing the object references.
*
*
* @param appData
* the rule application data combining all data used for applying
* this rule
* @return Returns the unique tasks symbol map
*/
private SymbolMap harmonizeEventTaskInstancesModel(
RuleApplicationData appData) {
Console.traceln(Level.INFO,
"harmonizing task model of event task instances");
appData.getStopWatch().start("harmonizing event tasks");
SymbolMap uniqueTasks = preparationTaskHandlingStrategy
.createSymbolMap();
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
int unifiedTasks = 0;
ITask task;
List sessions = appData.getSessions();
int sessionNo = 0;
for (IUserSession session : sessions) {
Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". "
+ session);
NumberSequence templist = new NumberSequence(session.size());
for (int i = 0; i < session.size(); i++) {
ITaskInstance taskInstance = session.get(i);
task = uniqueTasks.getValue(taskInstance);
if (task == null) {
uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
templist.getSequence()[i] = taskInstance.getTask().getId();
} else {
taskBuilder.setTask(taskInstance, task);
templist.getSequence()[i] = task.getId();
unifiedTasks++;
}
appData.getNumber2Task().put(templist.getSequence()[i], taskInstance.getTask());
}
templist.setId(appData.getNumberSequences().size());
appData.getNumberSequences().add(templist);
comparator.clearBuffers();
}
appData.getStopWatch().stop("harmonizing event tasks");
Console.traceln(Level.INFO, "harmonized " + unifiedTasks
+ " task occurrences (still " + uniqueTasks.size()
+ " different tasks)");
appData.getStopWatch().dumpStatistics(System.out);
appData.getStopWatch().reset();
return uniqueTasks;
}
/**
*
* searches for direct iterations of single tasks in all sequences and
* replaces them with {@link IIteration}s, respectively appropriate
* instances. Also all single occurrences of a task that is iterated
* somewhen are replaced with iterations to have again an efficient way for
* task comparisons.
*
*
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void detectAndReplaceIterations(RuleApplicationData appData) {
Console.traceln(Level.FINE, "detecting iterations");
appData.getStopWatch().start("detecting iterations");
List sessions = appData.getSessions();
Set iteratedTasks = searchIteratedTasks(sessions);
if (iteratedTasks.size() > 0) {
replaceIterationsOf(iteratedTasks, sessions, appData);
}
appData.getStopWatch().stop("detecting iterations");
Console.traceln(Level.INFO, "replaced " + iteratedTasks.size()
+ " iterated tasks");
}
/**
*
* searches the provided sessions for task iterations. If a task is
* iterated, it is added to the returned set.
*
*
* @param the
* session to search for iterations in
*
* @return a set of tasks being iterated somewhere
*/
private Set searchIteratedTasks(List sessions) {
Set iteratedTasks = new HashSet();
for (IUserSession session : sessions) {
for (int i = 0; i < (session.size() - 1); i++) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
if (session.get(i).getTask() == session.get(i + 1).getTask()) {
iteratedTasks.add(session.get(i).getTask());
}
}
}
return iteratedTasks;
}
/**
*
* replaces all occurrences of all tasks provided in the set with iterations
*
*
* @param iteratedTasks
* the tasks to be replaced with iterations
* @param sessions
* the sessions in which the tasks are to be replaced
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void replaceIterationsOf(Set iteratedTasks,
List sessions, RuleApplicationData appData) {
Map iterations = new HashMap();
Map> iterationInstances = new HashMap>();
for (ITask iteratedTask : iteratedTasks) {
IIteration iteration = taskFactory.createNewIteration();
iterations.put(iteratedTask, iteration);
iterationInstances.put(iteration,
new LinkedList());
}
IIterationInstance iterationInstance;
for (IUserSession session : sessions) {
int index = 0;
iterationInstance = null;
while (index < session.size()) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
ITask currentTask = session.get(index).getTask();
IIteration iteration = iterations.get(currentTask);
if (iteration != null) {
if ((iterationInstance == null)
|| (iterationInstance.getTask() != iteration)) {
iterationInstance = taskFactory
.createNewTaskInstance(iteration);
iterationInstances.get(iteration)
.add(iterationInstance);
taskBuilder.addTaskInstance(session, index,
iterationInstance);
index++;
}
taskBuilder.addChild(iterationInstance, session.get(index));
taskBuilder.removeTaskInstance(session, index);
} else {
if (iterationInstance != null) {
iterationInstance = null;
}
index++;
}
}
}
for (Map.Entry> entry : iterationInstances
.entrySet()) {
harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
}
}
ITask matchAsTask(RuleApplicationData appData,Match m) {
ISequence sequence = taskFactory.createNewSequence();
int[] first = m.getFirstSequence().getSequence();
int[] second = m.getSecondSequence().getSequence();
System.out.println("Number2Task size: " +appData.getNumber2Task().size());
//Both sequences of a match are equally long
for(int i=0;i
* TODO clarify why this is done
*
*/
private void harmonizeIterationInstancesModel(IIteration iteration,
List iterationInstances) {
List iteratedTaskVariants = new LinkedList();
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
// merge the lexically different variants of iterated task to a unique
// list
for (IIterationInstance iterationInstance : iterationInstances) {
for (ITaskInstance executionVariant : iterationInstance) {
ITask candidate = executionVariant.getTask();
boolean found = false;
for (ITask taskVariant : iteratedTaskVariants) {
if (comparator.areLexicallyEqual(taskVariant, candidate)) {
taskBuilder.setTask(executionVariant, taskVariant);
found = true;
break;
}
}
if (!found) {
iteratedTaskVariants.add(candidate);
}
}
}
// if there are more than one lexically different variant of iterated
// tasks, adapt the
// iteration model to be a selection of different variants. In this case
// also adapt
// the generated iteration instances to correctly contain selection
// instances. If there
// is only one variant of an iterated task, simply set this as the
// marked task of the
// iteration. In this case, the instances can be preserved as is
if (iteratedTaskVariants.size() > 1) {
ISelection selection = taskFactory.createNewSelection();
for (ITask variant : iteratedTaskVariants) {
taskBuilder.addChild(selection, variant);
}
taskBuilder.setMarkedTask(iteration, selection);
for (IIterationInstance instance : iterationInstances) {
for (int i = 0; i < instance.size(); i++) {
ISelectionInstance selectionInstance = taskFactory
.createNewTaskInstance(selection);
taskBuilder.setChild(selectionInstance, instance.get(i));
taskBuilder.setTaskInstance(instance, i, selectionInstance);
}
}
} else {
taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
}
}
/**
* TODO go on commenting
*
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void detectAndReplaceTasks(RuleApplicationData appData) {
Console.traceln(Level.FINE, "detecting and replacing tasks");
appData.getStopWatch().start("detecting tasks");
//getSequencesOccuringMostOften(appData);
appData.getStopWatch().stop("detecting tasks");
appData.getStopWatch().start("replacing tasks");
replaceSequencesOccurringMostOften(appData);
appData.getStopWatch().stop("replacing tasks");
//Console.traceln(Level.INFO, "detected and replaced "
// + appData.getLastFoundTasks().size() + " tasks occuring "
// + appData.getLastFoundTasks().getOccurrenceCount() + " times");
}
/**
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
appData.detectedAndReplacedTasks(false);
Console.traceln(Level.FINER, "replacing tasks occurrences");
/*
for (List task : appData.getLastFoundTasks()) {
ISequence sequence = taskFactory.createNewSequence();
Console.traceln(Level.FINEST, "replacing " + sequence.getId()
+ ": " + task);
List sequenceInstances = replaceTaskOccurrences(
task, appData.getSessions(), sequence);
harmonizeSequenceInstancesModel(sequence, sequenceInstances,
task.size());
appData.detectedAndReplacedTasks(appData
.detectedAndReplacedTasks()
|| (sequenceInstances.size() > 0));
if (sequenceInstances.size() < appData.getLastFoundTasks()
.getOccurrenceCount()) {
Console.traceln(Level.FINE,
sequence.getId()
+ ": replaced task only "
+ sequenceInstances.size()
+ " times instead of expected "
+ appData.getLastFoundTasks()
.getOccurrenceCount());
}
}
*/
}
/**
*
*/
private void harmonizeSequenceInstancesModel(ISequence sequence,
List sequenceInstances, int sequenceLength) {
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
// ensure for each subtask that lexically different variants are
// preserved
for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
List subTaskVariants = new LinkedList();
for (ISequenceInstance sequenceInstance : sequenceInstances) {
ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
boolean found = false;
for (int i = 0; i < subTaskVariants.size(); i++) {
if (comparator.areLexicallyEqual(subTaskVariants.get(i),
candidate)) {
taskBuilder.setTask(sequenceInstance.get(subTaskIndex),
subTaskVariants.get(i));
found = true;
break;
}
}
if (!found) {
subTaskVariants.add(candidate);
}
}
// if there are more than one lexically different variant of the sub
// task at
// the considered position, adapt the sequence model at that
// position to have
// a selection of the different variants. In this case also adapt
// the
// generated sequence instances to correctly contain selection
// instances. If
// there is only one variant of sub tasks at the given position,
// simply set
// this variant as the sub task of the selection. In this case, the
// instances
// can be preserved as is
if (subTaskVariants.size() > 1) {
ISelection selection = taskFactory.createNewSelection();
for (ITask variant : subTaskVariants) {
taskBuilder.addChild(selection, variant);
}
taskBuilder.addChild(sequence, selection);
for (ISequenceInstance instance : sequenceInstances) {
ISelectionInstance selectionInstance = taskFactory
.createNewTaskInstance(selection);
taskBuilder.setChild(selectionInstance,
instance.get(subTaskIndex));
taskBuilder.setTaskInstance(instance, subTaskIndex,
selectionInstance);
}
} else if (subTaskVariants.size() == 1) {
taskBuilder.addChild(sequence, subTaskVariants.get(0));
}
}
}
/**
* @param tree
*/
private List replaceTaskOccurrences(
List task, List sessions,
ISequence temporalTaskModel) {
List sequenceInstances = new LinkedList();
for (IUserSession session : sessions) {
int index = -1;
do {
index = getSubListIndex(session, task, ++index);
if (index > -1) {
sequenceInstances.add(RuleUtils
.createNewSubSequenceInRange(session, index, index
+ task.size() - 1, temporalTaskModel,
taskFactory, taskBuilder));
}
} while (index > -1);
}
return sequenceInstances;
}
/**
* @param trie
* @param object
* @return
*/
private int getSubListIndex(ITaskInstanceList list,
List subList, int startIndex) {
boolean matchFound;
int result = -1;
for (int i = startIndex; i <= list.size() - subList.size(); i++) {
matchFound = true;
for (int j = 0; j < subList.size(); j++) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
if (list.get(i + j).getTask() != subList.get(j).getTask()) {
matchFound = false;
break;
}
}
if (matchFound) {
result = i;
break;
}
}
return result;
}
/**
*
*/
private static class RuleApplicationData {
private HashMap number2task;
private ArrayList numberseqs;
/**
*
*/
private List sessions;
/**
*
*/
private boolean detectedAndReplacedTasks;
/**
*
*/
private RuleApplicationResult result;
/**
*
*/
private StopWatch stopWatch;
/**
*
*/
private RuleApplicationData(List sessions) {
this.sessions = sessions;
numberseqs = new ArrayList();
number2task = new HashMap();
stopWatch= new StopWatch();
result = new RuleApplicationResult();
}
/**
* @return the tree
*/
private List getSessions() {
return sessions;
}
private ArrayList getNumberSequences() {
return numberseqs;
}
/**
*
*/
private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
this.detectedAndReplacedTasks = detectedAndReplacedTasks;
}
/**
*
*/
private boolean detectedAndReplacedTasks() {
return detectedAndReplacedTasks;
}
/**
* @return the result
*/
private RuleApplicationResult getResult() {
return result;
}
/**
* @return the stopWatch
*/
private StopWatch getStopWatch() {
return stopWatch;
}
private HashMap getNumber2Task() {
return number2task;
}
}
}