// Copyright 2012 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.FengDoolittle;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence;
import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWatermanRepeated;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.ObjectDistanceSubstitionMatrix;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.TriangleMatrix;
import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.UPGMAMatrix;
import de.ugoe.cs.autoquest.tasktrees.alignment.pal.tree.UPGMATree;
import de.ugoe.cs.autoquest.tasktrees.taskequality.TaskEquality;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IIterationInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelectionInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequenceInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskBuilder;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskFactory;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstanceList;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
import de.ugoe.cs.autoquest.usageprofiles.Trie;
import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
import de.ugoe.cs.util.StopWatch;
import de.ugoe.cs.util.console.Console;
/**
*
* This class implements the major rule for creating task trees based on a set
* of recorded user sessions. For this, it first harmonizes all tasks. This
* eases later comparison. Then it searches the sessions for iterations and
* replaces them accordingly. Then it searches for sub sequences being the
* longest and occurring most often. For each found sub sequence, it replaces
* the occurrences by creating appropriate {@link ISequence}s. Afterwards, again
* searches for iterations and then again for sub sequences until no more
* replacements are done.
*
*
* For determining the longest sequence occurring most often, the implementation
* uses a {@link Trie}. The depth of the tree is initially 3. If the algorithm
* has a longest sequence occurring most often whose length is equal to the
* depth of the trie, it recalculates the trie with an increased depth.
*
*
* @author Patrick Harms
*/
class SequenceForTaskDetectionRuleAlignment implements ISessionScopeRule {
/**
*
* the task factory to be used for creating substructures for the temporal
* relationships identified during rul application
*
*/
private ITaskFactory taskFactory;
/**
*
* the task builder to be used for creating substructures for the temporal
* relationships identified during rule application
*
*/
private ITaskBuilder taskBuilder;
/**
*
* the task handling strategy to be used for comparing tasks for
* preparation, i.e., before the tasks are harmonized
*
*/
private TaskHandlingStrategy preparationTaskHandlingStrategy;
/**
*
* the task handling strategy to be used for comparing tasks during
* iteration detection an trie generation, i.e., after the tasks are
* harmonized
*
*/
private TaskHandlingStrategy identityTaskHandlingStrategy;;
private ArrayList numberseqs;
/**
*
* instantiates the rule and initializes it with a task equality to be
* considered when comparing tasks as well as a task factory and builder to
* be used for creating task structures.
*
*
* @param minimalTaskEquality
* the task equality to be considered when comparing tasks
* @param taskFactory
* the task factory to be used for creating substructures
* @param taskBuilder
* the task builder to be used for creating substructures
*/
SequenceForTaskDetectionRuleAlignment(TaskEquality minimalTaskEquality,
ITaskFactory taskFactory, ITaskBuilder taskBuilder) {
this.taskFactory = taskFactory;
this.taskBuilder = taskBuilder;
this.preparationTaskHandlingStrategy = new TaskHandlingStrategy(
minimalTaskEquality);
this.identityTaskHandlingStrategy = new TaskHandlingStrategy(
TaskEquality.IDENTICAL);
numberseqs = new ArrayList();
}
/*
* (non-Javadoc)
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "SequenceForTaskDetectionRuleAlignment";
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.autoquest.tasktrees.temporalrelation.ISessionScopeRule#apply
* (java.util.List)
*/
@Override
public RuleApplicationResult apply(List sessions) {
RuleApplicationData appData = new RuleApplicationData(sessions);
// this is the real rule application. Loop while something is replaced.
SymbolMap uniqueTasks = harmonizeEventTaskInstancesModel(appData);
ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix(
uniqueTasks, 5);
submat.generate();
UPGMAMatrix sequenceDistances = new UPGMAMatrix(numberseqs.size());
sequenceDistances.initialize(Double.POSITIVE_INFINITY);
for (int i = 0; i < numberseqs.size(); i++) {
NumberSequence ns1 = numberseqs.get(i);
//System.out.print("Sequence " + i + ": ");
//ns1.printSequence();
for (int j = 0; j < numberseqs.size(); j++) {
NumberSequence ns2 = numberseqs.get(j);
if (i != j) {
int smithWatermanThreshold = 10;
SmithWatermanRepeated twoSequences = new SmithWatermanRepeated(
ns1.getSequence(), ns2.getSequence(), submat,
smithWatermanThreshold);
SmithWatermanRepeated sameSequence1 = new SmithWatermanRepeated(
ns1.getSequence(), ns1.getSequence(), submat,
smithWatermanThreshold);
SmithWatermanRepeated sameSequence2 = new SmithWatermanRepeated(
ns2.getSequence(), ns2.getSequence(), submat,
smithWatermanThreshold);
SmithWatermanRepeated randomSequence = new SmithWatermanRepeated(
ns1.shuffle().getSequence(),ns2.shuffle().getSequence(),submat,smithWatermanThreshold);
double score = twoSequences.getAlignmentScore();
// Scores of the sequence being aligned to itself
double sSelf1 = sameSequence1.getAlignmentScore();
double sSelf2 = sameSequence2.getAlignmentScore();
double sRand = randomSequence.getAlignmentScore();
double sMax = (sSelf1 + sSelf2) / 2;
double sEff = (score - sRand)/ (sMax - sRand);
if(sEff < 0) {
sEff = 0;
}
double distance = -Math.log(sEff);
if(!Double.isInfinite(distance) && !Double.isNaN(distance)) {
//System.out.println("Score: " + String.format("%5.1f", score) + " sRand: " + String.format("%5.1f", sRand) + " sMax: " + String.format("%6.1f", sMax) + " distance: " + String.format("%2.3f",distance));
if(distance < sequenceDistances.get(i, j)) {
sequenceDistances.set(i,j,distance );
}
}
if (score > 0) {
/*
* System.out.println(
* "=================================================");
* System.out.println("| Sequence " +
* String.format("%3d", i) +" Sequence " +
* String.format("%3d", j) + " |");
* System.out.println(
* "=================================================");
* System.out.print("Sequence " + i + ": ");
* ns1.printSequence(); System.out.print("Sequence " + j
* + ": "); ns2.printSequence(); System.out.println();
* //System.out.println("Max Scrore: " +
* sw.getMaxScore());
* System.out.println("Alignment Score: " +
* sw.getAlignmentScore()); //sw.printDPMatrix();
* sw.traceback(); System.out.println();
* System.out.println();
*/
}
}
}
}
System.out.println(sequenceDistances.toString());
UPGMATree guidetree = new UPGMATree(numberseqs, sequenceDistances);
System.out.println(guidetree.toString());
do {
System.out.println();
//FengDoolittle fd = new FengDoolittle();
// appData.getStopWatch().start("whole loop");
// detectAndReplaceIterations(appData);
// appData.getStopWatch().start("task replacement");
// detectAndReplaceTasks(appData);
// appData.getStopWatch().stop("task replacement");
// appData.getStopWatch().stop("whole loop");
// appData.getStopWatch().dumpStatistics(System.out);
// appData.getStopWatch().reset();
} while (appData.detectedAndReplacedTasks());
Console.println("created "
+ appData.getResult().getNewlyCreatedTasks().size()
+ " new tasks and "
+ appData.getResult().getNewlyCreatedTaskInstances().size()
+ " appropriate instances\n");
if ((appData.getResult().getNewlyCreatedTasks().size() > 0)
|| (appData.getResult().getNewlyCreatedTaskInstances().size() > 0)) {
appData.getResult().setRuleApplicationStatus(
RuleApplicationStatus.FINISHED);
}
return appData.getResult();
}
/**
*
* harmonizes the event task instances by unifying tasks. This is done, as
* initially the event tasks being equal with respect to the considered task
* equality are distinct objects. The comparison of these distinct objects
* is more time consuming than comparing the object references.
*
*
* @param appData
* the rule application data combining all data used for applying
* this rule
* @return Returns the unique tasks symbol map
*/
private SymbolMap harmonizeEventTaskInstancesModel(
RuleApplicationData appData) {
Console.traceln(Level.INFO,
"harmonizing task model of event task instances");
appData.getStopWatch().start("harmonizing event tasks");
SymbolMap uniqueTasks = preparationTaskHandlingStrategy
.createSymbolMap();
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
int unifiedTasks = 0;
ITask task;
List sessions = appData.getSessions();
int sessionNo = 0;
numberseqs = new ArrayList();
for (IUserSession session : sessions) {
Console.traceln(Level.FINE, "handling " + (++sessionNo) + ". "
+ session);
NumberSequence templist = new NumberSequence(session.size());
for (int i = 0; i < session.size(); i++) {
ITaskInstance taskInstance = session.get(i);
task = uniqueTasks.getValue(taskInstance);
if (task == null) {
uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());
templist.getSequence()[i] = taskInstance.getTask().getId();
} else {
taskBuilder.setTask(taskInstance, task);
templist.getSequence()[i] = task.getId();
unifiedTasks++;
}
}
numberseqs.add(templist);
comparator.clearBuffers();
}
appData.getStopWatch().stop("harmonizing event tasks");
Console.traceln(Level.INFO, "harmonized " + unifiedTasks
+ " task occurrences (still " + uniqueTasks.size()
+ " different tasks)");
appData.getStopWatch().dumpStatistics(System.out);
appData.getStopWatch().reset();
return uniqueTasks;
}
/**
*
* searches for direct iterations of single tasks in all sequences and
* replaces them with {@link IIteration}s, respectively appropriate
* instances. Also all single occurrences of a task that is iterated
* somewhen are replaced with iterations to have again an efficient way for
* task comparisons.
*
*
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void detectAndReplaceIterations(RuleApplicationData appData) {
Console.traceln(Level.FINE, "detecting iterations");
appData.getStopWatch().start("detecting iterations");
List sessions = appData.getSessions();
Set iteratedTasks = searchIteratedTasks(sessions);
if (iteratedTasks.size() > 0) {
replaceIterationsOf(iteratedTasks, sessions, appData);
}
appData.getStopWatch().stop("detecting iterations");
Console.traceln(Level.INFO, "replaced " + iteratedTasks.size()
+ " iterated tasks");
}
/**
*
* searches the provided sessions for task iterations. If a task is
* iterated, it is added to the returned set.
*
*
* @param the
* session to search for iterations in
*
* @return a set of tasks being iterated somewhere
*/
private Set searchIteratedTasks(List sessions) {
Set iteratedTasks = new HashSet();
for (IUserSession session : sessions) {
for (int i = 0; i < (session.size() - 1); i++) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
if (session.get(i).getTask() == session.get(i + 1).getTask()) {
iteratedTasks.add(session.get(i).getTask());
}
}
}
return iteratedTasks;
}
/**
*
* replaces all occurrences of all tasks provided in the set with iterations
*
*
* @param iteratedTasks
* the tasks to be replaced with iterations
* @param sessions
* the sessions in which the tasks are to be replaced
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void replaceIterationsOf(Set iteratedTasks,
List sessions, RuleApplicationData appData) {
Map iterations = new HashMap();
Map> iterationInstances = new HashMap>();
for (ITask iteratedTask : iteratedTasks) {
IIteration iteration = taskFactory.createNewIteration();
iterations.put(iteratedTask, iteration);
iterationInstances.put(iteration,
new LinkedList());
}
IIterationInstance iterationInstance;
for (IUserSession session : sessions) {
int index = 0;
iterationInstance = null;
while (index < session.size()) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
ITask currentTask = session.get(index).getTask();
IIteration iteration = iterations.get(currentTask);
if (iteration != null) {
if ((iterationInstance == null)
|| (iterationInstance.getTask() != iteration)) {
iterationInstance = taskFactory
.createNewTaskInstance(iteration);
iterationInstances.get(iteration)
.add(iterationInstance);
taskBuilder.addTaskInstance(session, index,
iterationInstance);
index++;
}
taskBuilder.addChild(iterationInstance, session.get(index));
taskBuilder.removeTaskInstance(session, index);
} else {
if (iterationInstance != null) {
iterationInstance = null;
}
index++;
}
}
}
for (Map.Entry> entry : iterationInstances
.entrySet()) {
harmonizeIterationInstancesModel(entry.getKey(), entry.getValue());
}
}
/**
*
* TODO clarify why this is done
*
*/
private void harmonizeIterationInstancesModel(IIteration iteration,
List iterationInstances) {
List iteratedTaskVariants = new LinkedList();
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
// merge the lexically different variants of iterated task to a unique
// list
for (IIterationInstance iterationInstance : iterationInstances) {
for (ITaskInstance executionVariant : iterationInstance) {
ITask candidate = executionVariant.getTask();
boolean found = false;
for (ITask taskVariant : iteratedTaskVariants) {
if (comparator.areLexicallyEqual(taskVariant, candidate)) {
taskBuilder.setTask(executionVariant, taskVariant);
found = true;
break;
}
}
if (!found) {
iteratedTaskVariants.add(candidate);
}
}
}
// if there are more than one lexically different variant of iterated
// tasks, adapt the
// iteration model to be a selection of different variants. In this case
// also adapt
// the generated iteration instances to correctly contain selection
// instances. If there
// is only one variant of an iterated task, simply set this as the
// marked task of the
// iteration. In this case, the instances can be preserved as is
if (iteratedTaskVariants.size() > 1) {
ISelection selection = taskFactory.createNewSelection();
for (ITask variant : iteratedTaskVariants) {
taskBuilder.addChild(selection, variant);
}
taskBuilder.setMarkedTask(iteration, selection);
for (IIterationInstance instance : iterationInstances) {
for (int i = 0; i < instance.size(); i++) {
ISelectionInstance selectionInstance = taskFactory
.createNewTaskInstance(selection);
taskBuilder.setChild(selectionInstance, instance.get(i));
taskBuilder.setTaskInstance(instance, i, selectionInstance);
}
}
} else {
taskBuilder.setMarkedTask(iteration, iteratedTaskVariants.get(0));
}
}
/**
* TODO go on commenting
*
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void detectAndReplaceTasks(RuleApplicationData appData) {
Console.traceln(Level.FINE, "detecting and replacing tasks");
appData.getStopWatch().start("detecting tasks");
getSequencesOccuringMostOften(appData);
appData.getStopWatch().stop("detecting tasks");
appData.getStopWatch().start("replacing tasks");
replaceSequencesOccurringMostOften(appData);
appData.getStopWatch().stop("replacing tasks");
Console.traceln(Level.INFO, "detected and replaced "
+ appData.getLastFoundTasks().size() + " tasks occuring "
+ appData.getLastFoundTasks().getOccurrenceCount() + " times");
}
/**
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void getSequencesOccuringMostOften(RuleApplicationData appData) {
Console.traceln(Level.FINE, "determining most prominent tasks");
Tasks tasks;
boolean createNewTrie = (appData.getLastTrie() == null)
|| appData.detectedAndReplacedTasks(); // tree has changed
do {
if (createNewTrie) {
createNewTrie(appData);
}
MaxCountAndLongestTasksFinder finder = new MaxCountAndLongestTasksFinder();
appData.getLastTrie().process(finder);
tasks = finder.getFoundTasks();
createNewTrie = false;
for (List task : tasks) {
if (task.size() >= appData.getTrainedSequenceLength()) {
// Trie must be recreated with a longer sequence length to
// be sure that
// the found sequences occurring most often are found in
// their whole length
appData.setTrainedSequenceLength(appData
.getTrainedSequenceLength() + 1);
createNewTrie = true;
break;
}
}
} while (createNewTrie);
// create a normal trie for comparison
/*
* System.out.println("creating test trie for comparison");
*
* appData.getStopWatch().start("testtrie"); Trie trie =
* new Trie(identityTaskComparator);
*
* for (IUserSession session : appData.getSessions()) {
* trie.train(session.getExecutedTasks(),
* appData.getTrainedSequenceLength()); }
*
* appData.getStopWatch().stop("testtrie");
*
* MaxCountAndLongestTasksFinder finder = new
* MaxCountAndLongestTasksFinder(); trie.process(finder);
*
* boolean allTasksEqual = finder.getFoundTasks().size() ==
* tasks.size();
*
* allTasksEqual &= finder.getFoundTasks().occurrenceCount ==
* tasks.occurrenceCount;
*
* for (List task1 : finder.getFoundTasks()) { boolean
* foundTask = false; for (List task2 : tasks) { boolean
* tasksEqual = false; if (task1.size() == task2.size()) { tasksEqual =
* true; for (int i = 0; i < task1.size(); i++) { if
* (!identityTaskComparator.equals(task1.get(i).getTask(),
* task2.get(i).getTask())) { tasksEqual = false; } } }
*
* if (tasksEqual) { foundTask = true; break; } }
*
* if (!foundTask) { System.out.println("different is " + task1);
* allTasksEqual = false; break; } }
*
* if (!allTasksEqual) { System.out.println(finder.getFoundTasks());
* System.out.println(tasks);
*
* throw new
* IllegalArgumentException("both tries calculated different subsequences"
* ); }
*
* /*TrieStatisticsDumper dumper = new TrieStatisticsDumper();
* appData.getLastTrie().process(dumper); dumper.dumpCounters();
*/
appData.setLastFoundTasks(tasks);
Console.traceln(Level.FINE, "found "
+ appData.getLastFoundTasks().size() + " tasks " + "occurring "
+ appData.getLastFoundTasks().getOccurrenceCount() + " times");
}
/**
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void createNewTrie(RuleApplicationData appData) {
Console.traceln(
Level.FINER,
"training trie with a maximum sequence length of "
+ appData.getTrainedSequenceLength());
appData.getStopWatch().start("training trie");
// we prepared the task instances to refer to unique tasks, if they are
// treated
// as equal. Therefore, we just compare the identity of the tasks of the
// task
// instances
appData.setLastTrie(new TaskInstanceTrie(identityTaskHandlingStrategy));
appData.getLastTrie().trainSessions(appData.getSessions(),
appData.getTrainedSequenceLength());
appData.getStopWatch().stop("training trie");
}
/**
* @param appData
* the rule application data combining all data used for applying
* this rule
*/
private void replaceSequencesOccurringMostOften(RuleApplicationData appData) {
appData.detectedAndReplacedTasks(false);
if ((appData.getLastFoundTasks().size() > 0)
&& (appData.getLastFoundTasks().getOccurrenceCount() > 1)) {
Console.traceln(Level.FINER, "replacing tasks occurrences");
for (List task : appData.getLastFoundTasks()) {
ISequence sequence = taskFactory.createNewSequence();
Console.traceln(Level.FINEST, "replacing " + sequence.getId()
+ ": " + task);
List sequenceInstances = replaceTaskOccurrences(
task, appData.getSessions(), sequence);
harmonizeSequenceInstancesModel(sequence, sequenceInstances,
task.size());
appData.detectedAndReplacedTasks(appData
.detectedAndReplacedTasks()
|| (sequenceInstances.size() > 0));
if (sequenceInstances.size() < appData.getLastFoundTasks()
.getOccurrenceCount()) {
Console.traceln(Level.FINE,
sequence.getId()
+ ": replaced task only "
+ sequenceInstances.size()
+ " times instead of expected "
+ appData.getLastFoundTasks()
.getOccurrenceCount());
}
}
}
}
/**
*
*/
private void harmonizeSequenceInstancesModel(ISequence sequence,
List sequenceInstances, int sequenceLength) {
TaskInstanceComparator comparator = preparationTaskHandlingStrategy
.getTaskComparator();
// ensure for each subtask that lexically different variants are
// preserved
for (int subTaskIndex = 0; subTaskIndex < sequenceLength; subTaskIndex++) {
List subTaskVariants = new LinkedList();
for (ISequenceInstance sequenceInstance : sequenceInstances) {
ITask candidate = sequenceInstance.get(subTaskIndex).getTask();
boolean found = false;
for (int i = 0; i < subTaskVariants.size(); i++) {
if (comparator.areLexicallyEqual(subTaskVariants.get(i),
candidate)) {
taskBuilder.setTask(sequenceInstance.get(subTaskIndex),
subTaskVariants.get(i));
found = true;
break;
}
}
if (!found) {
subTaskVariants.add(candidate);
}
}
// if there are more than one lexically different variant of the sub
// task at
// the considered position, adapt the sequence model at that
// position to have
// a selection of the different variants. In this case also adapt
// the
// generated sequence instances to correctly contain selection
// instances. If
// there is only one variant of sub tasks at the given position,
// simply set
// this variant as the sub task of the selection. In this case, the
// instances
// can be preserved as is
if (subTaskVariants.size() > 1) {
ISelection selection = taskFactory.createNewSelection();
for (ITask variant : subTaskVariants) {
taskBuilder.addChild(selection, variant);
}
taskBuilder.addChild(sequence, selection);
for (ISequenceInstance instance : sequenceInstances) {
ISelectionInstance selectionInstance = taskFactory
.createNewTaskInstance(selection);
taskBuilder.setChild(selectionInstance,
instance.get(subTaskIndex));
taskBuilder.setTaskInstance(instance, subTaskIndex,
selectionInstance);
}
} else if (subTaskVariants.size() == 1) {
taskBuilder.addChild(sequence, subTaskVariants.get(0));
}
}
}
/**
* @param tree
*/
private List replaceTaskOccurrences(
List task, List sessions,
ISequence temporalTaskModel) {
List sequenceInstances = new LinkedList();
for (IUserSession session : sessions) {
int index = -1;
do {
index = getSubListIndex(session, task, ++index);
if (index > -1) {
sequenceInstances.add(RuleUtils
.createNewSubSequenceInRange(session, index, index
+ task.size() - 1, temporalTaskModel,
taskFactory, taskBuilder));
}
} while (index > -1);
}
return sequenceInstances;
}
/**
* @param trie
* @param object
* @return
*/
private int getSubListIndex(ITaskInstanceList list,
List subList, int startIndex) {
boolean matchFound;
int result = -1;
for (int i = startIndex; i <= list.size() - subList.size(); i++) {
matchFound = true;
for (int j = 0; j < subList.size(); j++) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
if (list.get(i + j).getTask() != subList.get(j).getTask()) {
matchFound = false;
break;
}
}
if (matchFound) {
result = i;
break;
}
}
return result;
}
/**
* @param trie
* @param object
* @return
*/
private int getSubListIndex(List list,
List subList, int startIndex) {
boolean matchFound;
int result = -1;
for (int i = startIndex; i <= list.size() - subList.size(); i++) {
matchFound = true;
for (int j = 0; j < subList.size(); j++) {
// we prepared the task instances to refer to unique tasks, if
// they are treated
// as equal. Therefore, we just compare the identity of the
// tasks of the task
// instances
if (list.get(i + j) != subList.get(j)) {
matchFound = false;
break;
}
}
if (matchFound) {
result = i;
break;
}
}
return result;
}
/**
* @author Patrick Harms
*/
private class MaxCountAndLongestTasksFinder implements
TrieProcessor {
/**
*
*/
private int currentCount;
/**
*
*/
private List> foundTasks = new LinkedList>();
/**
*
*/
public MaxCountAndLongestTasksFinder() {
super();
this.currentCount = 0;
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util
* .List, int)
*/
@Override
public TrieProcessor.Result process(List foundTask,
int count) {
if (foundTask.size() < 2) {
// ignore single tasks
return TrieProcessor.Result.CONTINUE;
}
if (count < 2) {
// ignore singular occurrences
return TrieProcessor.Result.SKIP_NODE;
}
if (this.currentCount > count) {
// ignore this children of this task, as they may have only
// smaller counts than
// the already found tasks
return TrieProcessor.Result.SKIP_NODE;
}
if (this.currentCount < count) {
// the provided task occurs more often that all tasks found so
// far.
// clear all found tasks and use the new count as the one
// searched for
foundTasks.clear();
this.currentCount = count;
}
if (this.currentCount == count) {
// the task is of interest. Sort it into the other found tasks
// so that
// the longest come first
boolean added = false;
for (int i = 0; i < foundTasks.size(); i++) {
if (foundTasks.get(i).size() < foundTask.size()) {
// defensive copy
foundTasks.add(i, new LinkedList(
foundTask)); // defensive copy
added = true;
break;
}
}
if (!added) {
foundTasks.add(new LinkedList(foundTask)); // defensive
// copy
}
}
return TrieProcessor.Result.CONTINUE;
}
/**
* @return
*/
public Tasks getFoundTasks() {
removePermutationsOfShorterTasks();
return new Tasks(currentCount, foundTasks);
}
/**
*
*/
private void removePermutationsOfShorterTasks() {
// now iterate the sorted list and for each task remove all other
// tasks that are shorter
// (i.e. come later in the sorted list) and that represent a subpart
// of the task
for (int i = 0; i < foundTasks.size(); i++) {
for (int j = i + 1; j < foundTasks.size();) {
if (foundTasks.get(j).size() < foundTasks.get(i).size()) {
// found a task that is a potential subtask. Check for
// this and remove the
// subtask if needed
List longTask = foundTasks.get(i);
List shortTask = foundTasks.get(j);
if (getSubListIndex(longTask, shortTask, 0) > -1) {
foundTasks.remove(j);
} else {
j++;
}
} else {
j++;
}
}
}
}
}
// /**
// * @author Patrick Harms
// */
// private class TrieStatisticsDumper implements
// TrieProcessor {
//
// /**
// *
// */
// private Map counters = new HashMap();
//
// /* (non-Javadoc)
// * @see
// de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List,
// int)
// */
// @Override
// public TrieProcessor.Result process(List subsequence, int
// count) {
// if (subsequence.size() == 1) {
// Integer value = counters.get(count);
//
// if (value == null) {
// value = 0;
// }
//
// counters.put(count, value + 1);
//
// return TrieProcessor.Result.CONTINUE;
// }
// else {
// // ignore singular occurrences
// return TrieProcessor.Result.SKIP_NODE;
// }
// }
//
// /**
// * @return
// */
// public void dumpCounters() {
// int dumpedCounters = 0;
//
// int count = 1;
// while (dumpedCounters < counters.size()) {
// Integer value = counters.get(count++);
// if (value != null) {
// System.out.println(value + " symbols occurred " + count + " times");
// dumpedCounters++;
// }
// }
// }
//
// }
/**
*
*/
private static class RuleApplicationData {
/**
*
*/
private List sessions;
/**
*
*/
private TaskInstanceTrie lastTrie;
/**
* default and minimum trained sequence length is 3
*/
private int trainedSequenceLength = 3;
/**
*
*/
private Tasks lastFoundTasks = new Tasks(Integer.MAX_VALUE, null);
/**
*
*/
private boolean detectedAndReplacedTasks;
/**
*
*/
private RuleApplicationResult result = new RuleApplicationResult();
/**
*
*/
private StopWatch stopWatch = new StopWatch();
/**
*
*/
private RuleApplicationData(List sessions) {
this.sessions = sessions;
}
/**
* @return the tree
*/
private List getSessions() {
return sessions;
}
/**
* @param lastTrie
* the lastTrie to set
*/
private void setLastTrie(TaskInstanceTrie lastTrie) {
this.lastTrie = lastTrie;
}
/**
* @return the lastTrie
*/
private TaskInstanceTrie getLastTrie() {
return lastTrie;
}
/**
* @param trainedSequenceLength
* the trainedSequenceLength to set
*/
private void setTrainedSequenceLength(int trainedSequenceLength) {
this.trainedSequenceLength = trainedSequenceLength;
}
/**
* @return the trainedSequenceLength
*/
private int getTrainedSequenceLength() {
return trainedSequenceLength;
}
/**
* @param lastFoundSequences
* the lastFoundSequences to set
*/
private void setLastFoundTasks(Tasks lastFoundSequences) {
this.lastFoundTasks = lastFoundSequences;
}
/**
* @return the lastFoundSequences
*/
private Tasks getLastFoundTasks() {
return lastFoundTasks;
}
/**
*
*/
private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {
this.detectedAndReplacedTasks = detectedAndReplacedTasks;
}
/**
*
*/
private boolean detectedAndReplacedTasks() {
return detectedAndReplacedTasks;
}
/**
* @return the result
*/
private RuleApplicationResult getResult() {
return result;
}
/**
* @return the stopWatch
*/
private StopWatch getStopWatch() {
return stopWatch;
}
}
/**
* @author Patrick Harms
*/
private static class Tasks implements Iterable> {
/**
*
*/
private int occurrenceCount;
/**
*
*/
private List> sequences;
/**
* @param occurrenceCount
* @param sequences
*/
private Tasks(int occurrenceCount, List> sequences) {
super();
this.occurrenceCount = occurrenceCount;
this.sequences = sequences;
}
/**
* @return
*/
private int getOccurrenceCount() {
return occurrenceCount;
}
/**
* @return
*/
private int size() {
return this.sequences.size();
}
/**
*
*/
/*
* (non-Javadoc)
*
* @see java.lang.Iterable#iterator()
*/
@Override
public Iterator> iterator() {
return this.sequences.iterator();
}
/*
* (non-Javadoc)
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuffer result = new StringBuffer();
result.append(this.occurrenceCount);
result.append(" occurrences:\n");
for (List task : sequences) {
result.append(task);
result.append("\n");
}
return result.toString();
}
}
// methods for internal testing
// private void checkMatchingOfSessions(List> flattenedSessions,
// List sessions,
// String when)
// {
// List> currentFlattenedSessions = flattenSessions(sessions);
// if (flattenedSessions.size() != currentFlattenedSessions.size()) {
// System.out.println("################## number of sessions changed after "
// + when);
// }
// else {
// for (int i = 0; i < flattenedSessions.size(); i++) {
// List expected = flattenedSessions.get(i);
// List current = currentFlattenedSessions.get(i);
//
// if (expected.size() != current.size()) {
// System.out.println
// ("################## length of session " + i + " changed after " + when);
// }
// else {
// for (int j = 0; j < expected.size(); j++) {
// if (!expected.get(j).equals(current.get(j))) {
// System.out.println("################## event " + j + " of session " +
// i + " changed after " + when);
// }
// }
// }
// }
// }
// }
//
// private List> flattenSessions(List sessions) {
// List> flattenedSessions = new ArrayList>();
// for (IUserSession session : sessions) {
// List flattenedUserSession = new ArrayList();
// flatten(session, flattenedUserSession);
// flattenedSessions.add(flattenedUserSession);
// }
//
// return flattenedSessions;
// }
//
// private void flatten(IUserSession iUserSession, List
// flattenedUserSession) {
// for (ITaskInstance instance : iUserSession) {
// flatten(instance, flattenedUserSession);
// }
// }
//
// private void flatten(ITaskInstance instance, List
// flattenedUserSession) {
// if (instance instanceof ITaskInstanceList) {
// for (ITaskInstance child : (ITaskInstanceList) instance) {
// flatten(child, flattenedUserSession);
// }
// }
// else if (instance instanceof ISelectionInstance) {
// flatten(((ISelectionInstance) instance).getChild(),
// flattenedUserSession);
// }
// else if (instance instanceof IOptionalInstance) {
// flatten(((IOptionalInstance) instance).getChild(), flattenedUserSession);
// }
// else if (instance instanceof IEventTaskInstance) {
// flattenedUserSession.add(((IEventTaskInstance) instance).getEvent());
// }
// }
}