//   Copyright 2012 Georg-August-Universität Göttingen, Germany
//
//   Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS,
//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//   See the License for the specific language governing permissions and
//   limitations under the License.

package de.ugoe.cs.autoquest.tasktrees.temporalrelation;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
import de.ugoe.cs.autoquest.usageprofiles.Trie;
import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;

/**
 * <p>
 * This trie implementation is a performance optimization for generating task trees. It does not
 * create a full trie but adds only those subsequences having a chance of occurring most often.
 * For this, it initially counts the number of occurrences of each task instance. Then, during
 * training, it regularly determines the count of the currently most often occurring sequence. If
 * this count is higher than the count of a task instance to be trained, the task instance is
 * skipped and not added to the trie.
 * </p>
 *
 * @author Patrick Harms
 */
class TaskInstanceTrie extends Trie<ITaskInstance> {

    /**  */
    private static final long serialVersionUID = 1L;

    /**
     * <p>
     * the task handling strategy to be used for comparing tasks
     * </p>
     */
    private final TaskHandlingStrategy taskStrategy;

    /**
     * <p>
     * instantiates the trie with the task handling strategy to be used
     * </p>
     *
     * @param taskStrategy the task handling strategy to be used for comparing tasks
     */
    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
        super(taskStrategy);
        this.taskStrategy = taskStrategy;
    }

    /**
     * <p>
     * trains this trie with the provided user sessions up to the provided maximum depth using
     * the optimization described in the description of this class. The method first counts the
     * task instances of all sessions, then trains session by session and finally updates the
     * known symbols of the trie. If the maximum depth is smaller than 1, nothing is done.
     * </p>
     *
     * @param userSessions the sessions for which this trie is to be trained
     * @param maxOrder     the depth of the trie
     */
    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
        if (maxOrder < 1) {
            return;
        }

        // one counter per symbol of the map created by the task handling strategy
        SymbolMap<ITaskInstance, Counter> equalTaskInstancesMap = taskStrategy.createSymbolMap();

        // gives direct access to the counter of a task instance through its task
        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();

        System.out.println("preparing training");
        int noOfTaskInstances = 0;
        for (IUserSession session : userSessions) {
            for (ITaskInstance taskInstance : session) {
                Counter counter = equalTaskInstancesMap.getValue(taskInstance);

                if (counter == null) {
                    counter = new Counter();
                    equalTaskInstancesMap.addSymbol(taskInstance, counter);
                }

                counter.count++;
                instanceCountMap.put(taskInstance.getTask(), counter);

                noOfTaskInstances++;
            }
        }

        System.out.println("performing training of " + noOfTaskInstances + " task instances");
        Counter processedTaskInstances = new Counter();

        // recheck the maximum count after each 10% of processed task instances.
        // NOTE: for less than ten task instances the integer division yields 0, i.e., the
        // maximum count is then rechecked for every single task instance.
        int counterRecheckAt = noOfTaskInstances / 10;

        for (IUserSession session : userSessions) {
            train(session, maxOrder, instanceCountMap, processedTaskInstances, counterRecheckAt);
        }

        updateKnownSymbols();
    }

    /* (non-Javadoc)
     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        else if (other instanceof TaskInstanceTrie) {
            return super.equals(other);
        }
        else {
            return false;
        }
    }

    /* (non-Javadoc)
     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
     */
    @Override
    public int hashCode() {
        return super.hashCode();
    }

    /**
     * <p>
     * internally used convenience method for implementing the training optimization. It walks
     * through the task instances of a session and collects them in a sliding window of at most
     * <code>maxOrder</code> elements which is added to the trie. Task instances whose overall
     * count is smaller than the last determined maximum sequence count terminate the current
     * window and are not added themselves.
     * </p>
     *
     * @param userSession            the session to train the trie with
     * @param maxOrder               the depth of the trie, i.e., the maximum window size
     * @param taskInstanceCountMap   the overall occurrence counters accessible through the tasks
     * @param processedTaskInstances counter of the task instances processed since the last
     *                               recheck of the maximum count (shared between all sessions)
     * @param counterRecheckAt       the number of processed task instances after which the
     *                               maximum sequence count is determined anew
     */
    private void train(IUserSession        userSession,
                       int                 maxOrder,
                       Map<ITask, Counter> taskInstanceCountMap,
                       Counter             processedTaskInstances,
                       int                 counterRecheckAt)
    {
        List<ITaskInstance> subsequence = new LinkedList<ITaskInstance>();

        // starts at 0 for every session, i.e., nothing is skipped in a session until the
        // first recheck took place while processing it
        int sequenceMaxCount = 0;

        for (ITaskInstance currentTaskInstance : userSession) {

            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;

            if (processedTaskInstances.count >= counterRecheckAt) {
                sequenceMaxCount = getCurrentSequenceMaxCount();
                processedTaskInstances.count = 0;
            }

            if (occurrenceCount < sequenceMaxCount) {
                // this task instance does not need to be considered, as it does not occur often
                // enough to be part of a sequence that occurs most often. Therefore train all
                // remaining sequences so far and go on, until the next useful sequence is found.

                while (subsequence.size() > 1) {
                    add(subsequence);
                    subsequence.remove(0);
                }

                subsequence.clear();
            }
            else {
                subsequence.add(currentTaskInstance);

                if (subsequence.size() == maxOrder) {
                    add(subsequence);
                    subsequence.remove(0);
                }
            }

            processedTaskInstances.count++;
        }

        // add shorter remaining subsequences, if any
        while (subsequence.size() > 1) {
            add(subsequence);
            subsequence.remove(0);
        }
    }

    /**
     * <p>
     * determines the current maximum count of sequences of a minimal length of two. Task instances
     * occurring more seldom do not have to be considered anymore
     * </p>
     *
     * @return the current maximum count of sequences of a minimal length of two
     */
    private int getCurrentSequenceMaxCount() {
        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
        super.process(processor);
        return processor.getMaxCount();
    }

    /**
     * <p>
     * trie processor identifying the current maximum count of sequences of a minimal length of two
     * </p>
     *
     * @author Patrick Harms
     */
    private static class MaxSequenceCountFinder implements TrieProcessor<ITaskInstance> {

        /**
         * <p>
         * the current maximum count
         * </p>
         */
        private int currentCount = 0;

        /* (non-Javadoc)
         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
         */
        @Override
        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
            if (foundTask.size() == 2) {
                // only sequences of length two are evaluated
                this.currentCount = Math.max(this.currentCount, count);

                // do not consider children
                return TrieProcessor.Result.SKIP_NODE;
            }
            else {
                return TrieProcessor.Result.CONTINUE;
            }
        }

        /**
         * <p>
         * returns the current maximum count
         * </p>
         *
         * @return the highest count seen so far, 0 if no sequence of length two was processed
         */
        private int getMaxCount() {
            return currentCount;
        }

    }

    /**
     * <p>
     * counter object to be able to call something by the counters reference
     * </p>
     *
     * @author Patrick Harms
     */
    private static class Counter {
        int count = 0;
    }

}