source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 1891

Last change on this file since 1891 was 1853, checked in by pharms, 10 years ago
  • rename of task instance comparator to task comparator as it actually compares tasks
File size: 8.6 KB
RevLine 
[1256]1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
[1853]21import java.util.logging.Level;
[1256]22
[1285]23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
[1256]24import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
25import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
26import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
27import de.ugoe.cs.autoquest.usageprofiles.Trie;
28import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
[1853]29import de.ugoe.cs.util.console.Console;
[1256]30
31/**
32 * <p>
[1401]33 * This trie implementation is a performance optimization for generating task trees. It does not
34 * create a full trie but adds only those subsequences having a chance of occurring most often.
35 * For this, it initially counts the number of occurrences of each task instance. Then, during
36 * training, it regularly determines the number of the currently most often occurring sequence. If
37 * this number is higher than the count of a task instance to be trained, the task instance is
38 * skipped the not added to the trie.
[1256]39 * </p>
40 *
41 * @author Patrick Harms
42 */
[1853]43class TaskInstanceTrie extends Trie<ITask> {
[1256]44
45    /**  */
46    private static final long serialVersionUID = 1L;
47
48    /**
49     * <p>
[1401]50     * the task handling strategy to be used for comparing tasks
[1256]51     * </p>
52     */
[1285]53    private TaskHandlingStrategy taskStrategy;
54   
[1256]55    /**
56     * <p>
[1401]57     * instantiated the trie with the task handling strategy to be used
[1256]58     * </p>
59     *
[1401]60     * @param taskStrategy the task handling strategy to be used for comparing tasks
[1256]61     */
[1285]62    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
63        super(taskStrategy);
64        this.taskStrategy = taskStrategy;
[1256]65    }
66
67    /**
[1401]68     * <p>
69     * trains this trie with the provided user sessions up to the provided maximum depth using
70     * the optimization described in the description of this class.
71     * </p>
72     *
73     * @param userSessions the sessions for which this trie is to be trained
74     * @param maxOrder     the depth of the trie
[1256]75     */
76    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
77        if (maxOrder < 1) {
78            return;
79        }
80       
[1853]81        SymbolMap<ITask, Counter> equalTaskInstancesMap =
[1285]82            taskStrategy.createSymbolMap();
[1256]83       
[1285]84        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
[1256]85       
[1853]86        Console.traceln(Level.FINEST, "preparing training");
[1285]87        int noOfTaskInstances = 0;
[1256]88        for (IUserSession session : userSessions) {
89            for (ITaskInstance taskInstance : session) {
[1853]90                Counter counter = equalTaskInstancesMap.getValue(taskInstance.getTask());
[1256]91               
[1285]92                if (counter == null) {
93                    counter = new Counter();
[1853]94                    equalTaskInstancesMap.addSymbol(taskInstance.getTask(), counter);
[1256]95                }
96               
[1285]97                counter.count++;
98                instanceCountMap.put(taskInstance.getTask(), counter);
99               
100                noOfTaskInstances++;
[1256]101            }
102        }
103       
[1853]104        Console.traceln
105            (Level.FINEST, "performing training of " + noOfTaskInstances + " task instances");
106       
[1285]107        Counter processedTaskInstances = new Counter();
108        int counterRecheckAt = noOfTaskInstances / 10; // recheck the maximum count after each
109                                                       // 10% of processed task instances
[1256]110        for (IUserSession session : userSessions) {
[1285]111            train(session, maxOrder, instanceCountMap, processedTaskInstances, counterRecheckAt);
[1256]112        }     
113       
114        updateKnownSymbols();
115    }
116   
[1267]117    /* (non-Javadoc)
118     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
119     */
120    @Override
121    public boolean equals(Object other) {
122        if (this == other) {
123            return true;
124        }
125        else if (other instanceof TaskInstanceTrie) {
126            return super.equals(other);
127        }
128        else {
129            return false;
130        }
131    }
132
[1273]133    /* (non-Javadoc)
134     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
135     */
136    @Override
137    public int hashCode() {
138        return super.hashCode();
139    }
140
[1256]141    /**
[1401]142     * <p>
143     * internally used convenience method for implementing the training optimization
144     * </p>
[1256]145     */
[1285]146    private void train(IUserSession        userSession,
147                       int                 maxOrder,
148                       Map<ITask, Counter> taskInstanceCountMap,
149                       Counter             processedTaskInstances,
150                       int                 counterRecheckAt)
[1256]151    {
[1853]152        List<ITask> subsequence = new LinkedList<ITask>();
[1256]153       
154        int sequenceMaxCount = 0;
155       
[1401]156        for (ITaskInstance currentTaskInstance : userSession) {
[1256]157           
[1285]158            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;
159           
160            if (processedTaskInstances.count >= counterRecheckAt) {
[1256]161                sequenceMaxCount = getCurrentSequenceMaxCount();
[1285]162                processedTaskInstances.count = 0;
[1256]163            }
164           
165            if (occurrenceCount < sequenceMaxCount) {
166                // this task instance does not need to be considered, as it occurs not often enough
167                // to be part of a sequence, that occurs most often. Therefore train all remaining
168                // sequences so far and go on, until the next useful sequence is found.
169               
170                while (subsequence.size() > 1) {
171                    add(subsequence);
172                    subsequence.remove(0);
173                }
174               
175                subsequence.clear();
176            }
177            else {
[1853]178                subsequence.add(currentTaskInstance.getTask());
[1256]179
180                if (subsequence.size() == maxOrder) {
181                    add(subsequence);
182                    subsequence.remove(0);
183                }
184            }
[1285]185           
186            processedTaskInstances.count++;
[1256]187        }
188       
189        // add shorter remaining subsequences, if any
190        while (subsequence.size() > 1) {
191            add(subsequence);
192            subsequence.remove(0);
193        }
194    }
195
196    /**
197     * <p>
[1401]198     * determines the current maximum count of sequences of a minimal length of two. Task instances
199     * occuring more seldom do not have to be considered anymore
[1256]200     * </p>
201     *
[1401]202     * @return the current maximum count of sequences of a minimal length of two
[1256]203     */
204    private int getCurrentSequenceMaxCount() {
205        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
206        super.process(processor);
207        return processor.getMaxCount();
208    }
209
210    /**
[1401]211     * <p>
212     * trie processor identifying the current maximum count of sequences of a minimal length of two
213     * </p>
214     *
[1256]215     * @author Patrick Harms
216     */
[1853]217    private static class MaxSequenceCountFinder implements TrieProcessor<ITask> {
[1256]218       
219        /**
[1401]220         * <p>
221         * the current maximum count
222         * </p>
[1256]223         */
224        private int currentCount = 0;
225       
226        /* (non-Javadoc)
227         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
228         */
229        @Override
[1853]230        public TrieProcessor.Result process(List<ITask> foundTask, int count) {
[1256]231            if (foundTask.size() == 2) {
232                this.currentCount = Math.max(this.currentCount, count);
233               
234                // do not consider children
235                return TrieProcessor.Result.SKIP_NODE;
236            }
237            else {
238                return TrieProcessor.Result.CONTINUE;
239            }
240        }
241
242        /**
[1401]243         * <p>
244         * returns the current maximum count
245         * </p>
[1256]246         */
247        private int getMaxCount() {
248            return currentCount;
249        }
250
251    }
252   
[1285]253    /**
[1401]254     * <p>
255     * counter object to be able to call something by the counters reference
256     * </p>
257     *
[1285]258     * @author Patrick Harms
259     */
260    private static class Counter {
261        int count = 0;
262    }
263       
[1256]264}
Note: See TracBrowser for help on using the repository browser.