source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 1853

Last change on this file since 1853 was 1853, checked in by pharms, 10 years ago
  • rename of task instance comparator to task comparator as it actually compares tasks
File size: 8.6 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
21import java.util.logging.Level;
22
23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
25import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
26import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
27import de.ugoe.cs.autoquest.usageprofiles.Trie;
28import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
29import de.ugoe.cs.util.console.Console;
30
31/**
32 * <p>
33 * This trie implementation is a performance optimization for generating task trees. It does not
34 * create a full trie but adds only those subsequences having a chance of occurring most often.
35 * For this, it initially counts the number of occurrences of each task instance. Then, during
36 * training, it regularly determines the number of the currently most often occurring sequence. If
37 * this number is higher than the count of a task instance to be trained, the task instance is
38 * skipped the not added to the trie.
39 * </p>
40 *
41 * @author Patrick Harms
42 */
43class TaskInstanceTrie extends Trie<ITask> {
44
45    /**  */
46    private static final long serialVersionUID = 1L;
47
48    /**
49     * <p>
50     * the task handling strategy to be used for comparing tasks
51     * </p>
52     */
53    private TaskHandlingStrategy taskStrategy;
54   
55    /**
56     * <p>
57     * instantiated the trie with the task handling strategy to be used
58     * </p>
59     *
60     * @param taskStrategy the task handling strategy to be used for comparing tasks
61     */
62    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
63        super(taskStrategy);
64        this.taskStrategy = taskStrategy;
65    }
66
67    /**
68     * <p>
69     * trains this trie with the provided user sessions up to the provided maximum depth using
70     * the optimization described in the description of this class.
71     * </p>
72     *
73     * @param userSessions the sessions for which this trie is to be trained
74     * @param maxOrder     the depth of the trie
75     */
76    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
77        if (maxOrder < 1) {
78            return;
79        }
80       
81        SymbolMap<ITask, Counter> equalTaskInstancesMap =
82            taskStrategy.createSymbolMap();
83       
84        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
85       
86        Console.traceln(Level.FINEST, "preparing training");
87        int noOfTaskInstances = 0;
88        for (IUserSession session : userSessions) {
89            for (ITaskInstance taskInstance : session) {
90                Counter counter = equalTaskInstancesMap.getValue(taskInstance.getTask());
91               
92                if (counter == null) {
93                    counter = new Counter();
94                    equalTaskInstancesMap.addSymbol(taskInstance.getTask(), counter);
95                }
96               
97                counter.count++;
98                instanceCountMap.put(taskInstance.getTask(), counter);
99               
100                noOfTaskInstances++;
101            }
102        }
103       
104        Console.traceln
105            (Level.FINEST, "performing training of " + noOfTaskInstances + " task instances");
106       
107        Counter processedTaskInstances = new Counter();
108        int counterRecheckAt = noOfTaskInstances / 10; // recheck the maximum count after each
109                                                       // 10% of processed task instances
110        for (IUserSession session : userSessions) {
111            train(session, maxOrder, instanceCountMap, processedTaskInstances, counterRecheckAt);
112        }     
113       
114        updateKnownSymbols();
115    }
116   
117    /* (non-Javadoc)
118     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
119     */
120    @Override
121    public boolean equals(Object other) {
122        if (this == other) {
123            return true;
124        }
125        else if (other instanceof TaskInstanceTrie) {
126            return super.equals(other);
127        }
128        else {
129            return false;
130        }
131    }
132
133    /* (non-Javadoc)
134     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
135     */
136    @Override
137    public int hashCode() {
138        return super.hashCode();
139    }
140
141    /**
142     * <p>
143     * internally used convenience method for implementing the training optimization
144     * </p>
145     */
146    private void train(IUserSession        userSession,
147                       int                 maxOrder,
148                       Map<ITask, Counter> taskInstanceCountMap,
149                       Counter             processedTaskInstances,
150                       int                 counterRecheckAt)
151    {
152        List<ITask> subsequence = new LinkedList<ITask>();
153       
154        int sequenceMaxCount = 0;
155       
156        for (ITaskInstance currentTaskInstance : userSession) {
157           
158            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;
159           
160            if (processedTaskInstances.count >= counterRecheckAt) {
161                sequenceMaxCount = getCurrentSequenceMaxCount();
162                processedTaskInstances.count = 0;
163            }
164           
165            if (occurrenceCount < sequenceMaxCount) {
166                // this task instance does not need to be considered, as it occurs not often enough
167                // to be part of a sequence, that occurs most often. Therefore train all remaining
168                // sequences so far and go on, until the next useful sequence is found.
169               
170                while (subsequence.size() > 1) {
171                    add(subsequence);
172                    subsequence.remove(0);
173                }
174               
175                subsequence.clear();
176            }
177            else {
178                subsequence.add(currentTaskInstance.getTask());
179
180                if (subsequence.size() == maxOrder) {
181                    add(subsequence);
182                    subsequence.remove(0);
183                }
184            }
185           
186            processedTaskInstances.count++;
187        }
188       
189        // add shorter remaining subsequences, if any
190        while (subsequence.size() > 1) {
191            add(subsequence);
192            subsequence.remove(0);
193        }
194    }
195
196    /**
197     * <p>
198     * determines the current maximum count of sequences of a minimal length of two. Task instances
199     * occuring more seldom do not have to be considered anymore
200     * </p>
201     *
202     * @return the current maximum count of sequences of a minimal length of two
203     */
204    private int getCurrentSequenceMaxCount() {
205        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
206        super.process(processor);
207        return processor.getMaxCount();
208    }
209
210    /**
211     * <p>
212     * trie processor identifying the current maximum count of sequences of a minimal length of two
213     * </p>
214     *
215     * @author Patrick Harms
216     */
217    private static class MaxSequenceCountFinder implements TrieProcessor<ITask> {
218       
219        /**
220         * <p>
221         * the current maximum count
222         * </p>
223         */
224        private int currentCount = 0;
225       
226        /* (non-Javadoc)
227         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
228         */
229        @Override
230        public TrieProcessor.Result process(List<ITask> foundTask, int count) {
231            if (foundTask.size() == 2) {
232                this.currentCount = Math.max(this.currentCount, count);
233               
234                // do not consider children
235                return TrieProcessor.Result.SKIP_NODE;
236            }
237            else {
238                return TrieProcessor.Result.CONTINUE;
239            }
240        }
241
242        /**
243         * <p>
244         * returns the current maximum count
245         * </p>
246         */
247        private int getMaxCount() {
248            return currentCount;
249        }
250
251    }
252   
253    /**
254     * <p>
255     * counter object to be able to call something by the counters reference
256     * </p>
257     *
258     * @author Patrick Harms
259     */
260    private static class Counter {
261        int count = 0;
262    }
263       
264}
Note: See TracBrowser for help on using the repository browser.