source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 2146

Last change on this file since 2146 was 2130, checked in by pharms, 8 years ago
  • improved partial trie training to make it even more partial by not training instances of tasks that occur less often then the currently detected sequence also in other sessions
File size: 8.8 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
21import java.util.logging.Level;
22
23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
25import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
26import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
27import de.ugoe.cs.autoquest.usageprofiles.Trie;
28import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
29import de.ugoe.cs.util.console.Console;
30
31/**
32 * <p>
33 * This trie implementation is a performance optimization for generating task trees. It does not
34 * create a full trie but adds only those subsequences having a chance of occurring most often.
35 * For this, it initially counts the number of occurrences of each task instance. Then, during
36 * training, it regularly determines the number of the currently most often occurring sequence. If
37 * this number is higher than the count of a task instance to be trained, the task instance is
38 * skipped the not added to the trie.
39 * </p>
40 *
41 * @author Patrick Harms
42 */
43class TaskInstanceTrie extends Trie<ITask> {
44
45    /**  */
46    private static final long serialVersionUID = 1L;
47
48    /**
49     * <p>
50     * the task handling strategy to be used for comparing tasks
51     * </p>
52     */
53    private TaskHandlingStrategy taskStrategy;
54   
55    /**
56     * <p>
57     * instantiated the trie with the task handling strategy to be used
58     * </p>
59     *
60     * @param taskStrategy the task handling strategy to be used for comparing tasks
61     */
62    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
63        super(taskStrategy);
64        this.taskStrategy = taskStrategy;
65    }
66
67    /**
68     * <p>
69     * trains this trie with the provided user sessions up to the provided maximum depth using
70     * the optimization described in the description of this class.
71     * </p>
72     *
73     * @param userSessions the sessions for which this trie is to be trained
74     * @param maxOrder     the depth of the trie
75     */
76    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
77        if (maxOrder < 1) {
78            return;
79        }
80       
81        SymbolMap<ITask, Counter> equalTaskInstancesMap =
82            taskStrategy.createSymbolMap();
83       
84        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
85        Console.traceln(Level.FINEST, "preparing training");
86        int noOfTaskInstances = 0;
87        for (IUserSession session : userSessions) {
88            for (ITaskInstance taskInstance : session) {
89                Counter counter = equalTaskInstancesMap.getValue(taskInstance.getTask());
90               
91                if (counter == null) {
92                    counter = new Counter();
93                    equalTaskInstancesMap.addSymbol(taskInstance.getTask(), counter);
94                }
95               
96                counter.count++;
97                instanceCountMap.put(taskInstance.getTask(), counter);
98               
99                noOfTaskInstances++;
100            }
101        }
102       
103        Console.traceln
104            (Level.FINEST, "performing training of " + noOfTaskInstances + " task instances");
105       
106        Counter processedTaskInstances = new Counter();
107        Counter currentSequenceMaxCount = new Counter();
108        int counterRecheckAt = noOfTaskInstances / 20; // recheck the maximum count after each
109                                                       // 10% of processed task instances
110        for (IUserSession session : userSessions) {
111            train(session, maxOrder, instanceCountMap, processedTaskInstances,
112                  currentSequenceMaxCount, counterRecheckAt);
113        }     
114       
115        updateKnownSymbols();
116    }
117   
118    /* (non-Javadoc)
119     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
120     */
121    @Override
122    public boolean equals(Object other) {
123        if (this == other) {
124            return true;
125        }
126        else if (other instanceof TaskInstanceTrie) {
127            return super.equals(other);
128        }
129        else {
130            return false;
131        }
132    }
133
134    /* (non-Javadoc)
135     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
136     */
137    @Override
138    public int hashCode() {
139        return super.hashCode();
140    }
141
142    /**
143     * <p>
144     * internally used convenience method for implementing the training optimization
145     * </p>
146     */
147    private void train(IUserSession        userSession,
148                       int                 maxOrder,
149                       Map<ITask, Counter> taskInstanceCountMap,
150                       Counter             processedTaskInstances,
151                       Counter             currentSequenceMaxCount,
152                       int                 counterRecheckAt)
153    {
154        List<ITask> subsequence = new LinkedList<ITask>();
155       
156        for (ITaskInstance currentTaskInstance : userSession) {
157           
158            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;
159           
160            if (processedTaskInstances.count >= counterRecheckAt) {
161                currentSequenceMaxCount.count = getCurrentSequenceMaxCount();
162                processedTaskInstances.count = 0;
163            }
164           
165            if (occurrenceCount < currentSequenceMaxCount.count) {
166                // this task instance does not need to be considered, as it occurs not often enough
167                // to be part of a sequence, that occurs most often. Therefore train all remaining
168                // sequences so far and go on, until the next useful sequence is found.
169               
170                while (subsequence.size() > 1) {
171                    add(subsequence);
172                    subsequence.remove(0);
173                }
174               
175                subsequence.clear();
176            }
177            else {
178                subsequence.add(currentTaskInstance.getTask());
179
180                if (subsequence.size() == maxOrder) {
181                    add(subsequence);
182                    subsequence.remove(0);
183                }
184            }
185           
186            processedTaskInstances.count++;
187        }
188       
189        // add shorter remaining subsequences, if any
190        while (subsequence.size() > 1) {
191            add(subsequence);
192            subsequence.remove(0);
193        }
194    }
195
196    /**
197     * <p>
198     * determines the current maximum count of sequences of a minimal length of two. Task instances
199     * occuring more seldom do not have to be considered anymore
200     * </p>
201     *
202     * @return the current maximum count of sequences of a minimal length of two
203     */
204    private int getCurrentSequenceMaxCount() {
205        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
206        super.process(processor);
207        return processor.getMaxCount();
208    }
209
210    /**
211     * <p>
212     * trie processor identifying the current maximum count of sequences of a minimal length of two
213     * </p>
214     *
215     * @author Patrick Harms
216     */
217    private static class MaxSequenceCountFinder implements TrieProcessor<ITask> {
218       
219        /**
220         * <p>
221         * the current maximum count
222         * </p>
223         */
224        private int currentCount = 0;
225       
226        /* (non-Javadoc)
227         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
228         */
229        @Override
230        public TrieProcessor.Result process(List<ITask> foundTask, int count) {
231            if (foundTask.size() == 2) {
232                this.currentCount = Math.max(this.currentCount, count);
233               
234                // do not consider children
235                return TrieProcessor.Result.SKIP_NODE;
236            }
237            else {
238                return TrieProcessor.Result.CONTINUE;
239            }
240        }
241
242        /**
243         * <p>
244         * returns the current maximum count
245         * </p>
246         */
247        private int getMaxCount() {
248            return currentCount;
249        }
250
251    }
252   
253    /**
254     * <p>
255     * counter object to be able to call something by the counters reference
256     * </p>
257     *
258     * @author Patrick Harms
259     */
260    private static class Counter {
261        int count = 0;
262    }
263       
264}
Note: See TracBrowser for help on using the repository browser.