source: branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 1587

Last change on this file since 1587 was 1401, checked in by pharms, 10 years ago
File size: 8.5 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
21
22import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
25import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
26import de.ugoe.cs.autoquest.usageprofiles.Trie;
27import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
28
29/**
30 * <p>
31 * This trie implementation is a performance optimization for generating task trees. It does not
32 * create a full trie but adds only those subsequences having a chance of occurring most often.
33 * For this, it initially counts the number of occurrences of each task instance. Then, during
34 * training, it regularly determines the number of the currently most often occurring sequence. If
35 * this number is higher than the count of a task instance to be trained, the task instance is
36 * skipped the not added to the trie.
37 * </p>
38 *
39 * @author Patrick Harms
40 */
41class TaskInstanceTrie extends Trie<ITaskInstance> {
42
43    /**  */
44    private static final long serialVersionUID = 1L;
45
46    /**
47     * <p>
48     * the task handling strategy to be used for comparing tasks
49     * </p>
50     */
51    private TaskHandlingStrategy taskStrategy;
52   
53    /**
54     * <p>
55     * instantiated the trie with the task handling strategy to be used
56     * </p>
57     *
58     * @param taskStrategy the task handling strategy to be used for comparing tasks
59     */
60    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
61        super(taskStrategy);
62        this.taskStrategy = taskStrategy;
63    }
64
65    /**
66     * <p>
67     * trains this trie with the provided user sessions up to the provided maximum depth using
68     * the optimization described in the description of this class.
69     * </p>
70     *
71     * @param userSessions the sessions for which this trie is to be trained
72     * @param maxOrder     the depth of the trie
73     */
74    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
75        if (maxOrder < 1) {
76            return;
77        }
78       
79        SymbolMap<ITaskInstance, Counter> equalTaskInstancesMap =
80            taskStrategy.createSymbolMap();
81       
82        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
83       
84        System.out.println("preparing training");
85        int noOfTaskInstances = 0;
86        for (IUserSession session : userSessions) {
87            for (ITaskInstance taskInstance : session) {
88                Counter counter = equalTaskInstancesMap.getValue(taskInstance);
89               
90                if (counter == null) {
91                    counter = new Counter();
92                    equalTaskInstancesMap.addSymbol(taskInstance, counter);
93                }
94               
95                counter.count++;
96                instanceCountMap.put(taskInstance.getTask(), counter);
97               
98                noOfTaskInstances++;
99            }
100        }
101       
102        System.out.println("performing training of " + noOfTaskInstances + " task instances");
103        Counter processedTaskInstances = new Counter();
104        int counterRecheckAt = noOfTaskInstances / 10; // recheck the maximum count after each
105                                                       // 10% of processed task instances
106        for (IUserSession session : userSessions) {
107            train(session, maxOrder, instanceCountMap, processedTaskInstances, counterRecheckAt);
108        }     
109       
110        updateKnownSymbols();
111    }
112   
113    /* (non-Javadoc)
114     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
115     */
116    @Override
117    public boolean equals(Object other) {
118        if (this == other) {
119            return true;
120        }
121        else if (other instanceof TaskInstanceTrie) {
122            return super.equals(other);
123        }
124        else {
125            return false;
126        }
127    }
128
129    /* (non-Javadoc)
130     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
131     */
132    @Override
133    public int hashCode() {
134        return super.hashCode();
135    }
136
137    /**
138     * <p>
139     * internally used convenience method for implementing the training optimization
140     * </p>
141     */
142    private void train(IUserSession        userSession,
143                       int                 maxOrder,
144                       Map<ITask, Counter> taskInstanceCountMap,
145                       Counter             processedTaskInstances,
146                       int                 counterRecheckAt)
147    {
148        List<ITaskInstance> subsequence = new LinkedList<ITaskInstance>();
149       
150        int sequenceMaxCount = 0;
151       
152        for (ITaskInstance currentTaskInstance : userSession) {
153           
154            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;
155           
156            if (processedTaskInstances.count >= counterRecheckAt) {
157                sequenceMaxCount = getCurrentSequenceMaxCount();
158                processedTaskInstances.count = 0;
159            }
160           
161            if (occurrenceCount < sequenceMaxCount) {
162                // this task instance does not need to be considered, as it occurs not often enough
163                // to be part of a sequence, that occurs most often. Therefore train all remaining
164                // sequences so far and go on, until the next useful sequence is found.
165               
166                while (subsequence.size() > 1) {
167                    add(subsequence);
168                    subsequence.remove(0);
169                }
170               
171                subsequence.clear();
172            }
173            else {
174                subsequence.add(currentTaskInstance);
175
176                if (subsequence.size() == maxOrder) {
177                    add(subsequence);
178                    subsequence.remove(0);
179                }
180            }
181           
182            processedTaskInstances.count++;
183        }
184       
185        // add shorter remaining subsequences, if any
186        while (subsequence.size() > 1) {
187            add(subsequence);
188            subsequence.remove(0);
189        }
190    }
191
192    /**
193     * <p>
194     * determines the current maximum count of sequences of a minimal length of two. Task instances
195     * occuring more seldom do not have to be considered anymore
196     * </p>
197     *
198     * @return the current maximum count of sequences of a minimal length of two
199     */
200    private int getCurrentSequenceMaxCount() {
201        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
202        super.process(processor);
203        return processor.getMaxCount();
204    }
205
206    /**
207     * <p>
208     * trie processor identifying the current maximum count of sequences of a minimal length of two
209     * </p>
210     *
211     * @author Patrick Harms
212     */
213    private static class MaxSequenceCountFinder implements TrieProcessor<ITaskInstance> {
214       
215        /**
216         * <p>
217         * the current maximum count
218         * </p>
219         */
220        private int currentCount = 0;
221       
222        /* (non-Javadoc)
223         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
224         */
225        @Override
226        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
227            if (foundTask.size() == 2) {
228                this.currentCount = Math.max(this.currentCount, count);
229               
230                // do not consider children
231                return TrieProcessor.Result.SKIP_NODE;
232            }
233            else {
234                return TrieProcessor.Result.CONTINUE;
235            }
236        }
237
238        /**
239         * <p>
240         * returns the current maximum count
241         * </p>
242         */
243        private int getMaxCount() {
244            return currentCount;
245        }
246
247    }
248   
249    /**
250     * <p>
251     * counter object to be able to call something by the counters reference
252     * </p>
253     *
254     * @author Patrick Harms
255     */
256    private static class Counter {
257        int count = 0;
258    }
259       
260}
Note: See TracBrowser for help on using the repository browser.