source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 1553

Last change on this file since 1553 was 1401, checked in by pharms, 10 years ago
File size: 8.5 KB
RevLine 
[1256]1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
21
[1285]22import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
[1256]23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
25import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
26import de.ugoe.cs.autoquest.usageprofiles.Trie;
27import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
28
29/**
30 * <p>
[1401]31 * This trie implementation is a performance optimization for generating task trees. It does not
32 * create a full trie but adds only those subsequences having a chance of occurring most often.
33 * For this, it initially counts the number of occurrences of each task instance. Then, during
34 * training, it regularly determines the number of the currently most often occurring sequence. If
35 * this number is higher than the count of a task instance to be trained, the task instance is
36 * skipped the not added to the trie.
[1256]37 * </p>
38 *
39 * @author Patrick Harms
40 */
[1285]41class TaskInstanceTrie extends Trie<ITaskInstance> {
[1256]42
43    /**  */
44    private static final long serialVersionUID = 1L;
45
46    /**
47     * <p>
[1401]48     * the task handling strategy to be used for comparing tasks
[1256]49     * </p>
50     */
[1285]51    private TaskHandlingStrategy taskStrategy;
52   
[1256]53    /**
54     * <p>
[1401]55     * instantiated the trie with the task handling strategy to be used
[1256]56     * </p>
57     *
[1401]58     * @param taskStrategy the task handling strategy to be used for comparing tasks
[1256]59     */
[1285]60    public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
61        super(taskStrategy);
62        this.taskStrategy = taskStrategy;
[1256]63    }
64
65    /**
[1401]66     * <p>
67     * trains this trie with the provided user sessions up to the provided maximum depth using
68     * the optimization described in the description of this class.
69     * </p>
70     *
71     * @param userSessions the sessions for which this trie is to be trained
72     * @param maxOrder     the depth of the trie
[1256]73     */
74    public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
75        if (maxOrder < 1) {
76            return;
77        }
78       
[1285]79        SymbolMap<ITaskInstance, Counter> equalTaskInstancesMap =
80            taskStrategy.createSymbolMap();
[1256]81       
[1285]82        Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
[1256]83       
84        System.out.println("preparing training");
[1285]85        int noOfTaskInstances = 0;
[1256]86        for (IUserSession session : userSessions) {
87            for (ITaskInstance taskInstance : session) {
[1285]88                Counter counter = equalTaskInstancesMap.getValue(taskInstance);
[1256]89               
[1285]90                if (counter == null) {
91                    counter = new Counter();
92                    equalTaskInstancesMap.addSymbol(taskInstance, counter);
[1256]93                }
94               
[1285]95                counter.count++;
96                instanceCountMap.put(taskInstance.getTask(), counter);
97               
98                noOfTaskInstances++;
[1256]99            }
100        }
101       
[1285]102        System.out.println("performing training of " + noOfTaskInstances + " task instances");
103        Counter processedTaskInstances = new Counter();
104        int counterRecheckAt = noOfTaskInstances / 10; // recheck the maximum count after each
105                                                       // 10% of processed task instances
[1256]106        for (IUserSession session : userSessions) {
[1285]107            train(session, maxOrder, instanceCountMap, processedTaskInstances, counterRecheckAt);
[1256]108        }     
109       
110        updateKnownSymbols();
111    }
112   
[1267]113    /* (non-Javadoc)
114     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
115     */
116    @Override
117    public boolean equals(Object other) {
118        if (this == other) {
119            return true;
120        }
121        else if (other instanceof TaskInstanceTrie) {
122            return super.equals(other);
123        }
124        else {
125            return false;
126        }
127    }
128
[1273]129    /* (non-Javadoc)
130     * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
131     */
132    @Override
133    public int hashCode() {
134        return super.hashCode();
135    }
136
[1256]137    /**
[1401]138     * <p>
139     * internally used convenience method for implementing the training optimization
140     * </p>
[1256]141     */
[1285]142    private void train(IUserSession        userSession,
143                       int                 maxOrder,
144                       Map<ITask, Counter> taskInstanceCountMap,
145                       Counter             processedTaskInstances,
146                       int                 counterRecheckAt)
[1256]147    {
148        List<ITaskInstance> subsequence = new LinkedList<ITaskInstance>();
149       
150        int sequenceMaxCount = 0;
151       
[1401]152        for (ITaskInstance currentTaskInstance : userSession) {
[1256]153           
[1285]154            int occurrenceCount = taskInstanceCountMap.get(currentTaskInstance.getTask()).count;
155           
156            if (processedTaskInstances.count >= counterRecheckAt) {
[1256]157                sequenceMaxCount = getCurrentSequenceMaxCount();
[1285]158                processedTaskInstances.count = 0;
[1256]159            }
160           
161            if (occurrenceCount < sequenceMaxCount) {
162                // this task instance does not need to be considered, as it occurs not often enough
163                // to be part of a sequence, that occurs most often. Therefore train all remaining
164                // sequences so far and go on, until the next useful sequence is found.
165               
166                while (subsequence.size() > 1) {
167                    add(subsequence);
168                    subsequence.remove(0);
169                }
170               
171                subsequence.clear();
172            }
173            else {
174                subsequence.add(currentTaskInstance);
175
176                if (subsequence.size() == maxOrder) {
177                    add(subsequence);
178                    subsequence.remove(0);
179                }
180            }
[1285]181           
182            processedTaskInstances.count++;
[1256]183        }
184       
185        // add shorter remaining subsequences, if any
186        while (subsequence.size() > 1) {
187            add(subsequence);
188            subsequence.remove(0);
189        }
190    }
191
192    /**
193     * <p>
[1401]194     * determines the current maximum count of sequences of a minimal length of two. Task instances
195     * occuring more seldom do not have to be considered anymore
[1256]196     * </p>
197     *
[1401]198     * @return the current maximum count of sequences of a minimal length of two
[1256]199     */
200    private int getCurrentSequenceMaxCount() {
201        MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
202        super.process(processor);
203        return processor.getMaxCount();
204    }
205
206    /**
[1401]207     * <p>
208     * trie processor identifying the current maximum count of sequences of a minimal length of two
209     * </p>
210     *
[1256]211     * @author Patrick Harms
212     */
[1267]213    private static class MaxSequenceCountFinder implements TrieProcessor<ITaskInstance> {
[1256]214       
215        /**
[1401]216         * <p>
217         * the current maximum count
218         * </p>
[1256]219         */
220        private int currentCount = 0;
221       
222        /* (non-Javadoc)
223         * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int)
224         */
225        @Override
226        public TrieProcessor.Result process(List<ITaskInstance> foundTask, int count) {
227            if (foundTask.size() == 2) {
228                this.currentCount = Math.max(this.currentCount, count);
229               
230                // do not consider children
231                return TrieProcessor.Result.SKIP_NODE;
232            }
233            else {
234                return TrieProcessor.Result.CONTINUE;
235            }
236        }
237
238        /**
[1401]239         * <p>
240         * returns the current maximum count
241         * </p>
[1256]242         */
243        private int getMaxCount() {
244            return currentCount;
245        }
246
247    }
248   
[1285]249    /**
[1401]250     * <p>
251     * counter object to be able to call something by the counters reference
252     * </p>
253     *
[1285]254     * @author Patrick Harms
255     */
256    private static class Counter {
257        int count = 0;
258    }
259       
[1256]260}
Note: See TracBrowser for help on using the repository browser.