source: branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/TaskInstanceTrie.java @ 1735

Last change on this file since 1735 was 1734, checked in by rkrimmel, 10 years ago

Added automatically created javadoc, still needs to be commented properly though

File size: 7.6 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.HashMap;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.Map;
21
22import de.ugoe.cs.autoquest.tasktrees.treeifc.ITask;
23import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskInstance;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.IUserSession;
25import de.ugoe.cs.autoquest.usageprofiles.SymbolMap;
26import de.ugoe.cs.autoquest.usageprofiles.Trie;
27import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor;
28
29// TODO: Auto-generated Javadoc
30/**
31 * <p>
32 * This trie implementation is a performance optimization for generating task
33 * trees. It does not create a full trie but adds only those subsequences having
34 * a chance of occurring most often. For this, it initially counts the number of
35 * occurrences of each task instance. Then, during training, it regularly
36 * determines the number of the currently most often occurring sequence. If this
37 * number is higher than the count of a task instance to be trained, the task
38 * instance is skipped the not added to the trie.
39 * </p>
40 *
41 * @author Patrick Harms
42 */
43class TaskInstanceTrie extends Trie<ITaskInstance> {
44
45        /**
46         * <p>
47         * counter object to be able to call something by the counters reference
48         * </p>.
49         *
50         * @author Patrick Harms
51         */
52        private static class Counter {
53               
54                /** The count. */
55                int count = 0;
56        }
57
58        /**
59         * <p>
60         * trie processor identifying the current maximum count of sequences of a
61         * minimal length of two
62         * </p>.
63         *
64         * @author Patrick Harms
65         */
66        private static class MaxSequenceCountFinder implements
67                        TrieProcessor<ITaskInstance> {
68
69                /** <p> the current maximum count </p>. */
70                private int currentCount = 0;
71
72                /**
73                 * <p>
74                 * returns the current maximum count
75                 * </p>.
76                 *
77                 * @return the max count
78                 */
79                private int getMaxCount() {
80                        return currentCount;
81                }
82
83                /*
84                 * (non-Javadoc)
85                 *
86                 * @see
87                 * de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util
88                 * .List, int)
89                 */
90                @Override
91                public TrieProcessor.Result process(List<ITaskInstance> foundTask,
92                                int count) {
93                        if (foundTask.size() == 2) {
94                                this.currentCount = Math.max(this.currentCount, count);
95
96                                // do not consider children
97                                return TrieProcessor.Result.SKIP_NODE;
98                        } else {
99                                return TrieProcessor.Result.CONTINUE;
100                        }
101                }
102
103        }
104
105        /** The Constant serialVersionUID. */
106        private static final long serialVersionUID = 1L;
107
108        /** <p> the task handling strategy to be used for comparing tasks </p>. */
109        private final TaskHandlingStrategy taskStrategy;
110
111        /**
112         * <p>
113         * instantiated the trie with the task handling strategy to be used
114         * </p>.
115         *
116         * @param taskStrategy            the task handling strategy to be used for comparing tasks
117         */
118        public TaskInstanceTrie(TaskHandlingStrategy taskStrategy) {
119                super(taskStrategy);
120                this.taskStrategy = taskStrategy;
121        }
122
123        /*
124         * (non-Javadoc)
125         *
126         * @see de.ugoe.cs.autoquest.usageprofiles.Trie#equals(java.lang.Object)
127         */
128        @Override
129        public boolean equals(Object other) {
130                if (this == other) {
131                        return true;
132                } else if (other instanceof TaskInstanceTrie) {
133                        return super.equals(other);
134                } else {
135                        return false;
136                }
137        }
138
139        /**
140         * <p>
141         * determines the current maximum count of sequences of a minimal length of
142         * two. Task instances occuring more seldom do not have to be considered
143         * anymore
144         * </p>
145         *
146         * @return the current maximum count of sequences of a minimal length of two
147         */
148        private int getCurrentSequenceMaxCount() {
149                final MaxSequenceCountFinder processor = new MaxSequenceCountFinder();
150                super.process(processor);
151                return processor.getMaxCount();
152        }
153
154        /*
155         * (non-Javadoc)
156         *
157         * @see de.ugoe.cs.autoquest.usageprofiles.Trie#hashCode()
158         */
159        @Override
160        public int hashCode() {
161                return super.hashCode();
162        }
163
164        /**
165         * <p>
166         * internally used convenience method for implementing the training
167         * optimization
168         * </p>.
169         *
170         * @param userSession the user session
171         * @param maxOrder the max order
172         * @param taskInstanceCountMap the task instance count map
173         * @param processedTaskInstances the processed task instances
174         * @param counterRecheckAt the counter recheck at
175         */
176        private void train(IUserSession userSession, int maxOrder,
177                        Map<ITask, Counter> taskInstanceCountMap,
178                        Counter processedTaskInstances, int counterRecheckAt) {
179                final List<ITaskInstance> subsequence = new LinkedList<ITaskInstance>();
180
181                int sequenceMaxCount = 0;
182
183                for (final ITaskInstance currentTaskInstance : userSession) {
184
185                        final int occurrenceCount = taskInstanceCountMap
186                                        .get(currentTaskInstance.getTask()).count;
187
188                        if (processedTaskInstances.count >= counterRecheckAt) {
189                                sequenceMaxCount = getCurrentSequenceMaxCount();
190                                processedTaskInstances.count = 0;
191                        }
192
193                        if (occurrenceCount < sequenceMaxCount) {
194                                // this task instance does not need to be considered, as it
195                                // occurs not often enough
196                                // to be part of a sequence, that occurs most often. Therefore
197                                // train all remaining
198                                // sequences so far and go on, until the next useful sequence is
199                                // found.
200
201                                while (subsequence.size() > 1) {
202                                        add(subsequence);
203                                        subsequence.remove(0);
204                                }
205
206                                subsequence.clear();
207                        } else {
208                                subsequence.add(currentTaskInstance);
209
210                                if (subsequence.size() == maxOrder) {
211                                        add(subsequence);
212                                        subsequence.remove(0);
213                                }
214                        }
215
216                        processedTaskInstances.count++;
217                }
218
219                // add shorter remaining subsequences, if any
220                while (subsequence.size() > 1) {
221                        add(subsequence);
222                        subsequence.remove(0);
223                }
224        }
225
226        /**
227         * <p>
228         * trains this trie with the provided user sessions up to the provided
229         * maximum depth using the optimization described in the description of this
230         * class.
231         * </p>
232         *
233         * @param userSessions
234         *            the sessions for which this trie is to be trained
235         * @param maxOrder
236         *            the depth of the trie
237         */
238        public void trainSessions(List<IUserSession> userSessions, int maxOrder) {
239                if (maxOrder < 1) {
240                        return;
241                }
242
243                final SymbolMap<ITaskInstance, Counter> equalTaskInstancesMap = taskStrategy
244                                .createSymbolMap();
245
246                final Map<ITask, Counter> instanceCountMap = new HashMap<ITask, Counter>();
247
248                System.out.println("preparing training");
249                int noOfTaskInstances = 0;
250                for (final IUserSession session : userSessions) {
251                        for (final ITaskInstance taskInstance : session) {
252                                Counter counter = equalTaskInstancesMap.getValue(taskInstance);
253
254                                if (counter == null) {
255                                        counter = new Counter();
256                                        equalTaskInstancesMap.addSymbol(taskInstance, counter);
257                                }
258
259                                counter.count++;
260                                instanceCountMap.put(taskInstance.getTask(), counter);
261
262                                noOfTaskInstances++;
263                        }
264                }
265
266                System.out.println("performing training of " + noOfTaskInstances
267                                + " task instances");
268                final Counter processedTaskInstances = new Counter();
269                final int counterRecheckAt = noOfTaskInstances / 10; // recheck the
270                                                                                                                                // maximum count
271                                                                                                                                // after each
272                // 10% of processed task instances
273                for (final IUserSession session : userSessions) {
274                        train(session, maxOrder, instanceCountMap, processedTaskInstances,
275                                        counterRecheckAt);
276                }
277
278                updateKnownSymbols();
279        }
280
281}
Note: See TracBrowser for help on using the repository browser.