source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 1282

Last change on this file since 1282 was 1282, checked in by pharms, 11 years ago
  • added support for symbol management strategy in tries, especially for storing them
  • adapted comparator approach accordingly
  • provided default implementation for symbol management strategies
  • added, extended and improved java doc
  • Property svn:mime-type set to text/plain
File size: 15.2 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.usageprofiles;
16
17import java.io.Serializable;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21
22import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
23import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
24import de.ugoe.cs.util.StringTools;
25import edu.uci.ics.jung.graph.DelegateTree;
26import edu.uci.ics.jung.graph.Tree;
27
28/**
29 * <p>
30 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
31 * The counter marks the number of occurrences of the sequence defined by the path from the root of
32 * the trie to this node.
33 * </p>
34 *
35 * @author Steffen Herbold, Patrick Harms
36 *
37 * @param <T>
38 *            Type of the symbols that are stored in the trie.
39 * @see Trie
40 */
41class TrieNode<T> implements Serializable {
42
43    /**
44     * <p>
45     * Id for object serialization.
46     * </p>
47     */
48    private static final long serialVersionUID = 1L;
49
50    /**
51     * <p>
52     * Counter for the number of occurrences of the sequence.
53     * </p>
54     */
55    private int count;
56
57    /**
58     * <p>
59     * Symbol associated with the node.
60     * </p>
61     */
62    private final T symbol;
63
64    /**
65     * <p>
66     * Child nodes of this node. If the node is a leaf this collection is empty.
67     * </p>
68     */
69    private SymbolMap<T, TrieNode<T>> children;
70   
71    /**
72     * <p>
73     * Strategy for handling symbols, i.e., for comparing and storing them
74     * </p>
75     */
76    private SymbolStrategy<T> strategy;
77
78    /**
79     * <p>
80     * Constructor. Creates a new TrieNode without a symbol associated.<br>
81     * <b>This constructor should only be used to create the root node of the trie!</b>
82     * The strategy used by the trie node will be a {@link DefaultSymbolStrategy}.
83     * </p>
84     */
85    TrieNode() {
86        this(new DefaultSymbolStrategy<T>());
87    }
88
89    /**
90     * <p>
91     * Constructor. Creates a new TrieNode. The symbol must not be null.
92     * The strategy used by the trie node will be a {@link DefaultSymbolStrategy}.
93     * </p>
94     *
95     * @param symbol
96     *            symbol associated with the trie node
97     *
98     * @throws IllegalArgumentException
99     *            if the provided symbol is null
100     */
101    TrieNode(T symbol) {
102        this(symbol, new DefaultSymbolStrategy<T>());
103    }
104
105    /**
106     * <p>
107     * Constructor. Creates a new TrieNode without a symbol associated using the provided
108     * strategy.<br>
109     * <b>This constructor should only be used to create the root node of the trie!</b>
110     * <br>The strategy must not be null.
111     * </p>
112     *
113     * @param strategy
114     *            the strategy used for comparing and storing symbols
115     *
116     * @throws IllegalArgumentException
117     *            if the provided strategy is null
118     */
119    TrieNode(SymbolStrategy<T> strategy) {
120        if (strategy == null) {
121            throw new IllegalArgumentException("strategy must not be null!");
122        }
123        this.strategy = strategy;
124        this.symbol = null;
125        count = 0;
126        children = strategy.createSymbolMap();
127    }
128
129    /**
130     * <p>
131     * Constructor. Creates a new TrieNode using the provided strategy. The symbol and the
132     * strategy must not be null.
133     * </p>
134     *
135     * @param symbol
136     *            symbol associated with the trie node
137     * @param strategy
138     *            the strategy used for comparing and storing symbols
139     *
140     * @throws IllegalArgumentException
141     *            if the provided symbol or strategy is null
142     */
143    TrieNode(T symbol, SymbolStrategy<T> strategy) {
144        if (symbol == null) {
145            throw new IllegalArgumentException
146                ("symbol must not be null. null is reserved for root node!");
147        }
148        this.symbol = symbol;
149       
150        if (strategy == null) {
151            throw new IllegalArgumentException("strategy must not be null!");
152        }
153        this.strategy = strategy;
154
155        count = 0;
156        children = strategy.createSymbolMap();
157    }
158
159    /**
160     * <p>
161     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
162     * </p>
163     *
164     * @param other the trie node to create the copy of
165     *
166     * @throws IllegalArgumentException
167     *            if the provided node is null
168     */
169    TrieNode(TrieNode<T> other) {
170        if (other == null) {
171            throw new IllegalArgumentException("other must not be null");
172        }
173        symbol = other.symbol;
174        count = other.count;
175        strategy = other.strategy;
176        children = strategy.createSymbolMap();
177       
178        for (TrieNode<T> child : other.children.getValues()) {
179            children.addSymbol(child.getSymbol(), new TrieNode<T>(child));
180        }
181    }
182
183    /**
184     * <p>
185     * Adds a given subsequence to the trie and increases the counters accordingly.
186     * </p>
187     *
188     * @param subsequence
189     *            subsequence whose counters are increased
190     * @see Trie#add(List)
191     */
192    public void add(List<T> subsequence) {
193        if (!subsequence.isEmpty()) {
194            if (!strategy.getSymbolComparator().equals(symbol, subsequence.get(0))) {
195                // should be guaranteed by the recursion/TrieRoot!
196                throw new AssertionError("Invalid trie operation!");
197            }
198            count++;
199            subsequence.remove(0);
200            if (!subsequence.isEmpty()) {
201                T nextSymbol = subsequence.get(0);
202                getChildCreate(nextSymbol).add(subsequence);
203            }
204        }
205    }
206
207    /**
208     * <p>
209     * Returns the symbol associated with the node.
210     * </p>
211     *
212     * @return symbol associated with the node
213     */
214    public T getSymbol() {
215        return symbol;
216    }
217
218    /**
219     * <p>
220     * Returns the number of occurrences of the sequence represented by the node.
221     * </p>
222     *
223     * @return number of occurrences of the sequence represented by the node
224     */
225    public int getCount() {
226        return count;
227    }
228
229    /**
230     * <p>
231     * Returns the child of the node associated with the given symbol or creates it if it does not
232     * exist yet.
233     * </p>
234     *
235     * @param symbol
236     *            symbol whose node is required
237     * @return node associated with the symbol
238     * @see Trie#getChildCreate(Object)
239     */
240    protected TrieNode<T> getChildCreate(T symbol) {
241        TrieNode<T> node = getChild(symbol);
242        if (node == null) {
243            node = new TrieNode<T>(symbol, strategy);
244            children.addSymbol(symbol, node);
245        }
246        return node;
247    }
248
249    /**
250     * <p>
251     * Returns the child of the node associated with the given symbol or null if it does not exist.
252     * </p>
253     *
254     * @param symbol
255     *            symbol whose node is required
256     * @return node associated with the symbol; null if no such node exists
257     * @see Trie#getChild(Object)
258     */
259    protected TrieNode<T> getChild(T symbol) {
260        return children.getValue(symbol);
261    }
262
263    /**
264     * <p>
265     * Returns all children of this node.
266     * </p>
267     *
268     * @return children of this node
269     */
270    protected Collection<TrieNode<T>> getChildren() {
271        return children.getValues();
272    }
273
274    /**
275     * <p>
276     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
277     * with the sequence or null if no such node is found.
278     * </p>
279     *
280     * @param sequence
281     *            sequence that is searched for
282     * @return node associated with the sequence
283     * @see Trie#find(List)
284     */
285    public TrieNode<T> find(List<T> subsequence) {
286        TrieNode<T> result = null;
287        if (subsequence.isEmpty()) {
288            result = this;
289        }
290        else {
291            TrieNode<T> node = getChild(subsequence.get(0));
292            if (node != null) {
293                subsequence.remove(0);
294                result = node.find(subsequence);
295            }
296        }
297        return result;
298    }
299
300    /**
301     * <p>
302     * Returns a collection of all symbols that follow this node, i.e., the symbols associated
303     * with the children of this node.
304     * </p>
305     *
306     * @return symbols follow this node
307     * @see TrieNode#getFollowingSymbols()
308     */
309    public Collection<T> getFollowingSymbols() {
310        Collection<T> followingSymbols = new LinkedList<T>();
311        for (TrieNode<T> child : children.getValues()) {
312            followingSymbols.add(child.getSymbol());
313        }
314        return followingSymbols;
315    }
316
317    /**
318     * <p>
319     * The string representation of a node is {@code symbol.toString()#count}
320     * </p>
321     *
322     * @see java.lang.Object#toString()
323     */
324    @Override
325    public String toString() {
326        String str;
327       
328        if (symbol == null) {
329            str = "ROOT";
330        }
331        else {
332            str = symbol.toString() + " #" + count;
333        }
334       
335        if (!children.isEmpty()) {
336            str += StringTools.ENDLINE + children.toString();
337        }
338        return str;
339    }
340
341    /**
342     * <p>
343     * Generates a {@link Tree} representation of the trie.
344     * </p>
345     *
346     * @param parent
347     *            parent vertex in the generated tree
348     * @param graph
349     *            complete tree
350     */
351    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
352        TrieVertex currentVertex;
353        if (isRoot()) {
354            currentVertex = new TrieVertex("root");
355            graph.addVertex(currentVertex);
356        }
357        else {
358            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
359            graph.addChild(new Edge(), parent, currentVertex);
360        }
361        for (TrieNode<T> node : children.getValues()) {
362            node.getGraph(currentVertex, graph);
363        }
364    }
365
366    /**
367     * <p>
368     * Appends the current node to the dot representation of the trie.
369     * </p>
370     *
371     * @param stringBuilder
372     *            {@link StringBuilder} to which the dot representation is appended
373     */
374    void appendDotRepresentation(StringBuilder stringBuilder) {
375        String thisSaneId;
376        if (isRoot()) {
377            thisSaneId = "root";
378        }
379        else {
380            thisSaneId =
381                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
382        }
383        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
384            StringTools.ENDLINE);
385        for (TrieNode<T> childNode : children.getValues()) {
386            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
387                StringTools.ENDLINE);
388        }
389        for (TrieNode<T> childNode : children.getValues()) {
390            childNode.appendDotRepresentation(stringBuilder);
391        }
392    }
393
394    /**
395     * <p>
396     * Checks if the node is a leaf.
397     * </p>
398     *
399     * @return true if the node is a leaf, false otherwise.
400     */
401    protected boolean isLeaf() {
402        return children.isEmpty();
403    }
404
405    /**
406     * <p>
407     * Checks if the node is the root.
408     * </p>
409     *
410     * @return true if the node is the root of the trie, false otherwise
411     */
412    protected boolean isRoot() {
413        return symbol == null;
414    }
415
416    /**
417     * <p>
418     * Recursive method that collects all nodes that are ancestors of leafs and stores them in the
419     * set.
420     * </p>
421     *
422     * @param ancestors
423     *            set of all ancestors of leafs
424     */
425    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
426        boolean isAncestor = false;
427        for (TrieNode<T> child : children.getValues()) {
428            child.getLeafAncestors(ancestors);
429            isAncestor |= child.isLeaf();
430        }
431        if (isAncestor) {
432            ancestors.add(this);
433        }
434    }
435
436    /**
437     * <p>
438     * Returns the number of descendants of this node that are leafs. This does not only include
439     * direct children of this node, but all leafs in the sub-trie with this node as root.
440     * </p>
441     *
442     * @return number of leafs in this sub-trie
443     */
444    protected int getNumLeafs() {
445        int numLeafs = 0;
446        for (TrieNode<T> child : children.getValues()) {
447            if (child.isLeaf()) {
448                numLeafs++;
449            }
450            else {
451                numLeafs += child.getNumLeafs();
452            }
453        }
454        return numLeafs;
455    }
456
457    /**
458     * <p>
459     * Sets the {@link #count} of this node.
460     * </p>
461     * <p>
462     * This function should only be used sparingly and very carefully! The count is usually
463     * maintained automatically by the training procedures.
464     * </p>
465     *
466     * @param count
467     *            new count
468     */
469    protected void setCount(int count) {
470        this.count = count;
471    }
472
473    /**
474     * <p>
475     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
476     * {@link #children} are equal. For the comparison of their symbols the comparator provided
477     * by the symbol management strategy is used.
478     * </p>
479     *
480     * @see java.lang.Object#equals(java.lang.Object)
481     */
482    @SuppressWarnings({ "rawtypes", "unchecked" })
483    @Override
484    public boolean equals(Object other) {
485        if (other == this) {
486            return true;
487        }
488        if (other instanceof TrieNode) {
489            TrieNode otherNode = (TrieNode) other;
490            if (symbol == null) {
491                return count == otherNode.count && otherNode.symbol == null &&
492                    children.equals(otherNode.children);
493            }
494            else {
495                return count == otherNode.count &&
496                    symbol.getClass().isInstance(((TrieNode) other).symbol) &&
497                    strategy.getSymbolComparator().equals(symbol, ((TrieNode<T>) other).symbol) &&
498                    children.equals(((TrieNode) other).children);
499            }
500        }
501        return false;
502    }
503
504    /*
505     * (non-Javadoc)
506     *
507     * @see java.lang.Object#hashCode()
508     */
509    @Override
510    public int hashCode() {
511        int multiplier = 17;
512        int hash = 42;
513        if (symbol != null) {
514            hash = multiplier * hash + symbol.hashCode();
515        }
516        hash = multiplier * hash + count;
517        hash = multiplier * hash + children.hashCode();
518        return hash;
519    }
520}
Note: See TracBrowser for help on using the repository browser.