source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 1188

Last change on this file since 1188 was 1188, checked in by pharms, 11 years ago
  • remove a find bugs warning
  • Property svn:mime-type set to text/plain
File size: 14.6 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.usageprofiles;
16
17import java.io.Serializable;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21
22import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
23import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
24import de.ugoe.cs.util.StringTools;
25import edu.uci.ics.jung.graph.DelegateTree;
26import edu.uci.ics.jung.graph.Tree;
27
28/**
29 * <p>
30 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
31 * The counter marks the number of occurences of the sequence defined by the path from the root of
32 * the trie to this node.
33 * </p>
34 *
35 * @author Steffen Herbold, Patrick Harms
36 *
37 * @param <T>
38 *            Type of the symbols that are stored in the trie.
39 * @see Trie
40 */
41class TrieNode<T> implements Serializable {
42
43    /**
44     * <p>
45     * Id for object serialization.
46     * </p>
47     */
48    private static final long serialVersionUID = 1L;
49
50    /**
51     * <p>
52     * Counter for the number of occurences of the sequence.
53     * </p>
54     */
55    private int count;
56
57    /**
58     * <p>
59     * Symbol associated with the node.
60     * </p>
61     */
62    private final T symbol;
63
64    /**
65     * <p>
66     * Child nodes of this node. If the node is a leaf this collection is empty.
67     * </p>
68     */
69    private Collection<TrieNode<T>> children;
70
71    /**
72     * <p>
73     * Comparator to be used for comparing the symbols with each other
74     * </p>
75     */
76    private transient SymbolComparator<T> comparator;
77
78    /**
79     * <p>
80     * Constructor. Creates a new TrieNode without a symbol associated.<br>
81     * <b>This constructor should only be used to create the root node of the trie!</b>
82     * The comparator used by the tree node will be a {@link DefaultSymbolComparator}.
83     * </p>
84     */
85    TrieNode() {
86        this(new DefaultSymbolComparator<T>());
87    }
88
89    /**
90     * <p>
91     * Constructor. Creates a new TrieNode. The symbol must not be null.
92     * The comparator used by the tree node will be a {@link DefaultSymbolComparator}.
93     * </p>
94     *
95     * @param symbol
96     *            symbol associated with the trie node
97     */
98    TrieNode(T symbol) {
99        this(symbol, new DefaultSymbolComparator<T>());
100    }
101
102    /**
103     * <p>
104     * Constructor. Creates a new TrieNode without a symbol associated using the provided
105     * comparator.<br>
106     * <b>This constructor should only be used to create the root node of the trie!</b>
107     * <br>The comparator must not be null.
108     * </p>
109     */
110    TrieNode(SymbolComparator<T> comparator) {
111        if (comparator == null) {
112            throw new IllegalArgumentException("comparator must not be null!");
113        }
114        this.comparator = comparator;
115        this.symbol = null;
116        count = 0;
117        children = new LinkedList<TrieNode<T>>();
118    }
119
120    /**
121     * <p>
122     * Constructor. Creates a new TrieNode using the provided comparator. The symbol and the
123     * comparator must not be null.
124     * </p>
125     *
126     * @param symbol
127     *            symbol associated with the trie node
128     */
129    TrieNode(T symbol, SymbolComparator<T> comparator) {
130        if (symbol == null) {
131            throw new IllegalArgumentException
132                ("symbol must not be null. null is reserved for root node!");
133        }
134        this.symbol = symbol;
135       
136        if (comparator == null) {
137            throw new IllegalArgumentException("comparator must not be null!");
138        }
139        this.comparator = comparator;
140
141        count = 0;
142        children = new LinkedList<TrieNode<T>>();
143    }
144
145    /**
146     * <p>
147     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
148     * </p>
149     *
150     * @param other
151     */
152    TrieNode(TrieNode<T> other) {
153        if (other == null) {
154            throw new IllegalArgumentException("other must not be null");
155        }
156        symbol = other.symbol;
157        count = other.count;
158        comparator = other.comparator;
159        children = new LinkedList<TrieNode<T>>();
160        for (TrieNode<T> child : other.children) {
161            children.add(new TrieNode<T>(child));
162        }
163    }
164
165    /**
166     * <p>
167     * Adds a given subsequence to the trie and increases the counters accordingly.
168     * </p>
169     *
170     * @param subsequence
171     *            subsequence whose counters are increased
172     * @see Trie#add(List)
173     */
174    public void add(List<T> subsequence) {
175        if (!subsequence.isEmpty()) {
176            if (!comparator.equals(symbol, subsequence.get(0))) { // should be guaranteed by
177                                                                  // the recursion/TrieRoot!
178                throw new AssertionError("Invalid trie operation!");
179            }
180            count++;
181            subsequence.remove(0);
182            if (!subsequence.isEmpty()) {
183                T nextSymbol = subsequence.get(0);
184                getChildCreate(nextSymbol).add(subsequence);
185            }
186        }
187    }
188
189    /**
190     * <p>
191     * Returns the symbol associated with the node.
192     * </p>
193     *
194     * @return symbol associated with the node
195     */
196    public T getSymbol() {
197        return symbol;
198    }
199
200    /**
201     * <p>
202     * Returns the number of occurences of the sequence represented by the node.
203     * </p>
204     *
205     * @return number of occurences of the sequence represented by the node
206     */
207    public int getCount() {
208        return count;
209    }
210
211    /**
212     * <p>
213     * Returns the child of the node associated with the given symbol or creates it if it does not
214     * exist yet.
215     * </p>
216     *
217     * @param symbol
218     *            symbol whose node is required
219     * @return node associated with the symbol
220     * @see Trie#getChildCreate(Object)
221     */
222    protected TrieNode<T> getChildCreate(T symbol) {
223        TrieNode<T> node = getChild(symbol);
224        if (node == null) {
225            node = new TrieNode<T>(symbol, comparator);
226            children.add(node);
227        }
228        return node;
229    }
230
231    /**
232     * <p>
233     * Returns the child of the node associated with the given symbol or null if it does not exist.
234     * </p>
235     *
236     * @param symbol
237     *            symbol whose node is required
238     * @return node associated with the symbol; null if no such node exists
239     * @see Trie#getChild(Object)
240     */
241    protected TrieNode<T> getChild(T symbol) {
242        for (TrieNode<T> child : children) {
243            if (comparator.equals(child.getSymbol(), symbol)) {
244                return child;
245            }
246        }
247        return null;
248    }
249
250    /**
251     * <p>
252     * Returns all children of this node.
253     * </p>
254     *
255     * @return children of this node
256     */
257    protected Collection<TrieNode<T>> getChildren() {
258        return children;
259    }
260
261    /**
262     * <p>
263     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
264     * with the sequence or null if no such node is found.
265     * </p>
266     *
267     * @param sequence
268     *            sequence that is searched for
269     * @return node associated with the sequence
270     * @see Trie#find(List)
271     */
272    public TrieNode<T> find(List<T> subsequence) {
273        TrieNode<T> result = null;
274        if (subsequence.isEmpty()) {
275            result = this;
276        }
277        else {
278            TrieNode<T> node = getChild(subsequence.get(0));
279            if (node != null) {
280                subsequence.remove(0);
281                result = node.find(subsequence);
282            }
283        }
284        return result;
285    }
286
287    /**
288     * <p>
289     * Returns a collection of all symbols that follow a this node, i.e., the symbols associated
290     * with the children of this node.
291     * </p>
292     *
293     * @return symbols follow this node
294     * @see TrieNode#getFollowingSymbols()
295     */
296    public Collection<T> getFollowingSymbols() {
297        Collection<T> followingSymbols = new LinkedList<T>();
298        for (TrieNode<T> child : children) {
299            followingSymbols.add(child.getSymbol());
300        }
301        return followingSymbols;
302    }
303
304    /**
305     * <p>
306     * The string representation of a node is {@code symbol.toString()#count}
307     * </p>
308     *
309     * @see java.lang.Object#toString()
310     */
311    @Override
312    public String toString() {
313        String str;
314       
315        if (symbol == null) {
316            str = "ROOT";
317        }
318        else {
319            str = symbol.toString() + " #" + count;
320        }
321       
322        if (!children.isEmpty()) {
323            str += StringTools.ENDLINE + children.toString();
324        }
325        return str;
326    }
327
328    /**
329     * <p>
330     * Generates a {@link Tree} representation of the trie.
331     * </p>
332     *
333     * @param parent
334     *            parent vertex in the generated tree
335     * @param graph
336     *            complete tree
337     */
338    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
339        TrieVertex currentVertex;
340        if (isRoot()) {
341            currentVertex = new TrieVertex("root");
342            graph.addVertex(currentVertex);
343        }
344        else {
345            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
346            graph.addChild(new Edge(), parent, currentVertex);
347        }
348        for (TrieNode<T> node : children) {
349            node.getGraph(currentVertex, graph);
350        }
351    }
352
353    /**
354     * <p>
355     * Appends the current node to the dot representation of the trie.
356     * </p>
357     *
358     * @param stringBuilder
359     *            {@link StringBuilder} to which the dot representation is appended
360     */
361    void appendDotRepresentation(StringBuilder stringBuilder) {
362        String thisSaneId;
363        if (isRoot()) {
364            thisSaneId = "root";
365        }
366        else {
367            thisSaneId =
368                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
369        }
370        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
371            StringTools.ENDLINE);
372        for (TrieNode<T> childNode : children) {
373            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
374                StringTools.ENDLINE);
375        }
376        for (TrieNode<T> childNode : children) {
377            childNode.appendDotRepresentation(stringBuilder);
378        }
379    }
380
381    /**
382     * <p>
383     * Checks if the node is a leaf.
384     * </p>
385     *
386     * @return true if the node is a leaf, false otherwise.
387     */
388    protected boolean isLeaf() {
389        return children.isEmpty();
390    }
391
392    /**
393     * <p>
394     * Checks if the node is the root.
395     * </p>
396     *
397     * @return true if the node is the root of the trie, false otherwise
398     */
399    protected boolean isRoot() {
400        return symbol == null;
401    }
402
403    /**
404     * <p>
405     * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the
406     * set.
407     * </p>
408     *
409     * @param ancestors
410     *            set of all ancestors of leafs
411     */
412    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
413        boolean isAncestor = false;
414        for (TrieNode<T> child : children) {
415            child.getLeafAncestors(ancestors);
416            isAncestor |= child.isLeaf();
417        }
418        if (isAncestor) {
419            ancestors.add(this);
420        }
421    }
422
423    /**
424     * <p>
425     * Returns the number of descendants of this node that are leafs. This does not only include
426     * direct children of this node, but all leafs in the sub-trie with this node as root.
427     * </p>
428     *
429     * @return number of leafs in this sub-trie
430     */
431    protected int getNumLeafs() {
432        int numLeafs = 0;
433        for (TrieNode<T> child : children) {
434            if (child.isLeaf()) {
435                numLeafs++;
436            }
437            else {
438                numLeafs += child.getNumLeafs();
439            }
440        }
441        return numLeafs;
442    }
443
444    /**
445     * <p>
446     * Sets the {@link #count} of this node.
447     * </p>
448     * <p>
449     * This function should only be used sparingly and very carefully! The count is usually
450     * maintained automatically by the training procedures.
451     * </p>
452     *
453     * @param count
454     *            new count
455     */
456    protected void setCount(int count) {
457        this.count = count;
458    }
459
460    /**
461     * <p>
462     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
463     * {@link #children} are equal. For the comparison of their symbols the comparator is used.
464     * </p>
465     *
466     * @see java.lang.Object#equals(java.lang.Object)
467     */
468    @SuppressWarnings({ "rawtypes", "unchecked" })
469    @Override
470    public boolean equals(Object other) {
471        if (other == this) {
472            return true;
473        }
474        if (other instanceof TrieNode) {
475            TrieNode otherNode = (TrieNode) other;
476            if (symbol == null) {
477                return count == otherNode.count && otherNode.symbol == null &&
478                    children.equals(otherNode.children);
479            }
480            else {
481                return count == otherNode.count &&
482                    symbol.getClass().isInstance(((TrieNode) other).symbol) &&
483                    comparator.equals(symbol, ((TrieNode<T>) other).symbol) &&
484                    children.equals(((TrieNode) other).children);
485            }
486        }
487        return false;
488    }
489
490    /*
491     * (non-Javadoc)
492     *
493     * @see java.lang.Object#hashCode()
494     */
495    @Override
496    public int hashCode() {
497        int multiplier = 17;
498        int hash = 42;
499        if (symbol != null) {
500            hash = multiplier * hash + symbol.hashCode();
501        }
502        hash = multiplier * hash + count;
503        hash = multiplier * hash + children.hashCode();
504        return hash;
505    }
506}
Note: See TracBrowser for help on using the repository browser.