source: trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/TrieNode.java @ 655

Last change on this file since 655 was 655, checked in by pharms, 12 years ago
  • removed old copyright file header
  • Property svn:mime-type set to text/plain
File size: 12.2 KB
Line 
1package de.ugoe.cs.quest.usageprofiles;
2
3import java.io.Serializable;
4import java.security.InvalidParameterException;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.List;
8
9import de.ugoe.cs.quest.usageprofiles.Trie.Edge;
10import de.ugoe.cs.quest.usageprofiles.Trie.TrieVertex;
11import de.ugoe.cs.util.StringTools;
12import edu.uci.ics.jung.graph.DelegateTree;
13import edu.uci.ics.jung.graph.Tree;
14
15/**
16 * <p>
17 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
18 * The counter marks the number of occurences of the sequence defined by the path from the root of
19 * the trie to this node.
20 * </p>
21 *
22 * @author Steffen Herbold
23 *
24 * @param <T>
25 *            Type of the symbols that are stored in the trie.
26 * @see Trie
27 */
28class TrieNode<T> implements Serializable {
29
30    /**
31     * <p>
32     * Id for object serialization.
33     * </p>
34     */
35    private static final long serialVersionUID = 1L;
36
37    /**
38     * <p>
39     * Counter for the number of occurences of the sequence.
40     * </p>
41     */
42    private int count;
43
44    /**
45     * <p>
46     * Symbol associated with the node.
47     * </p>
48     */
49    private final T symbol;
50
51    /**
52     * <p>
53     * Child nodes of this node. If the node is a leaf this collection is empty.
54     * </p>
55     */
56    private Collection<TrieNode<T>> children;
57
58    /**
59     * <p>
60     * Constructor. Creates a new TrieNode without a symbol associated.<br>
61     * <b>This constructor should only be used to create the root node of the trie!</b>
62     * </p>
63     */
64    TrieNode() {
65        this.symbol = null;
66        count = 0;
67        children = new LinkedList<TrieNode<T>>();
68    }
69
70    /**
71     * <p>
72     * Constructor. Creates a new TrieNode. The symbol must not be null.
73     * </p>
74     *
75     * @param symbol
76     *            symbol associated with the trie node
77     */
78    TrieNode(T symbol) {
79        if (symbol == null) {
80            throw new InvalidParameterException(
81                                                "symbol must not be null. null is reserved for root node!");
82        }
83        this.symbol = symbol;
84        count = 0;
85        children = new LinkedList<TrieNode<T>>();
86    }
87
88    /**
89     * <p>
90     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
91     * </p>
92     *
93     * @param other
94     */
95    TrieNode(TrieNode<T> other) {
96        if (other == null) {
97            throw new InvalidParameterException("other must not be null");
98        }
99        symbol = other.symbol;
100        count = other.count;
101        children = new LinkedList<TrieNode<T>>();
102        for (TrieNode<T> child : other.children) {
103            children.add(new TrieNode<T>(child));
104        }
105    }
106
107    /**
108     * <p>
109     * Adds a given subsequence to the trie and increases the counters accordingly.
110     * </p>
111     *
112     * @param subsequence
113     *            subsequence whose counters are increased
114     * @see Trie#add(List)
115     */
116    public void add(List<T> subsequence) {
117        if (!subsequence.isEmpty()) {
118            if (!symbol.equals(subsequence.get(0))) { // should be guaranteed by
119                                                      // the
120                                                      // recursion/TrieRoot!
121                throw new AssertionError("Invalid trie operation!");
122            }
123            count++;
124            subsequence.remove(0);
125            if (!subsequence.isEmpty()) {
126                T nextSymbol = subsequence.get(0);
127                getChildCreate(nextSymbol).add(subsequence);
128            }
129        }
130    }
131
132    /**
133     * <p>
134     * Returns the symbol associated with the node.
135     * </p>
136     *
137     * @return symbol associated with the node
138     */
139    public T getSymbol() {
140        return symbol;
141    }
142
143    /**
144     * <p>
145     * Returns the number of occurences of the sequence represented by the node.
146     * </p>
147     *
148     * @return number of occurences of the sequence represented by the node
149     */
150    public int getCount() {
151        return count;
152    }
153
154    /**
155     * <p>
156     * Returns the child of the node associated with the given symbol or creates it if it does not
157     * exist yet.
158     * </p>
159     *
160     * @param symbol
161     *            symbol whose node is required
162     * @return node associated with the symbol
163     * @see Trie#getChildCreate(Object)
164     */
165    protected TrieNode<T> getChildCreate(T symbol) {
166        TrieNode<T> node = getChild(symbol);
167        if (node == null) {
168            node = new TrieNode<T>(symbol);
169            children.add(node);
170        }
171        return node;
172    }
173
174    /**
175     * <p>
176     * Returns the child of the node associated with the given symbol or null if it does not exist.
177     * </p>
178     *
179     * @param symbol
180     *            symbol whose node is required
181     * @return node associated with the symbol; null if no such node exists
182     * @see Trie#getChild(Object)
183     */
184    protected TrieNode<T> getChild(T symbol) {
185        for (TrieNode<T> child : children) {
186            if (child.getSymbol().equals(symbol)) {
187                return child;
188            }
189        }
190        return null;
191    }
192
193    /**
194     * <p>
195     * Returns all children of this node.
196     * </p>
197     *
198     * @return children of this node
199     */
200    protected Collection<TrieNode<T>> getChildren() {
201        return children;
202    }
203
204    /**
205     * <p>
206     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
207     * with the sequence or null if no such node is found.
208     * </p>
209     *
210     * @param sequence
211     *            sequence that is searched for
212     * @return node associated with the sequence
213     * @see Trie#find(List)
214     */
215    public TrieNode<T> find(List<T> subsequence) {
216        TrieNode<T> result = null;
217        if (subsequence.isEmpty()) {
218            result = this;
219        }
220        else {
221            TrieNode<T> node = getChild(subsequence.get(0));
222            if (node != null) {
223                subsequence.remove(0);
224                result = node.find(subsequence);
225            }
226        }
227        return result;
228    }
229
230    /**
231     * <p>
232     * Returns a collection of all symbols that follow a this node, i.e., the symbols associated
233     * with the children of this node.
234     * </p>
235     *
236     * @return symbols follow this node
237     * @see TrieNode#getFollowingSymbols()
238     */
239    public Collection<T> getFollowingSymbols() {
240        Collection<T> followingSymbols = new LinkedList<T>();
241        for (TrieNode<T> child : children) {
242            followingSymbols.add(child.getSymbol());
243        }
244        return followingSymbols;
245    }
246
247    /**
248     * <p>
249     * The string representation of a node is {@code symbol.toString()#count}
250     * </p>
251     *
252     * @see java.lang.Object#toString()
253     */
254    @Override
255    public String toString() {
256        String str = symbol.toString() + " #" + count;
257        if (!children.isEmpty()) {
258            str += StringTools.ENDLINE + children.toString();
259        }
260        return str;
261    }
262
263    /**
264     * <p>
265     * Generates a {@link Tree} represenation of the trie.
266     * </p>
267     *
268     * @param parent
269     *            parent vertex in the generated tree
270     * @param graph
271     *            complete tree
272     */
273    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
274        TrieVertex currentVertex;
275        if (isRoot()) {
276            currentVertex = new TrieVertex("root");
277            graph.addVertex(currentVertex);
278        }
279        else {
280            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
281            graph.addChild(new Edge(), parent, currentVertex);
282        }
283        for (TrieNode<T> node : children) {
284            node.getGraph(currentVertex, graph);
285        }
286    }
287
288    /**
289     * <p>
290     * Appends the current node to the dot representation of the trie.
291     * </p>
292     *
293     * @param stringBuilder
294     *            {@link StringBuilder} to which the dot representation is appended
295     */
296    void appendDotRepresentation(StringBuilder stringBuilder) {
297        String thisSaneId;
298        if (isRoot()) {
299            thisSaneId = "root";
300        }
301        else {
302            thisSaneId =
303                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
304        }
305        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
306            StringTools.ENDLINE);
307        for (TrieNode<T> childNode : children) {
308            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
309                StringTools.ENDLINE);
310        }
311        for (TrieNode<T> childNode : children) {
312            childNode.appendDotRepresentation(stringBuilder);
313        }
314    }
315
316    /**
317     * <p>
318     * Checks if the node is a leaf.
319     * </p>
320     *
321     * @return true if the node is a leaf, false otherwise.
322     */
323    protected boolean isLeaf() {
324        return children.isEmpty();
325    }
326
327    /**
328     * <p>
329     * Checks if the node is the root.
330     * </p>
331     *
332     * @return true if the node is the root of the trie, false otherwise
333     */
334    protected boolean isRoot() {
335        return symbol == null;
336    }
337
338    /**
339     * <p>
340     * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the
341     * set.
342     * </p>
343     *
344     * @param ancestors
345     *            set of all ancestors of leafs
346     */
347    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
348        boolean isAncestor = false;
349        for (TrieNode<T> child : children) {
350            child.getLeafAncestors(ancestors);
351            isAncestor |= child.isLeaf();
352        }
353        if (isAncestor) {
354            ancestors.add(this);
355        }
356    }
357
358    /**
359     * <p>
360     * Returns the number of descendants of this node that are leafs. This does not only include
361     * direct children of this node, but all leafs in the sub-trie with this node as root.
362     * </p>
363     *
364     * @return number of leafs in this sub-trie
365     */
366    protected int getNumLeafs() {
367        int numLeafs = 0;
368        for (TrieNode<T> child : children) {
369            if (child.isLeaf()) {
370                numLeafs++;
371            }
372            else {
373                numLeafs += child.getNumLeafs();
374            }
375        }
376        return numLeafs;
377    }
378
379    /**
380     * <p>
381     * Sets the {@link #count} of this node.
382     * </p>
383     * <p>
384     * This function should only be used sparingly and very carefully! The count is usually
385     * maintained automatically by the training procedures.
386     * </p>
387     *
388     * @param count
389     *            new count
390     */
391    protected void setCount(int count) {
392        this.count = count;
393    }
394
395    /**
396     * <p>
397     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
398     * {@link #children} are equal.
399     * </p>
400     *
401     * @see java.lang.Object#equals(java.lang.Object)
402     */
403    @SuppressWarnings("rawtypes")
404    @Override
405    public boolean equals(Object other) {
406        if (other == this) {
407            return true;
408        }
409        if (other instanceof TrieNode) {
410            TrieNode otherNode = (TrieNode) other;
411            if (symbol == null) {
412                return count == otherNode.count && otherNode.symbol == null &&
413                    children.equals(otherNode.children);
414            }
415            else {
416                return count == otherNode.count && symbol.equals(((TrieNode) other).symbol) &&
417                    children.equals(((TrieNode) other).children);
418            }
419        }
420        return false;
421    }
422
423    /*
424     * (non-Javadoc)
425     *
426     * @see java.lang.Object#hashCode()
427     */
428    @Override
429    public int hashCode() {
430        int multiplier = 17;
431        int hash = 42;
432        if (symbol != null) {
433            hash = multiplier * hash + symbol.hashCode();
434        }
435        hash = multiplier * hash + count;
436        hash = multiplier * hash + children.hashCode();
437        return hash;
438    }
439}
Note: See TracBrowser for help on using the repository browser.