source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 922

Last change on this file since 922 was 922, checked in by sherbold, 12 years ago
  • renaming of packages from de.ugoe.cs.quest to de.ugoe.cs.autoquest
  • Property svn:mime-type set to text/plain
File size: 12.2 KB
Line 
1package de.ugoe.cs.autoquest.usageprofiles;
2
3import java.io.Serializable;
4import java.util.Collection;
5import java.util.LinkedList;
6import java.util.List;
7
8import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
9import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
10import de.ugoe.cs.util.StringTools;
11import edu.uci.ics.jung.graph.DelegateTree;
12import edu.uci.ics.jung.graph.Tree;
13
14/**
15 * <p>
16 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
17 * The counter marks the number of occurences of the sequence defined by the path from the root of
18 * the trie to this node.
19 * </p>
20 *
21 * @author Steffen Herbold
22 *
23 * @param <T>
24 *            Type of the symbols that are stored in the trie.
25 * @see Trie
26 */
27class TrieNode<T> implements Serializable {
28
29    /**
30     * <p>
31     * Id for object serialization.
32     * </p>
33     */
34    private static final long serialVersionUID = 1L;
35
36    /**
37     * <p>
38     * Counter for the number of occurences of the sequence.
39     * </p>
40     */
41    private int count;
42
43    /**
44     * <p>
45     * Symbol associated with the node.
46     * </p>
47     */
48    private final T symbol;
49
50    /**
51     * <p>
52     * Child nodes of this node. If the node is a leaf this collection is empty.
53     * </p>
54     */
55    private Collection<TrieNode<T>> children;
56
57    /**
58     * <p>
59     * Constructor. Creates a new TrieNode without a symbol associated.<br>
60     * <b>This constructor should only be used to create the root node of the trie!</b>
61     * </p>
62     */
63    TrieNode() {
64        this.symbol = null;
65        count = 0;
66        children = new LinkedList<TrieNode<T>>();
67    }
68
69    /**
70     * <p>
71     * Constructor. Creates a new TrieNode. The symbol must not be null.
72     * </p>
73     *
74     * @param symbol
75     *            symbol associated with the trie node
76     */
77    TrieNode(T symbol) {
78        if (symbol == null) {
79            throw new IllegalArgumentException(
80                                                "symbol must not be null. null is reserved for root node!");
81        }
82        this.symbol = symbol;
83        count = 0;
84        children = new LinkedList<TrieNode<T>>();
85    }
86
87    /**
88     * <p>
89     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
90     * </p>
91     *
92     * @param other
93     */
94    TrieNode(TrieNode<T> other) {
95        if (other == null) {
96            throw new IllegalArgumentException("other must not be null");
97        }
98        symbol = other.symbol;
99        count = other.count;
100        children = new LinkedList<TrieNode<T>>();
101        for (TrieNode<T> child : other.children) {
102            children.add(new TrieNode<T>(child));
103        }
104    }
105
106    /**
107     * <p>
108     * Adds a given subsequence to the trie and increases the counters accordingly.
109     * </p>
110     *
111     * @param subsequence
112     *            subsequence whose counters are increased
113     * @see Trie#add(List)
114     */
115    public void add(List<T> subsequence) {
116        if (!subsequence.isEmpty()) {
117            if (!symbol.equals(subsequence.get(0))) { // should be guaranteed by
118                                                      // the
119                                                      // recursion/TrieRoot!
120                throw new AssertionError("Invalid trie operation!");
121            }
122            count++;
123            subsequence.remove(0);
124            if (!subsequence.isEmpty()) {
125                T nextSymbol = subsequence.get(0);
126                getChildCreate(nextSymbol).add(subsequence);
127            }
128        }
129    }
130
131    /**
132     * <p>
133     * Returns the symbol associated with the node.
134     * </p>
135     *
136     * @return symbol associated with the node
137     */
138    public T getSymbol() {
139        return symbol;
140    }
141
142    /**
143     * <p>
144     * Returns the number of occurences of the sequence represented by the node.
145     * </p>
146     *
147     * @return number of occurences of the sequence represented by the node
148     */
149    public int getCount() {
150        return count;
151    }
152
153    /**
154     * <p>
155     * Returns the child of the node associated with the given symbol or creates it if it does not
156     * exist yet.
157     * </p>
158     *
159     * @param symbol
160     *            symbol whose node is required
161     * @return node associated with the symbol
162     * @see Trie#getChildCreate(Object)
163     */
164    protected TrieNode<T> getChildCreate(T symbol) {
165        TrieNode<T> node = getChild(symbol);
166        if (node == null) {
167            node = new TrieNode<T>(symbol);
168            children.add(node);
169        }
170        return node;
171    }
172
173    /**
174     * <p>
175     * Returns the child of the node associated with the given symbol or null if it does not exist.
176     * </p>
177     *
178     * @param symbol
179     *            symbol whose node is required
180     * @return node associated with the symbol; null if no such node exists
181     * @see Trie#getChild(Object)
182     */
183    protected TrieNode<T> getChild(T symbol) {
184        for (TrieNode<T> child : children) {
185            if (child.getSymbol().equals(symbol)) {
186                return child;
187            }
188        }
189        return null;
190    }
191
192    /**
193     * <p>
194     * Returns all children of this node.
195     * </p>
196     *
197     * @return children of this node
198     */
199    protected Collection<TrieNode<T>> getChildren() {
200        return children;
201    }
202
203    /**
204     * <p>
205     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
206     * with the sequence or null if no such node is found.
207     * </p>
208     *
209     * @param sequence
210     *            sequence that is searched for
211     * @return node associated with the sequence
212     * @see Trie#find(List)
213     */
214    public TrieNode<T> find(List<T> subsequence) {
215        TrieNode<T> result = null;
216        if (subsequence.isEmpty()) {
217            result = this;
218        }
219        else {
220            TrieNode<T> node = getChild(subsequence.get(0));
221            if (node != null) {
222                subsequence.remove(0);
223                result = node.find(subsequence);
224            }
225        }
226        return result;
227    }
228
229    /**
230     * <p>
231     * Returns a collection of all symbols that follow a this node, i.e., the symbols associated
232     * with the children of this node.
233     * </p>
234     *
235     * @return symbols follow this node
236     * @see TrieNode#getFollowingSymbols()
237     */
238    public Collection<T> getFollowingSymbols() {
239        Collection<T> followingSymbols = new LinkedList<T>();
240        for (TrieNode<T> child : children) {
241            followingSymbols.add(child.getSymbol());
242        }
243        return followingSymbols;
244    }
245
246    /**
247     * <p>
248     * The string representation of a node is {@code symbol.toString()#count}
249     * </p>
250     *
251     * @see java.lang.Object#toString()
252     */
253    @Override
254    public String toString() {
255        String str = symbol.toString() + " #" + count;
256        if (!children.isEmpty()) {
257            str += StringTools.ENDLINE + children.toString();
258        }
259        return str;
260    }
261
262    /**
263     * <p>
264     * Generates a {@link Tree} represenation of the trie.
265     * </p>
266     *
267     * @param parent
268     *            parent vertex in the generated tree
269     * @param graph
270     *            complete tree
271     */
272    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
273        TrieVertex currentVertex;
274        if (isRoot()) {
275            currentVertex = new TrieVertex("root");
276            graph.addVertex(currentVertex);
277        }
278        else {
279            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
280            graph.addChild(new Edge(), parent, currentVertex);
281        }
282        for (TrieNode<T> node : children) {
283            node.getGraph(currentVertex, graph);
284        }
285    }
286
287    /**
288     * <p>
289     * Appends the current node to the dot representation of the trie.
290     * </p>
291     *
292     * @param stringBuilder
293     *            {@link StringBuilder} to which the dot representation is appended
294     */
295    void appendDotRepresentation(StringBuilder stringBuilder) {
296        String thisSaneId;
297        if (isRoot()) {
298            thisSaneId = "root";
299        }
300        else {
301            thisSaneId =
302                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
303        }
304        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
305            StringTools.ENDLINE);
306        for (TrieNode<T> childNode : children) {
307            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
308                StringTools.ENDLINE);
309        }
310        for (TrieNode<T> childNode : children) {
311            childNode.appendDotRepresentation(stringBuilder);
312        }
313    }
314
315    /**
316     * <p>
317     * Checks if the node is a leaf.
318     * </p>
319     *
320     * @return true if the node is a leaf, false otherwise.
321     */
322    protected boolean isLeaf() {
323        return children.isEmpty();
324    }
325
326    /**
327     * <p>
328     * Checks if the node is the root.
329     * </p>
330     *
331     * @return true if the node is the root of the trie, false otherwise
332     */
333    protected boolean isRoot() {
334        return symbol == null;
335    }
336
337    /**
338     * <p>
339     * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the
340     * set.
341     * </p>
342     *
343     * @param ancestors
344     *            set of all ancestors of leafs
345     */
346    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
347        boolean isAncestor = false;
348        for (TrieNode<T> child : children) {
349            child.getLeafAncestors(ancestors);
350            isAncestor |= child.isLeaf();
351        }
352        if (isAncestor) {
353            ancestors.add(this);
354        }
355    }
356
357    /**
358     * <p>
359     * Returns the number of descendants of this node that are leafs. This does not only include
360     * direct children of this node, but all leafs in the sub-trie with this node as root.
361     * </p>
362     *
363     * @return number of leafs in this sub-trie
364     */
365    protected int getNumLeafs() {
366        int numLeafs = 0;
367        for (TrieNode<T> child : children) {
368            if (child.isLeaf()) {
369                numLeafs++;
370            }
371            else {
372                numLeafs += child.getNumLeafs();
373            }
374        }
375        return numLeafs;
376    }
377
378    /**
379     * <p>
380     * Sets the {@link #count} of this node.
381     * </p>
382     * <p>
383     * This function should only be used sparingly and very carefully! The count is usually
384     * maintained automatically by the training procedures.
385     * </p>
386     *
387     * @param count
388     *            new count
389     */
390    protected void setCount(int count) {
391        this.count = count;
392    }
393
394    /**
395     * <p>
396     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
397     * {@link #children} are equal.
398     * </p>
399     *
400     * @see java.lang.Object#equals(java.lang.Object)
401     */
402    @SuppressWarnings("rawtypes")
403    @Override
404    public boolean equals(Object other) {
405        if (other == this) {
406            return true;
407        }
408        if (other instanceof TrieNode) {
409            TrieNode otherNode = (TrieNode) other;
410            if (symbol == null) {
411                return count == otherNode.count && otherNode.symbol == null &&
412                    children.equals(otherNode.children);
413            }
414            else {
415                return count == otherNode.count && symbol.equals(((TrieNode) other).symbol) &&
416                    children.equals(((TrieNode) other).children);
417            }
418        }
419        return false;
420    }
421
422    /*
423     * (non-Javadoc)
424     *
425     * @see java.lang.Object#hashCode()
426     */
427    @Override
428    public int hashCode() {
429        int multiplier = 17;
430        int hash = 42;
431        if (symbol != null) {
432            hash = multiplier * hash + symbol.hashCode();
433        }
434        hash = multiplier * hash + count;
435        hash = multiplier * hash + children.hashCode();
436        return hash;
437    }
438}
Note: See TracBrowser for help on using the repository browser.