source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 1251

Last change on this file since 1251 was 1251, checked in by pharms, 11 years ago
  • improved performance of the trie for large alphabets by using the symbol map. This is an improved list of symbols that allows a more efficient lookup for symbols using buckets of symbol as an initial search order
  • Property svn:mime-type set to text/plain
File size: 14.7 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.usageprofiles;
16
17import java.io.Serializable;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21
22import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
23import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
24import de.ugoe.cs.util.StringTools;
25import edu.uci.ics.jung.graph.DelegateTree;
26import edu.uci.ics.jung.graph.Tree;
27
28/**
29 * <p>
30 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
31 * The counter marks the number of occurences of the sequence defined by the path from the root of
32 * the trie to this node.
33 * </p>
34 *
35 * @author Steffen Herbold, Patrick Harms
36 *
37 * @param <T>
38 *            Type of the symbols that are stored in the trie.
39 * @see Trie
40 */
41class TrieNode<T> implements Serializable {
42
43    /**
44     * <p>
45     * Id for object serialization.
46     * </p>
47     */
48    private static final long serialVersionUID = 1L;
49
50    /**
51     * <p>
52     * Counter for the number of occurences of the sequence.
53     * </p>
54     */
55    private int count;
56
57    /**
58     * <p>
59     * Symbol associated with the node.
60     * </p>
61     */
62    private final T symbol;
63
64    /**
65     * <p>
66     * Child nodes of this node. If the node is a leaf this collection is empty.
67     * </p>
68     */
69    private SymbolMap<T, TrieNode<T>> children;
70   
71    /**
72     * <p>
73     * Comparator to be used for comparing the symbols with each other
74     * </p>
75     */
76    private SymbolComparator<T> comparator;
77
78    /**
79     * <p>
80     * Constructor. Creates a new TrieNode without a symbol associated.<br>
81     * <b>This constructor should only be used to create the root node of the trie!</b>
82     * The comparator used by the tree node will be a {@link DefaultSymbolComparator}.
83     * </p>
84     */
85    TrieNode() {
86        this(new DefaultSymbolComparator<T>());
87    }
88
89    /**
90     * <p>
91     * Constructor. Creates a new TrieNode. The symbol must not be null.
92     * The comparator used by the tree node will be a {@link DefaultSymbolComparator}.
93     * </p>
94     *
95     * @param symbol
96     *            symbol associated with the trie node
97     */
98    TrieNode(T symbol) {
99        this(symbol, new DefaultSymbolComparator<T>());
100    }
101
102    /**
103     * <p>
104     * Constructor. Creates a new TrieNode without a symbol associated using the provided
105     * comparator.<br>
106     * <b>This constructor should only be used to create the root node of the trie!</b>
107     * <br>The comparator must not be null.
108     * </p>
109     */
110    TrieNode(SymbolComparator<T> comparator) {
111        if (comparator == null) {
112            throw new IllegalArgumentException("comparator must not be null!");
113        }
114        this.comparator = comparator;
115        this.symbol = null;
116        count = 0;
117        children = new SymbolMap<T, TrieNode<T>>(this.comparator);
118    }
119
120    /**
121     * <p>
122     * Constructor. Creates a new TrieNode using the provided comparator. The symbol and the
123     * comparator must not be null.
124     * </p>
125     *
126     * @param symbol
127     *            symbol associated with the trie node
128     */
129    TrieNode(T symbol, SymbolComparator<T> comparator) {
130        if (symbol == null) {
131            throw new IllegalArgumentException
132                ("symbol must not be null. null is reserved for root node!");
133        }
134        this.symbol = symbol;
135       
136        if (comparator == null) {
137            throw new IllegalArgumentException("comparator must not be null!");
138        }
139        this.comparator = comparator;
140
141        count = 0;
142        children = new SymbolMap<T, TrieNode<T>>(this.comparator);
143    }
144
145    /**
146     * <p>
147     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
148     * </p>
149     *
150     * @param other
151     */
152    TrieNode(TrieNode<T> other) {
153        if (other == null) {
154            throw new IllegalArgumentException("other must not be null");
155        }
156        symbol = other.symbol;
157        count = other.count;
158        comparator = other.comparator;
159        children = new SymbolMap<T, TrieNode<T>>(this.comparator);
160       
161        for (TrieNode<T> child : other.children.getValues()) {
162            children.addSymbol(child.getSymbol(), new TrieNode<T>(child));
163        }
164    }
165
166    /**
167     * <p>
168     * Adds a given subsequence to the trie and increases the counters accordingly.
169     * </p>
170     *
171     * @param subsequence
172     *            subsequence whose counters are increased
173     * @see Trie#add(List)
174     */
175    public void add(List<T> subsequence) {
176        if (!subsequence.isEmpty()) {
177            if (!comparator.equals(symbol, subsequence.get(0))) { // should be guaranteed by
178                                                                  // the recursion/TrieRoot!
179                throw new AssertionError("Invalid trie operation!");
180            }
181            count++;
182            subsequence.remove(0);
183            if (!subsequence.isEmpty()) {
184                T nextSymbol = subsequence.get(0);
185                getChildCreate(nextSymbol).add(subsequence);
186            }
187        }
188    }
189
190    /**
191     * <p>
192     * Returns the symbol associated with the node.
193     * </p>
194     *
195     * @return symbol associated with the node
196     */
197    public T getSymbol() {
198        return symbol;
199    }
200
201    /**
202     * <p>
203     * Returns the number of occurences of the sequence represented by the node.
204     * </p>
205     *
206     * @return number of occurences of the sequence represented by the node
207     */
208    public int getCount() {
209        return count;
210    }
211
212    /**
213     * <p>
214     * Returns the child of the node associated with the given symbol or creates it if it does not
215     * exist yet.
216     * </p>
217     *
218     * @param symbol
219     *            symbol whose node is required
220     * @return node associated with the symbol
221     * @see Trie#getChildCreate(Object)
222     */
223    protected TrieNode<T> getChildCreate(T symbol) {
224        TrieNode<T> node = getChild(symbol);
225        if (node == null) {
226            node = new TrieNode<T>(symbol, comparator);
227            children.addSymbol(symbol, node);
228        }
229        return node;
230    }
231
232    /**
233     * <p>
234     * Returns the child of the node associated with the given symbol or null if it does not exist.
235     * </p>
236     *
237     * @param symbol
238     *            symbol whose node is required
239     * @return node associated with the symbol; null if no such node exists
240     * @see Trie#getChild(Object)
241     */
242    protected TrieNode<T> getChild(T symbol) {
243        return children.getValue(symbol);
244    }
245
246    /**
247     * <p>
248     * Returns all children of this node.
249     * </p>
250     *
251     * @return children of this node
252     */
253    protected Collection<TrieNode<T>> getChildren() {
254        return children.getValues();
255    }
256
257    /**
258     * <p>
259     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
260     * with the sequence or null if no such node is found.
261     * </p>
262     *
263     * @param sequence
264     *            sequence that is searched for
265     * @return node associated with the sequence
266     * @see Trie#find(List)
267     */
268    public TrieNode<T> find(List<T> subsequence) {
269        TrieNode<T> result = null;
270        if (subsequence.isEmpty()) {
271            result = this;
272        }
273        else {
274            TrieNode<T> node = getChild(subsequence.get(0));
275            if (node != null) {
276                subsequence.remove(0);
277                result = node.find(subsequence);
278            }
279        }
280        return result;
281    }
282
283    /**
284     * <p>
285     * Returns a collection of all symbols that follow a this node, i.e., the symbols associated
286     * with the children of this node.
287     * </p>
288     *
289     * @return symbols follow this node
290     * @see TrieNode#getFollowingSymbols()
291     */
292    public Collection<T> getFollowingSymbols() {
293        Collection<T> followingSymbols = new LinkedList<T>();
294        for (TrieNode<T> child : children.getValues()) {
295            followingSymbols.add(child.getSymbol());
296        }
297        return followingSymbols;
298    }
299
300    /**
301     * <p>
302     * The string representation of a node is {@code symbol.toString()#count}
303     * </p>
304     *
305     * @see java.lang.Object#toString()
306     */
307    @Override
308    public String toString() {
309        String str;
310       
311        if (symbol == null) {
312            str = "ROOT";
313        }
314        else {
315            str = symbol.toString() + " #" + count;
316        }
317       
318        if (!children.isEmpty()) {
319            str += StringTools.ENDLINE + children.toString();
320        }
321        return str;
322    }
323
324    /**
325     * <p>
326     * Generates a {@link Tree} representation of the trie.
327     * </p>
328     *
329     * @param parent
330     *            parent vertex in the generated tree
331     * @param graph
332     *            complete tree
333     */
334    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
335        TrieVertex currentVertex;
336        if (isRoot()) {
337            currentVertex = new TrieVertex("root");
338            graph.addVertex(currentVertex);
339        }
340        else {
341            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
342            graph.addChild(new Edge(), parent, currentVertex);
343        }
344        for (TrieNode<T> node : children.getValues()) {
345            node.getGraph(currentVertex, graph);
346        }
347    }
348
349    /**
350     * <p>
351     * Appends the current node to the dot representation of the trie.
352     * </p>
353     *
354     * @param stringBuilder
355     *            {@link StringBuilder} to which the dot representation is appended
356     */
357    void appendDotRepresentation(StringBuilder stringBuilder) {
358        String thisSaneId;
359        if (isRoot()) {
360            thisSaneId = "root";
361        }
362        else {
363            thisSaneId =
364                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
365        }
366        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
367            StringTools.ENDLINE);
368        for (TrieNode<T> childNode : children.getValues()) {
369            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
370                StringTools.ENDLINE);
371        }
372        for (TrieNode<T> childNode : children.getValues()) {
373            childNode.appendDotRepresentation(stringBuilder);
374        }
375    }
376
377    /**
378     * <p>
379     * Checks if the node is a leaf.
380     * </p>
381     *
382     * @return true if the node is a leaf, false otherwise.
383     */
384    protected boolean isLeaf() {
385        return children.isEmpty();
386    }
387
388    /**
389     * <p>
390     * Checks if the node is the root.
391     * </p>
392     *
393     * @return true if the node is the root of the trie, false otherwise
394     */
395    protected boolean isRoot() {
396        return symbol == null;
397    }
398
399    /**
400     * <p>
401     * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the
402     * set.
403     * </p>
404     *
405     * @param ancestors
406     *            set of all ancestors of leafs
407     */
408    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
409        boolean isAncestor = false;
410        for (TrieNode<T> child : children.getValues()) {
411            child.getLeafAncestors(ancestors);
412            isAncestor |= child.isLeaf();
413        }
414        if (isAncestor) {
415            ancestors.add(this);
416        }
417    }
418
419    /**
420     * <p>
421     * Returns the number of descendants of this node that are leafs. This does not only include
422     * direct children of this node, but all leafs in the sub-trie with this node as root.
423     * </p>
424     *
425     * @return number of leafs in this sub-trie
426     */
427    protected int getNumLeafs() {
428        int numLeafs = 0;
429        for (TrieNode<T> child : children.getValues()) {
430            if (child.isLeaf()) {
431                numLeafs++;
432            }
433            else {
434                numLeafs += child.getNumLeafs();
435            }
436        }
437        return numLeafs;
438    }
439
440    /**
441     * <p>
442     * Sets the {@link #count} of this node.
443     * </p>
444     * <p>
445     * This function should only be used sparingly and very carefully! The count is usually
446     * maintained automatically by the training procedures.
447     * </p>
448     *
449     * @param count
450     *            new count
451     */
452    protected void setCount(int count) {
453        this.count = count;
454    }
455
456    /**
457     * <p>
458     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
459     * {@link #children} are equal. For the comparison of their symbols the comparator is used.
460     * </p>
461     *
462     * @see java.lang.Object#equals(java.lang.Object)
463     */
464    @SuppressWarnings({ "rawtypes", "unchecked" })
465    @Override
466    public boolean equals(Object other) {
467        if (other == this) {
468            return true;
469        }
470        if (other instanceof TrieNode) {
471            TrieNode otherNode = (TrieNode) other;
472            if (symbol == null) {
473                return count == otherNode.count && otherNode.symbol == null &&
474                    children.equals(otherNode.children);
475            }
476            else {
477                return count == otherNode.count &&
478                    symbol.getClass().isInstance(((TrieNode) other).symbol) &&
479                    comparator.equals(symbol, ((TrieNode<T>) other).symbol) &&
480                    children.equals(((TrieNode) other).children);
481            }
482        }
483        return false;
484    }
485
486    /*
487     * (non-Javadoc)
488     *
489     * @see java.lang.Object#hashCode()
490     */
491    @Override
492    public int hashCode() {
493        int multiplier = 17;
494        int hash = 42;
495        if (symbol != null) {
496            hash = multiplier * hash + symbol.hashCode();
497        }
498        hash = multiplier * hash + count;
499        hash = multiplier * hash + children.hashCode();
500        return hash;
501    }
502}
Note: See TracBrowser for help on using the repository browser.