source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 1282

Last change on this file since 1282 was 1282, checked in by pharms, 11 years ago
  • added support for symbol management strategy in tries, especially for storing them
  • adapted comparator approach accordingly
  • provided default implementation for symbol management strategies
  • added, extended and improved java doc
  • Property svn:mime-type set to text/plain
File size: 15.2 KB
RevLine 
[927]1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
[922]15package de.ugoe.cs.autoquest.usageprofiles;
[518]16
17import java.io.Serializable;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21
[922]22import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
23import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
[518]24import de.ugoe.cs.util.StringTools;
25import edu.uci.ics.jung.graph.DelegateTree;
26import edu.uci.ics.jung.graph.Tree;
27
28/**
29 * <p>
[559]30 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
[1282]31 * The counter marks the number of occurrences of the sequence defined by the path from the root of
[559]32 * the trie to this node.
[518]33 * </p>
34 *
[1060]35 * @author Steffen Herbold, Patrick Harms
[518]36 *
37 * @param <T>
38 *            Type of the symbols that are stored in the trie.
39 * @see Trie
40 */
41class TrieNode<T> implements Serializable {
42
[559]43    /**
44     * <p>
45     * Id for object serialization.
46     * </p>
47     */
48    private static final long serialVersionUID = 1L;
[518]49
[559]50    /**
51     * <p>
[1282]52     * Counter for the number of occurrences of the sequence.
[559]53     * </p>
54     */
55    private int count;
[518]56
[559]57    /**
58     * <p>
59     * Symbol associated with the node.
60     * </p>
61     */
62    private final T symbol;
[518]63
[559]64    /**
65     * <p>
66     * Child nodes of this node. If the node is a leaf this collection is empty.
67     * </p>
68     */
[1251]69    private SymbolMap<T, TrieNode<T>> children;
70   
[559]71    /**
72     * <p>
[1282]73     * Strategy for handling symbols, i.e., for comparing and storing them
[1060]74     * </p>
75     */
[1282]76    private SymbolStrategy<T> strategy;
[1060]77
78    /**
79     * <p>
[559]80     * Constructor. Creates a new TrieNode without a symbol associated.<br>
81     * <b>This constructor should only be used to create the root node of the trie!</b>
[1282]82     * The strategy used by the trie node will be a {@link DefaultSymbolStrategy}.
[559]83     * </p>
84     */
85    TrieNode() {
[1282]86        this(new DefaultSymbolStrategy<T>());
[1060]87    }
88
89    /**
90     * <p>
91     * Constructor. Creates a new TrieNode. The symbol must not be null.
[1282]92     * The strategy used by the trie node will be a {@link DefaultSymbolStrategy}.
[1060]93     * </p>
94     *
95     * @param symbol
96     *            symbol associated with the trie node
[1282]97     *
98     * @throws IllegalArgumentException
99     *            if the provided symbol is null
[1060]100     */
101    TrieNode(T symbol) {
[1282]102        this(symbol, new DefaultSymbolStrategy<T>());
[1060]103    }
104
105    /**
106     * <p>
107     * Constructor. Creates a new TrieNode without a symbol associated using the provided
[1282]108     * strategy.<br>
[1060]109     * <b>This constructor should only be used to create the root node of the trie!</b>
[1282]110     * <br>The strategy must not be null.
[1060]111     * </p>
[1282]112     *
113     * @param strategy
114     *            the strategy used for comparing and storing symbols
115     *
116     * @throws IllegalArgumentException
117     *            if the provided strategy is null
[1060]118     */
[1282]119    TrieNode(SymbolStrategy<T> strategy) {
120        if (strategy == null) {
121            throw new IllegalArgumentException("strategy must not be null!");
[1060]122        }
[1282]123        this.strategy = strategy;
[559]124        this.symbol = null;
125        count = 0;
[1282]126        children = strategy.createSymbolMap();
[559]127    }
[518]128
[559]129    /**
130     * <p>
[1282]131     * Constructor. Creates a new TrieNode using the provided strategy. The symbol and the
132     * strategy must not be null.
[559]133     * </p>
134     *
135     * @param symbol
136     *            symbol associated with the trie node
[1282]137     * @param strategy
138     *            the strategy used for comparing and storing symbols
139     *
140     * @throws IllegalArgumentException
141     *            if the provided symbol or strategy is null
[559]142     */
[1282]143    TrieNode(T symbol, SymbolStrategy<T> strategy) {
[559]144        if (symbol == null) {
[1060]145            throw new IllegalArgumentException
146                ("symbol must not be null. null is reserved for root node!");
[559]147        }
148        this.symbol = symbol;
[1060]149       
[1282]150        if (strategy == null) {
151            throw new IllegalArgumentException("strategy must not be null!");
[1060]152        }
[1282]153        this.strategy = strategy;
[1060]154
[559]155        count = 0;
[1282]156        children = strategy.createSymbolMap();
[559]157    }
[518]158
[559]159    /**
160     * <p>
161     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
162     * </p>
163     *
[1282]164     * @param other the trie node to create the copy of
165     *
166     * @throws IllegalArgumentException
167     *            if the provided node is null
[559]168     */
169    TrieNode(TrieNode<T> other) {
170        if (other == null) {
[766]171            throw new IllegalArgumentException("other must not be null");
[559]172        }
173        symbol = other.symbol;
174        count = other.count;
[1282]175        strategy = other.strategy;
176        children = strategy.createSymbolMap();
[1251]177       
178        for (TrieNode<T> child : other.children.getValues()) {
179            children.addSymbol(child.getSymbol(), new TrieNode<T>(child));
[559]180        }
181    }
[518]182
[559]183    /**
184     * <p>
185     * Adds a given subsequence to the trie and increases the counters accordingly.
186     * </p>
187     *
188     * @param subsequence
189     *            subsequence whose counters are increased
190     * @see Trie#add(List)
191     */
192    public void add(List<T> subsequence) {
193        if (!subsequence.isEmpty()) {
[1282]194            if (!strategy.getSymbolComparator().equals(symbol, subsequence.get(0))) {
195                // should be guaranteed by the recursion/TrieRoot!
[559]196                throw new AssertionError("Invalid trie operation!");
197            }
198            count++;
199            subsequence.remove(0);
200            if (!subsequence.isEmpty()) {
201                T nextSymbol = subsequence.get(0);
202                getChildCreate(nextSymbol).add(subsequence);
203            }
204        }
205    }
[518]206
[559]207    /**
208     * <p>
209     * Returns the symbol associated with the node.
210     * </p>
211     *
212     * @return symbol associated with the node
213     */
214    public T getSymbol() {
215        return symbol;
216    }
[518]217
[559]218    /**
219     * <p>
[1282]220     * Returns the number of occurrences of the sequence represented by the node.
[559]221     * </p>
222     *
[1282]223     * @return number of occurrences of the sequence represented by the node
[559]224     */
225    public int getCount() {
226        return count;
227    }
[518]228
[559]229    /**
230     * <p>
231     * Returns the child of the node associated with the given symbol or creates it if it does not
232     * exist yet.
233     * </p>
234     *
235     * @param symbol
236     *            symbol whose node is required
237     * @return node associated with the symbol
238     * @see Trie#getChildCreate(Object)
239     */
240    protected TrieNode<T> getChildCreate(T symbol) {
241        TrieNode<T> node = getChild(symbol);
242        if (node == null) {
[1282]243            node = new TrieNode<T>(symbol, strategy);
[1251]244            children.addSymbol(symbol, node);
[559]245        }
246        return node;
247    }
[518]248
[559]249    /**
250     * <p>
251     * Returns the child of the node associated with the given symbol or null if it does not exist.
252     * </p>
253     *
254     * @param symbol
255     *            symbol whose node is required
256     * @return node associated with the symbol; null if no such node exists
257     * @see Trie#getChild(Object)
258     */
259    protected TrieNode<T> getChild(T symbol) {
[1251]260        return children.getValue(symbol);
[559]261    }
[518]262
[559]263    /**
264     * <p>
265     * Returns all children of this node.
266     * </p>
267     *
268     * @return children of this node
269     */
270    protected Collection<TrieNode<T>> getChildren() {
[1251]271        return children.getValues();
[559]272    }
[518]273
[559]274    /**
275     * <p>
276     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
277     * with the sequence or null if no such node is found.
278     * </p>
279     *
280     * @param sequence
281     *            sequence that is searched for
282     * @return node associated with the sequence
283     * @see Trie#find(List)
284     */
285    public TrieNode<T> find(List<T> subsequence) {
286        TrieNode<T> result = null;
287        if (subsequence.isEmpty()) {
288            result = this;
289        }
290        else {
291            TrieNode<T> node = getChild(subsequence.get(0));
292            if (node != null) {
293                subsequence.remove(0);
294                result = node.find(subsequence);
295            }
296        }
297        return result;
298    }
[518]299
[559]300    /**
301     * <p>
[1282]302     * Returns a collection of all symbols that follow this node, i.e., the symbols associated
[559]303     * with the children of this node.
304     * </p>
305     *
306     * @return symbols follow this node
307     * @see TrieNode#getFollowingSymbols()
308     */
309    public Collection<T> getFollowingSymbols() {
310        Collection<T> followingSymbols = new LinkedList<T>();
[1251]311        for (TrieNode<T> child : children.getValues()) {
[559]312            followingSymbols.add(child.getSymbol());
313        }
314        return followingSymbols;
315    }
[518]316
[559]317    /**
318     * <p>
319     * The string representation of a node is {@code symbol.toString()#count}
320     * </p>
321     *
322     * @see java.lang.Object#toString()
323     */
324    @Override
325    public String toString() {
[1060]326        String str;
327       
328        if (symbol == null) {
329            str = "ROOT";
330        }
331        else {
332            str = symbol.toString() + " #" + count;
333        }
334       
[559]335        if (!children.isEmpty()) {
336            str += StringTools.ENDLINE + children.toString();
337        }
338        return str;
339    }
[518]340
[559]341    /**
342     * <p>
[1060]343     * Generates a {@link Tree} representation of the trie.
[559]344     * </p>
345     *
346     * @param parent
347     *            parent vertex in the generated tree
348     * @param graph
349     *            complete tree
350     */
351    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
352        TrieVertex currentVertex;
353        if (isRoot()) {
354            currentVertex = new TrieVertex("root");
355            graph.addVertex(currentVertex);
356        }
357        else {
358            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
359            graph.addChild(new Edge(), parent, currentVertex);
360        }
[1251]361        for (TrieNode<T> node : children.getValues()) {
[559]362            node.getGraph(currentVertex, graph);
363        }
364    }
[518]365
[559]366    /**
367     * <p>
368     * Appends the current node to the dot representation of the trie.
369     * </p>
370     *
371     * @param stringBuilder
372     *            {@link StringBuilder} to which the dot representation is appended
373     */
374    void appendDotRepresentation(StringBuilder stringBuilder) {
375        String thisSaneId;
376        if (isRoot()) {
377            thisSaneId = "root";
378        }
379        else {
380            thisSaneId =
381                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
382        }
383        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
384            StringTools.ENDLINE);
[1251]385        for (TrieNode<T> childNode : children.getValues()) {
[559]386            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
387                StringTools.ENDLINE);
388        }
[1251]389        for (TrieNode<T> childNode : children.getValues()) {
[559]390            childNode.appendDotRepresentation(stringBuilder);
391        }
392    }
[518]393
[559]394    /**
395     * <p>
396     * Checks if the node is a leaf.
397     * </p>
398     *
399     * @return true if the node is a leaf, false otherwise.
400     */
401    protected boolean isLeaf() {
402        return children.isEmpty();
403    }
[518]404
[559]405    /**
406     * <p>
407     * Checks if the node is the root.
408     * </p>
409     *
410     * @return true if the node is the root of the trie, false otherwise
411     */
412    protected boolean isRoot() {
413        return symbol == null;
414    }
[518]415
[559]416    /**
417     * <p>
[1282]418     * Recursive method that collects all nodes that are ancestors of leafs and stores them in the
[559]419     * set.
420     * </p>
421     *
422     * @param ancestors
423     *            set of all ancestors of leafs
424     */
425    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
426        boolean isAncestor = false;
[1251]427        for (TrieNode<T> child : children.getValues()) {
[559]428            child.getLeafAncestors(ancestors);
429            isAncestor |= child.isLeaf();
430        }
431        if (isAncestor) {
432            ancestors.add(this);
433        }
434    }
[518]435
[559]436    /**
437     * <p>
438     * Returns the number of descendants of this node that are leafs. This does not only include
439     * direct children of this node, but all leafs in the sub-trie with this node as root.
440     * </p>
441     *
442     * @return number of leafs in this sub-trie
443     */
444    protected int getNumLeafs() {
445        int numLeafs = 0;
[1251]446        for (TrieNode<T> child : children.getValues()) {
[559]447            if (child.isLeaf()) {
448                numLeafs++;
449            }
450            else {
451                numLeafs += child.getNumLeafs();
452            }
453        }
454        return numLeafs;
455    }
[518]456
[559]457    /**
458     * <p>
459     * Sets the {@link #count} of this node.
460     * </p>
461     * <p>
462     * This function should only be used sparingly and very carefully! The count is usually
463     * maintained automatically by the training procedures.
464     * </p>
465     *
466     * @param count
467     *            new count
468     */
469    protected void setCount(int count) {
470        this.count = count;
471    }
[518]472
[559]473    /**
474     * <p>
475     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
[1282]476     * {@link #children} are equal. For the comparison of their symbols the comparator provided
477     * by the symbol management strategy is used.
[559]478     * </p>
479     *
480     * @see java.lang.Object#equals(java.lang.Object)
481     */
[1060]482    @SuppressWarnings({ "rawtypes", "unchecked" })
[559]483    @Override
484    public boolean equals(Object other) {
485        if (other == this) {
486            return true;
487        }
488        if (other instanceof TrieNode) {
489            TrieNode otherNode = (TrieNode) other;
490            if (symbol == null) {
491                return count == otherNode.count && otherNode.symbol == null &&
492                    children.equals(otherNode.children);
493            }
494            else {
[1060]495                return count == otherNode.count &&
496                    symbol.getClass().isInstance(((TrieNode) other).symbol) &&
[1282]497                    strategy.getSymbolComparator().equals(symbol, ((TrieNode<T>) other).symbol) &&
[559]498                    children.equals(((TrieNode) other).children);
499            }
500        }
501        return false;
502    }
[518]503
[559]504    /*
505     * (non-Javadoc)
506     *
507     * @see java.lang.Object#hashCode()
508     */
509    @Override
510    public int hashCode() {
511        int multiplier = 17;
512        int hash = 42;
513        if (symbol != null) {
514            hash = multiplier * hash + symbol.hashCode();
515        }
516        hash = multiplier * hash + count;
517        hash = multiplier * hash + children.hashCode();
518        return hash;
519    }
[518]520}
Note: See TracBrowser for help on using the repository browser.