source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java @ 927

Last change on this file since 927 was 927, checked in by sherbold, 12 years ago
  • added copyright under the Apache License, Version 2.0
  • Property svn:mime-type set to text/plain
File size: 12.8 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.usageprofiles;
16
17import java.io.Serializable;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21
22import de.ugoe.cs.autoquest.usageprofiles.Trie.Edge;
23import de.ugoe.cs.autoquest.usageprofiles.Trie.TrieVertex;
24import de.ugoe.cs.util.StringTools;
25import edu.uci.ics.jung.graph.DelegateTree;
26import edu.uci.ics.jung.graph.Tree;
27
28/**
29 * <p>
30 * This class implements a node of a trie. Each node is associated with a symbol and has a counter.
31 * The counter marks the number of occurences of the sequence defined by the path from the root of
32 * the trie to this node.
33 * </p>
34 *
35 * @author Steffen Herbold
36 *
37 * @param <T>
38 *            Type of the symbols that are stored in the trie.
39 * @see Trie
40 */
41class TrieNode<T> implements Serializable {
42
43    /**
44     * <p>
45     * Id for object serialization.
46     * </p>
47     */
48    private static final long serialVersionUID = 1L;
49
50    /**
51     * <p>
52     * Counter for the number of occurences of the sequence.
53     * </p>
54     */
55    private int count;
56
57    /**
58     * <p>
59     * Symbol associated with the node.
60     * </p>
61     */
62    private final T symbol;
63
64    /**
65     * <p>
66     * Child nodes of this node. If the node is a leaf this collection is empty.
67     * </p>
68     */
69    private Collection<TrieNode<T>> children;
70
71    /**
72     * <p>
73     * Constructor. Creates a new TrieNode without a symbol associated.<br>
74     * <b>This constructor should only be used to create the root node of the trie!</b>
75     * </p>
76     */
77    TrieNode() {
78        this.symbol = null;
79        count = 0;
80        children = new LinkedList<TrieNode<T>>();
81    }
82
83    /**
84     * <p>
85     * Constructor. Creates a new TrieNode. The symbol must not be null.
86     * </p>
87     *
88     * @param symbol
89     *            symbol associated with the trie node
90     */
91    TrieNode(T symbol) {
92        if (symbol == null) {
93            throw new IllegalArgumentException(
94                                                "symbol must not be null. null is reserved for root node!");
95        }
96        this.symbol = symbol;
97        count = 0;
98        children = new LinkedList<TrieNode<T>>();
99    }
100
101    /**
102     * <p>
103     * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null.
104     * </p>
105     *
106     * @param other
107     */
108    TrieNode(TrieNode<T> other) {
109        if (other == null) {
110            throw new IllegalArgumentException("other must not be null");
111        }
112        symbol = other.symbol;
113        count = other.count;
114        children = new LinkedList<TrieNode<T>>();
115        for (TrieNode<T> child : other.children) {
116            children.add(new TrieNode<T>(child));
117        }
118    }
119
120    /**
121     * <p>
122     * Adds a given subsequence to the trie and increases the counters accordingly.
123     * </p>
124     *
125     * @param subsequence
126     *            subsequence whose counters are increased
127     * @see Trie#add(List)
128     */
129    public void add(List<T> subsequence) {
130        if (!subsequence.isEmpty()) {
131            if (!symbol.equals(subsequence.get(0))) { // should be guaranteed by
132                                                      // the
133                                                      // recursion/TrieRoot!
134                throw new AssertionError("Invalid trie operation!");
135            }
136            count++;
137            subsequence.remove(0);
138            if (!subsequence.isEmpty()) {
139                T nextSymbol = subsequence.get(0);
140                getChildCreate(nextSymbol).add(subsequence);
141            }
142        }
143    }
144
145    /**
146     * <p>
147     * Returns the symbol associated with the node.
148     * </p>
149     *
150     * @return symbol associated with the node
151     */
152    public T getSymbol() {
153        return symbol;
154    }
155
156    /**
157     * <p>
158     * Returns the number of occurences of the sequence represented by the node.
159     * </p>
160     *
161     * @return number of occurences of the sequence represented by the node
162     */
163    public int getCount() {
164        return count;
165    }
166
167    /**
168     * <p>
169     * Returns the child of the node associated with the given symbol or creates it if it does not
170     * exist yet.
171     * </p>
172     *
173     * @param symbol
174     *            symbol whose node is required
175     * @return node associated with the symbol
176     * @see Trie#getChildCreate(Object)
177     */
178    protected TrieNode<T> getChildCreate(T symbol) {
179        TrieNode<T> node = getChild(symbol);
180        if (node == null) {
181            node = new TrieNode<T>(symbol);
182            children.add(node);
183        }
184        return node;
185    }
186
187    /**
188     * <p>
189     * Returns the child of the node associated with the given symbol or null if it does not exist.
190     * </p>
191     *
192     * @param symbol
193     *            symbol whose node is required
194     * @return node associated with the symbol; null if no such node exists
195     * @see Trie#getChild(Object)
196     */
197    protected TrieNode<T> getChild(T symbol) {
198        for (TrieNode<T> child : children) {
199            if (child.getSymbol().equals(symbol)) {
200                return child;
201            }
202        }
203        return null;
204    }
205
206    /**
207     * <p>
208     * Returns all children of this node.
209     * </p>
210     *
211     * @return children of this node
212     */
213    protected Collection<TrieNode<T>> getChildren() {
214        return children;
215    }
216
217    /**
218     * <p>
219     * Searches the sub-trie of this trie node for a given sequence and returns the node associated
220     * with the sequence or null if no such node is found.
221     * </p>
222     *
223     * @param sequence
224     *            sequence that is searched for
225     * @return node associated with the sequence
226     * @see Trie#find(List)
227     */
228    public TrieNode<T> find(List<T> subsequence) {
229        TrieNode<T> result = null;
230        if (subsequence.isEmpty()) {
231            result = this;
232        }
233        else {
234            TrieNode<T> node = getChild(subsequence.get(0));
235            if (node != null) {
236                subsequence.remove(0);
237                result = node.find(subsequence);
238            }
239        }
240        return result;
241    }
242
243    /**
244     * <p>
245     * Returns a collection of all symbols that follow a this node, i.e., the symbols associated
246     * with the children of this node.
247     * </p>
248     *
249     * @return symbols follow this node
250     * @see TrieNode#getFollowingSymbols()
251     */
252    public Collection<T> getFollowingSymbols() {
253        Collection<T> followingSymbols = new LinkedList<T>();
254        for (TrieNode<T> child : children) {
255            followingSymbols.add(child.getSymbol());
256        }
257        return followingSymbols;
258    }
259
260    /**
261     * <p>
262     * The string representation of a node is {@code symbol.toString()#count}
263     * </p>
264     *
265     * @see java.lang.Object#toString()
266     */
267    @Override
268    public String toString() {
269        String str = symbol.toString() + " #" + count;
270        if (!children.isEmpty()) {
271            str += StringTools.ENDLINE + children.toString();
272        }
273        return str;
274    }
275
276    /**
277     * <p>
278     * Generates a {@link Tree} represenation of the trie.
279     * </p>
280     *
281     * @param parent
282     *            parent vertex in the generated tree
283     * @param graph
284     *            complete tree
285     */
286    void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) {
287        TrieVertex currentVertex;
288        if (isRoot()) {
289            currentVertex = new TrieVertex("root");
290            graph.addVertex(currentVertex);
291        }
292        else {
293            currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount());
294            graph.addChild(new Edge(), parent, currentVertex);
295        }
296        for (TrieNode<T> node : children) {
297            node.getGraph(currentVertex, graph);
298        }
299    }
300
301    /**
302     * <p>
303     * Appends the current node to the dot representation of the trie.
304     * </p>
305     *
306     * @param stringBuilder
307     *            {@link StringBuilder} to which the dot representation is appended
308     */
309    void appendDotRepresentation(StringBuilder stringBuilder) {
310        String thisSaneId;
311        if (isRoot()) {
312            thisSaneId = "root";
313        }
314        else {
315            thisSaneId =
316                symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count;
317        }
318        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" +
319            StringTools.ENDLINE);
320        for (TrieNode<T> childNode : children) {
321            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" +
322                StringTools.ENDLINE);
323        }
324        for (TrieNode<T> childNode : children) {
325            childNode.appendDotRepresentation(stringBuilder);
326        }
327    }
328
329    /**
330     * <p>
331     * Checks if the node is a leaf.
332     * </p>
333     *
334     * @return true if the node is a leaf, false otherwise.
335     */
336    protected boolean isLeaf() {
337        return children.isEmpty();
338    }
339
340    /**
341     * <p>
342     * Checks if the node is the root.
343     * </p>
344     *
345     * @return true if the node is the root of the trie, false otherwise
346     */
347    protected boolean isRoot() {
348        return symbol == null;
349    }
350
351    /**
352     * <p>
353     * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the
354     * set.
355     * </p>
356     *
357     * @param ancestors
358     *            set of all ancestors of leafs
359     */
360    protected void getLeafAncestors(List<TrieNode<T>> ancestors) {
361        boolean isAncestor = false;
362        for (TrieNode<T> child : children) {
363            child.getLeafAncestors(ancestors);
364            isAncestor |= child.isLeaf();
365        }
366        if (isAncestor) {
367            ancestors.add(this);
368        }
369    }
370
371    /**
372     * <p>
373     * Returns the number of descendants of this node that are leafs. This does not only include
374     * direct children of this node, but all leafs in the sub-trie with this node as root.
375     * </p>
376     *
377     * @return number of leafs in this sub-trie
378     */
379    protected int getNumLeafs() {
380        int numLeafs = 0;
381        for (TrieNode<T> child : children) {
382            if (child.isLeaf()) {
383                numLeafs++;
384            }
385            else {
386                numLeafs += child.getNumLeafs();
387            }
388        }
389        return numLeafs;
390    }
391
392    /**
393     * <p>
394     * Sets the {@link #count} of this node.
395     * </p>
396     * <p>
397     * This function should only be used sparingly and very carefully! The count is usually
398     * maintained automatically by the training procedures.
399     * </p>
400     *
401     * @param count
402     *            new count
403     */
404    protected void setCount(int count) {
405        this.count = count;
406    }
407
408    /**
409     * <p>
410     * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and
411     * {@link #children} are equal.
412     * </p>
413     *
414     * @see java.lang.Object#equals(java.lang.Object)
415     */
416    @SuppressWarnings("rawtypes")
417    @Override
418    public boolean equals(Object other) {
419        if (other == this) {
420            return true;
421        }
422        if (other instanceof TrieNode) {
423            TrieNode otherNode = (TrieNode) other;
424            if (symbol == null) {
425                return count == otherNode.count && otherNode.symbol == null &&
426                    children.equals(otherNode.children);
427            }
428            else {
429                return count == otherNode.count && symbol.equals(((TrieNode) other).symbol) &&
430                    children.equals(((TrieNode) other).children);
431            }
432        }
433        return false;
434    }
435
436    /*
437     * (non-Javadoc)
438     *
439     * @see java.lang.Object#hashCode()
440     */
441    @Override
442    public int hashCode() {
443        int multiplier = 17;
444        int hash = 42;
445        if (symbol != null) {
446            hash = multiplier * hash + symbol.hashCode();
447        }
448        hash = multiplier * hash + count;
449        hash = multiplier * hash + children.hashCode();
450        return hash;
451    }
452}
Note: See TracBrowser for help on using the repository browser.