package de.ugoe.cs.eventbench.models; import java.io.Serializable; import java.util.Collection; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import de.ugoe.cs.util.StringTools; import edu.uci.ics.jung.graph.DelegateTree; import edu.uci.ics.jung.graph.Tree; public class Trie implements IDotCompatible, Serializable { /** * Id for object serialization. */ private static final long serialVersionUID = 1L; private Set knownSymbols; private final TrieNode rootNode; public Trie() { rootNode = new TrieNode(); knownSymbols = new LinkedHashSet(); } public Set getKnownSymbols() { return new LinkedHashSet(knownSymbols); } // trains the current Trie using the given sequence and adds all subsequence of length trieOrder public void train(List sequence, int maxOrder) { IncompleteMemory latestActions = new IncompleteMemory(maxOrder); int i=0; for(T currentEvent : sequence) { latestActions.add(currentEvent); knownSymbols.add(currentEvent); i++; if( i>=maxOrder ) { add(latestActions.getLast(maxOrder)); } } int sequenceLength = sequence.size(); for( int j=maxOrder-1 ; j>0 ; j-- ) { add(sequence.subList(sequenceLength-j, sequenceLength)); } } // increases the counters for each symbol in the subsequence public void add(List subsequence) { if( subsequence!=null && !subsequence.isEmpty() ) { knownSymbols.addAll(subsequence); subsequence = new LinkedList(subsequence); // defensive copy! T firstSymbol = subsequence.get(0); TrieNode node = getChildCreate(firstSymbol); node.add(subsequence); } } protected TrieNode getChildCreate(T symbol) { return rootNode.getChildCreate(symbol); } protected TrieNode getChild(T symbol) { return rootNode.getChild(symbol); } // get the count of "sequence" public int getCount(List sequence) { int count = 0; TrieNode node = find(sequence); if( node!=null ) { count = node.getCount(); } return count; } // get the count of "sequence,follower" public int getCount(List sequence, T follower) { List tmpSequence = new LinkedList(sequence); tmpSequence.add(follower); return getCount(tmpSequence); } public TrieNode find(List sequence) { if( sequence==null || sequence.isEmpty() ) { return rootNode; } List sequenceCopy = new LinkedList(sequence); TrieNode result = null; TrieNode node = getChild(sequenceCopy.get(0)); if( node!=null ) { sequenceCopy.remove(0); result = node.find(sequenceCopy); } return result; } // returns all symbols that follow the defined sequence public Collection getFollowingSymbols(List sequence) { Collection result = new LinkedList(); TrieNode node = find(sequence); if( node!=null ) { result = node.getFollowingSymbols(); } return result; } // longest suffix of context, that is contained in the tree and whose children are leaves // possibly already deprecated public List getContextSuffix(List context) { List contextSuffix = new LinkedList(context); // defensive copy boolean suffixFound = false; while(!suffixFound) { if( contextSuffix.isEmpty() ) { suffixFound = true; // suffix is the empty word } else { TrieNode node = find(contextSuffix); if( node!=null ) { if( !node.getFollowingSymbols().isEmpty() ) { suffixFound = true; } } if( !suffixFound ) { contextSuffix.remove(0); } } } return contextSuffix; } static public class Edge{} static public class TrieVertex { private String id; protected TrieVertex(String id) { this.id = id; } public String toString() { return id; } } protected Tree getGraph() { DelegateTree graph = new DelegateTree(); rootNode.getGraph(null, graph); return graph; } public String getDotRepresentation() { StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("digraph model {" + StringTools.ENDLINE); rootNode.appendDotRepresentation(stringBuilder); stringBuilder.append('}' + StringTools.ENDLINE); return stringBuilder.toString(); } @Override public String toString() { return rootNode.toString(); } public int getNumSymbols() { return knownSymbols.size(); } }