Changeset 559 for trunk/quest-core-usageprofiles
- Timestamp:
- 08/17/12 09:05:19 (12 years ago)
- Location:
- trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/DeterministicFiniteAutomaton.java
r547 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 11 12 /** 12 13 * <p> 13 * Implements a Deterministic Finite Automata (DFA) capable of random session 14 * generation. It is a special case of a first-order Markov model, where the15 * transition probability is equally high forall following states.14 * Implements a Deterministic Finite Automata (DFA) capable of random session generation. It is a 15 * special case of a first-order Markov model, where the transition probability is equally high for 16 * all following states. 16 17 * </p> 17 18 * … … 21 22 public class DeterministicFiniteAutomaton extends FirstOrderMarkovModel { 22 23 23 24 25 26 27 28 24 /** 25 * <p> 26 * Id for object serialization. 27 * </p> 28 */ 29 private static final long serialVersionUID = 1L; 29 30 30 /** 31 * <p> 32 * Constructor. Creates a new DeterministicFiniteAutomaton. 33 * </p> 34 * 35 * @param r 36 * random number generator used by probabilistic methods of the 37 * class 38 */ 39 public DeterministicFiniteAutomaton(Random r) { 40 super(r); 41 } 31 /** 32 * <p> 33 * Constructor. Creates a new DeterministicFiniteAutomaton. 34 * </p> 35 * 36 * @param r 37 * random number generator used by probabilistic methods of the class 38 */ 39 public DeterministicFiniteAutomaton(Random r) { 40 super(r); 41 } 42 42 43 /** 44 * <p> 45 * Calculates the proability of the next state. Each of the following states 46 * in the automaton is equally probable. 47 * </p> 48 * 49 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 50 * de.ugoe.cs.quest.eventcore.Event) 51 */ 52 @Override 53 public double getProbability(List<Event> context, 54 Event symbol) { 55 if( context==null ) { 56 throw new InvalidParameterException("context must not be null"); 57 } 58 if( symbol==null ) { 59 throw new InvalidParameterException("symbol must not be null"); 60 } 61 double result = 0.0d; 43 /** 44 * <p> 45 * Calculates the proability of the next state. Each of the following states in the automaton is 46 * equally probable. 47 * </p> 48 * 49 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 50 * de.ugoe.cs.quest.eventcore.Event) 51 */ 52 @Override 53 public double getProbability(List<Event> context, Event symbol) { 54 if (context == null) { 55 throw new InvalidParameterException("context must not be null"); 56 } 57 if (symbol == null) { 58 throw new InvalidParameterException("symbol must not be null"); 59 } 60 double result = 0.0d; 62 61 63 List<Event> contextCopy; 64 if (context.size() >= trieOrder) { 65 contextCopy = new LinkedList<Event>(context.subList( 66 context.size() - trieOrder + 1, context.size())); 67 } else { 68 contextCopy = new LinkedList<Event>(context); 69 } 62 List<Event> contextCopy; 63 if (context.size() >= trieOrder) { 64 contextCopy = 65 new LinkedList<Event>(context.subList(context.size() - trieOrder + 1, 66 context.size())); 67 } 68 else { 69 contextCopy = new LinkedList<Event>(context); 70 } 70 71 71 72 Collection<Event> followers = trie.getFollowingSymbols(contextCopy); 72 73 73 74 75 74 if (followers.size() != 0 && followers.contains(symbol)) { 75 result = 1.0d / followers.size(); 76 } 76 77 77 78 78 return result; 79 } 79 80 80 81 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java
r553 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 18 19 /** 19 20 * <p> 20 * Implements first-order Markov models. The implementation is based on 21 * {@link HighOrderMarkovModel} and restricts the Markov order to 1. In 22 * comparison to {@link HighOrderMarkovModel}, more calculations are possible 23 * with first-order models, e.g., the calculation of the entropy ( 21 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel} 22 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more 23 * calculations are possible with first-order models, e.g., the calculation of the entropy ( 24 24 * {@link #calcEntropy()}). 25 25 * </p> … … 28 28 * @version 1.0 29 29 */ 30 public class FirstOrderMarkovModel extends HighOrderMarkovModel implements 31 IDotCompatible { 32 33 /** 34 * <p> 35 * Id for object serialization. 36 * </p> 37 */ 38 private static final long serialVersionUID = 1L; 39 40 /** 41 * <p> 42 * Maximum number of iterations when calculating the stationary distribution 43 * as the limit of multiplying the transmission matrix with itself. 44 * </p> 45 */ 46 final static int MAX_STATDIST_ITERATIONS = 1000; 47 48 /** 49 * <p> 50 * Constructor. Creates a new FirstOrderMarkovModel. 51 * </p> 52 * 53 * @param r 54 * random number generator used by probabilistic methods of the 55 * class 56 */ 57 public FirstOrderMarkovModel(Random r) { 58 super(1, r); 59 } 60 61 /** 62 * <p> 63 * Generates the transmission matrix of the Markov model. 64 * </p> 65 * 66 * @return transmission matrix 67 */ 68 private Matrix getTransmissionMatrix() { 69 List<Event> knownSymbols = new ArrayList<Event>( 70 trie.getKnownSymbols()); 71 int numStates = knownSymbols.size(); 72 Matrix transmissionMatrix = new Matrix(numStates, numStates); 73 74 for (int i = 0; i < numStates; i++) { 75 Event currentSymbol = knownSymbols.get(i); 76 List<Event> context = new ArrayList<Event>(); 77 context.add(currentSymbol); 78 for (int j = 0; j < numStates; j++) { 79 Event follower = knownSymbols.get(j); 80 double prob = getProbability(context, follower); 81 transmissionMatrix.set(i, j, prob); 82 } 83 } 84 return transmissionMatrix; 85 } 86 87 /** 88 * <p> 89 * Calculates the entropy of the model. To make it possible that the model 90 * is stationary, a transition from {@link Event#ENDEVENT} to 91 * {@link Event#STARTEVENT} is added. 92 * </p> 93 * 94 * @return entropy of the model or NaN if it could not be calculated 95 */ 96 public double calcEntropy() { 97 Matrix transmissionMatrix = getTransmissionMatrix(); 98 List<Event> knownSymbols = new ArrayList<Event>( 99 trie.getKnownSymbols()); 100 int numStates = knownSymbols.size(); 101 102 List<Integer> startIndexList = new LinkedList<Integer>(); 103 List<Integer> endIndexList = new LinkedList<Integer>(); 104 for( int i=0 ; i<knownSymbols.size() ; i++ ) { 105 String id = knownSymbols.get(i).getId(); 106 if( id.equals(Event.STARTEVENT.getId()) || id.contains(Event.STARTEVENT.getId()+"-=-") ) { 107 startIndexList.add(i); 108 } 109 if( id.equals(Event.ENDEVENT.getId()) || id.contains("-=-"+Event.ENDEVENT.getId()) ) { 110 endIndexList.add(i); 111 } 112 } 113 114 if (startIndexList.isEmpty()) { 115 Console.printerrln("Error calculating entropy. Initial state of markov chain not found."); 116 return Double.NaN; 117 } 118 if (endIndexList.isEmpty()) { 119 Console.printerrln("Error calculating entropy. End state of markov chain not found."); 120 return Double.NaN; 121 } 122 for( Integer i : endIndexList ) { 123 for(Integer j : startIndexList ) { 124 transmissionMatrix.set(i, j, 1); 125 } 126 } 127 128 // Calculate stationary distribution by raising the power of the 129 // transmission matrix. 130 // The rank of the matrix should fall to 1 and each two should be the 131 // vector of the stationory distribution. 132 int iter = 0; 133 int rank = transmissionMatrix.rank(); 134 Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone(); 135 while (iter < MAX_STATDIST_ITERATIONS && rank > 1) { 136 stationaryMatrix = stationaryMatrix.times(stationaryMatrix); 137 rank = stationaryMatrix.rank(); 138 iter++; 139 } 140 141 if (rank != 1) { 142 Console.traceln("rank: " + rank); 143 Console.printerrln("Unable to calculate stationary distribution."); 144 return Double.NaN; 145 } 146 147 double entropy = 0.0; 148 for (int i = 0; i < numStates; i++) { 149 for (int j = 0; j < numStates; j++) { 150 if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j)!=1) { 151 double tmp = stationaryMatrix.get(0, i); 152 tmp *= transmissionMatrix.get(i, j); 153 tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2); 154 entropy -= tmp; 155 } 156 } 157 } 158 return entropy; 159 } 160 161 /** 162 * <p> 163 * The dot represenation of {@link FirstOrderMarkovModel}s is its graph 164 * representation with the states as nodes and directed edges weighted with 165 * transition probabilities. 166 * </p> 167 * 168 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 169 */ 170 @Override 171 public String getDotRepresentation() { 172 StringBuilder stringBuilder = new StringBuilder(); 173 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 174 175 List<Event> knownSymbols = new ArrayList<Event>( 176 trie.getKnownSymbols()); 177 for (Event symbol : knownSymbols) { 178 final String thisSaneId = symbol.getId().replace("\"", "\\\"") 179 .replaceAll("[\r\n]", ""); 180 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" 181 + thisSaneId + "\"];" + StringTools.ENDLINE); 182 List<Event> context = new ArrayList<Event>(); 183 context.add(symbol); 184 Collection<Event> followers = trie.getFollowingSymbols(context); 185 for (Event follower : followers) { 186 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " 187 + knownSymbols.indexOf(follower) + " "); 188 stringBuilder.append("[label=\"" 189 + getProbability(context, follower) + "\"];" 190 + StringTools.ENDLINE); 191 } 192 } 193 stringBuilder.append('}' + StringTools.ENDLINE); 194 return stringBuilder.toString(); 195 } 196 197 /** 198 * <p> 199 * Returns a {@link Graph} representation of the model with the states as 200 * nodes and directed edges weighted with transition probabilities. 201 * </p> 202 * 203 * @return {@link Graph} of the model 204 */ 205 public Graph<String, MarkovEdge> getGraph() { 206 Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>(); 207 208 List<Event> knownSymbols = new ArrayList<Event>( 209 trie.getKnownSymbols()); 210 211 for (Event symbol : knownSymbols) { 212 String from = symbol.getId(); 213 List<Event> context = new ArrayList<Event>(); 214 context.add(symbol); 215 216 Collection<Event> followers = trie.getFollowingSymbols(context); 217 218 for (Event follower : followers) { 219 String to = follower.getId(); 220 MarkovEdge prob = new MarkovEdge(getProbability(context, 221 follower)); 222 graph.addEdge(prob, from, to, EdgeType.DIRECTED); 223 } 224 } 225 return graph; 226 } 227 228 /** 229 * Inner class used for the {@link Graph} representation of the model. 230 * 231 * @author Steffen Herbold 232 * @version 1.0 233 */ 234 static public class MarkovEdge { 235 /** 236 * <p> 237 * Weight of the edge, i.e., its transition probability. 238 * </p> 239 */ 240 double weight; 241 242 /** 243 * <p> 244 * Constructor. Creates a new MarkovEdge. 245 * </p> 246 * 247 * @param weight 248 * weight of the edge, i.e., its transition probability 249 */ 250 MarkovEdge(double weight) { 251 this.weight = weight; 252 } 253 254 /** 255 * <p> 256 * The weight of the edge as {@link String}. 257 * </p> 258 */ 259 public String toString() { 260 return "" + weight; 261 } 262 } 30 public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible { 31 32 /** 33 * <p> 34 * Id for object serialization. 35 * </p> 36 */ 37 private static final long serialVersionUID = 1L; 38 39 /** 40 * <p> 41 * Maximum number of iterations when calculating the stationary distribution as the limit of 42 * multiplying the transmission matrix with itself. 43 * </p> 44 */ 45 final static int MAX_STATDIST_ITERATIONS = 1000; 46 47 /** 48 * <p> 49 * Constructor. Creates a new FirstOrderMarkovModel. 50 * </p> 51 * 52 * @param r 53 * random number generator used by probabilistic methods of the class 54 */ 55 public FirstOrderMarkovModel(Random r) { 56 super(1, r); 57 } 58 59 /** 60 * <p> 61 * Generates the transmission matrix of the Markov model. 62 * </p> 63 * 64 * @return transmission matrix 65 */ 66 private Matrix getTransmissionMatrix() { 67 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 68 int numStates = knownSymbols.size(); 69 Matrix transmissionMatrix = new Matrix(numStates, numStates); 70 71 for (int i = 0; i < numStates; i++) { 72 Event currentSymbol = knownSymbols.get(i); 73 List<Event> context = new ArrayList<Event>(); 74 context.add(currentSymbol); 75 for (int j = 0; j < numStates; j++) { 76 Event follower = knownSymbols.get(j); 77 double prob = getProbability(context, follower); 78 transmissionMatrix.set(i, j, prob); 79 } 80 } 81 return transmissionMatrix; 82 } 83 84 /** 85 * <p> 86 * Calculates the entropy of the model. To make it possible that the model is stationary, a 87 * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added. 88 * </p> 89 * 90 * @return entropy of the model or NaN if it could not be calculated 91 */ 92 public double calcEntropy() { 93 Matrix transmissionMatrix = getTransmissionMatrix(); 94 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 95 int numStates = knownSymbols.size(); 96 97 List<Integer> startIndexList = new LinkedList<Integer>(); 98 List<Integer> endIndexList = new LinkedList<Integer>(); 99 for (int i = 0; i < knownSymbols.size(); i++) { 100 String id = knownSymbols.get(i).getId(); 101 if (id.equals(Event.STARTEVENT.getId()) || 102 id.contains(Event.STARTEVENT.getId() + "-=-")) 103 { 104 startIndexList.add(i); 105 } 106 if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) { 107 endIndexList.add(i); 108 } 109 } 110 111 if (startIndexList.isEmpty()) { 112 Console 113 .printerrln("Error calculating entropy. Initial state of markov chain not found."); 114 return Double.NaN; 115 } 116 if (endIndexList.isEmpty()) { 117 Console.printerrln("Error calculating entropy. End state of markov chain not found."); 118 return Double.NaN; 119 } 120 for (Integer i : endIndexList) { 121 for (Integer j : startIndexList) { 122 transmissionMatrix.set(i, j, 1); 123 } 124 } 125 126 // Calculate stationary distribution by raising the power of the 127 // transmission matrix. 128 // The rank of the matrix should fall to 1 and each two should be the 129 // vector of the stationory distribution. 130 int iter = 0; 131 int rank = transmissionMatrix.rank(); 132 Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone(); 133 while (iter < MAX_STATDIST_ITERATIONS && rank > 1) { 134 stationaryMatrix = stationaryMatrix.times(stationaryMatrix); 135 rank = stationaryMatrix.rank(); 136 iter++; 137 } 138 139 if (rank != 1) { 140 Console.traceln("rank: " + rank); 141 Console.printerrln("Unable to calculate stationary distribution."); 142 return Double.NaN; 143 } 144 145 double entropy = 0.0; 146 for (int i = 0; i < numStates; i++) { 147 for (int j = 0; j < numStates; j++) { 148 if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) { 149 double tmp = stationaryMatrix.get(0, i); 150 tmp *= transmissionMatrix.get(i, j); 151 tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2); 152 entropy -= tmp; 153 } 154 } 155 } 156 return entropy; 157 } 158 159 /** 160 * <p> 161 * The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the 162 * states as nodes and directed edges weighted with transition probabilities. 163 * </p> 164 * 165 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 166 */ 167 @Override 168 public String getDotRepresentation() { 169 StringBuilder stringBuilder = new StringBuilder(); 170 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 171 172 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 173 for (Event symbol : knownSymbols) { 174 final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", ""); 175 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId + 176 "\"];" + StringTools.ENDLINE); 177 List<Event> context = new ArrayList<Event>(); 178 context.add(symbol); 179 Collection<Event> followers = trie.getFollowingSymbols(context); 180 for (Event follower : followers) { 181 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " + 182 knownSymbols.indexOf(follower) + " "); 183 stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" + 184 StringTools.ENDLINE); 185 } 186 } 187 stringBuilder.append('}' + StringTools.ENDLINE); 188 return stringBuilder.toString(); 189 } 190 191 /** 192 * <p> 193 * Returns a {@link Graph} representation of the model with the states as nodes and directed 194 * edges weighted with transition probabilities. 195 * </p> 196 * 197 * @return {@link Graph} of the model 198 */ 199 public Graph<String, MarkovEdge> getGraph() { 200 Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>(); 201 202 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 203 204 for (Event symbol : knownSymbols) { 205 String from = symbol.getId(); 206 List<Event> context = new ArrayList<Event>(); 207 context.add(symbol); 208 209 Collection<Event> followers = trie.getFollowingSymbols(context); 210 211 for (Event follower : followers) { 212 String to = follower.getId(); 213 MarkovEdge prob = new MarkovEdge(getProbability(context, follower)); 214 graph.addEdge(prob, from, to, EdgeType.DIRECTED); 215 } 216 } 217 return graph; 218 } 219 220 /** 221 * Inner class used for the {@link Graph} representation of the model. 222 * 223 * @author Steffen Herbold 224 * @version 1.0 225 */ 226 static public class MarkovEdge { 227 /** 228 * <p> 229 * Weight of the edge, i.e., its transition probability. 230 * </p> 231 */ 232 double weight; 233 234 /** 235 * <p> 236 * Constructor. Creates a new MarkovEdge. 237 * </p> 238 * 239 * @param weight 240 * weight of the edge, i.e., its transition probability 241 */ 242 MarkovEdge(double weight) { 243 this.weight = weight; 244 } 245 246 /** 247 * <p> 248 * The weight of the edge as {@link String}. 249 * </p> 250 */ 251 public String toString() { 252 return "" + weight; 253 } 254 } 263 255 264 256 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/HighOrderMarkovModel.java
r547 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 19 20 public class HighOrderMarkovModel extends TrieBasedModel { 20 21 21 22 23 24 25 26 22 /** 23 * <p> 24 * Id for object serialization. 25 * </p> 26 */ 27 private static final long serialVersionUID = 1L; 27 28 28 /** 29 * <p> 30 * Constructor. Creates a new HighOrderMarkovModel with a defined Markov 31 * order. 32 * </p> 33 * 34 * @param maxOrder 35 * Markov order of the model 36 * @param r 37 * random number generator used by probabilistic methods of the 38 * class 39 */ 40 public HighOrderMarkovModel(int maxOrder, Random r) { 41 super(maxOrder, r); 42 } 29 /** 30 * <p> 31 * Constructor. Creates a new HighOrderMarkovModel with a defined Markov order. 32 * </p> 33 * 34 * @param maxOrder 35 * Markov order of the model 36 * @param r 37 * random number generator used by probabilistic methods of the class 38 */ 39 public HighOrderMarkovModel(int maxOrder, Random r) { 40 super(maxOrder, r); 41 } 43 42 44 /** 45 * <p> 46 * Calculates the probability of the next Event being symbol based on the 47 * order of the Markov model. The order is defined in the constructor 48 * {@link #HighOrderMarkovModel(int, Random)}. 49 * </p> 50 * 51 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 52 * de.ugoe.cs.quest.eventcore.Event) 53 */ 54 @Override 55 public double getProbability(List<Event> context, 56 Event symbol) { 57 if (context == null) { 58 throw new InvalidParameterException("context must not be null"); 59 } 60 if (symbol == null) { 61 throw new InvalidParameterException("symbol must not be null"); 62 } 63 double result = 0.0d; 43 /** 44 * <p> 45 * Calculates the probability of the next Event being symbol based on the order of the Markov 46 * model. The order is defined in the constructor {@link #HighOrderMarkovModel(int, Random)}. 47 * </p> 48 * 49 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 50 * de.ugoe.cs.quest.eventcore.Event) 51 */ 52 @Override 53 public double getProbability(List<Event> context, Event symbol) { 54 if (context == null) { 55 throw new InvalidParameterException("context must not be null"); 56 } 57 if (symbol == null) { 58 throw new InvalidParameterException("symbol must not be null"); 59 } 60 double result = 0.0d; 64 61 65 List<Event> contextCopy; 66 if (context.size() >= trieOrder) { 67 contextCopy = new LinkedList<Event>(context.subList( 68 context.size() - trieOrder + 1, context.size())); 69 } else { 70 contextCopy = new LinkedList<Event>(context); 71 } 62 List<Event> contextCopy; 63 if (context.size() >= trieOrder) { 64 contextCopy = 65 new LinkedList<Event>(context.subList(context.size() - trieOrder + 1, 66 context.size())); 67 } 68 else { 69 contextCopy = new LinkedList<Event>(context); 70 } 72 71 73 74 75 76 77 72 Collection<Event> followers = trie.getFollowingSymbols(contextCopy); 73 int sumCountFollowers = 0; // N(s\sigma') 74 for (Event follower : followers) { 75 sumCountFollowers += trie.getCount(contextCopy, follower); 76 } 78 77 79 80 81 82 78 int countSymbol = trie.getCount(contextCopy, symbol); 79 if (sumCountFollowers != 0) { 80 result = ((double) countSymbol / sumCountFollowers); 81 } 83 82 84 85 83 return result; 84 } 86 85 87 86 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/IDotCompatible.java
r518 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 3 4 /** 4 5 * <p> 5 * Models implementing this interface provide a graph representation of 6 * themselves as a{@link String} with Dot syntax.6 * Models implementing this interface provide a graph representation of themselves as a 7 * {@link String} with Dot syntax. 7 8 * </p> 8 9 * … … 12 13 public interface IDotCompatible { 13 14 14 15 16 17 18 19 20 21 15 /** 16 * <p> 17 * Returns a Dot representation of the model. 18 * </p> 19 * 20 * @return string with Dot syntax that describes the model. 21 */ 22 public abstract String getDotRepresentation(); 22 23 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/IMemory.java
r518 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 5 6 /** 6 7 * <p> 7 * This interface defines basic functions for classes that implement a memory 8 * about the recentevents of a sequences.8 * This interface defines basic functions for classes that implement a memory about the recent 9 * events of a sequences. 9 10 * </p> 10 11 * … … 17 18 public interface IMemory<T> { 18 19 19 20 21 22 23 24 25 20 /** 21 * Adds an element to the end of the memory. 22 * 23 * @param element 24 * Element to be added. 25 */ 26 public void add(T element); 26 27 27 28 29 * Returns the last <code>num</code> memorized elements. If the history is 30 * shorter than <code>num</code>, the length of the returned 31 * {@link java.util.List} may be less than<code>num</code>.32 33 34 35 36 37 38 28 /** 29 * <p> 30 * Returns the last <code>num</code> memorized elements. If the history is shorter than 31 * <code>num</code>, the length of the returned {@link java.util.List} may be less than 32 * <code>num</code>. 33 * </p> 34 * 35 * @param num 36 * Number of states from the end of the memory to be returned. 37 * @return {@link java.util.List} of memorized elements. 38 */ 39 public List<T> getLast(int num); 39 40 40 41 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/IStochasticProcess.java
r547 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 18 19 public interface IStochasticProcess extends Serializable { 19 20 20 21 22 * Returns the probability, that the next event is {@code symbol} given the23 * last events are {@code context}. The last element of {@code context} is 24 * the most recent in the history, thefirst element is the oldest.25 26 27 28 29 30 31 * @return probabilty the {@code symbol} is the next event, given the last 32 * events{@code context}33 34 35 36 21 /** 22 * <p> 23 * Returns the probability, that the next event is {@code symbol} given the last events are 24 * {@code context}. The last element of {@code context} is the most recent in the history, the 25 * first element is the oldest. 26 * </p> 27 * 28 * @param context 29 * recently observed symbols 30 * @param symbol 31 * event for which the probability is calculated 32 * @return probabilty the {@code symbol} is the next event, given the last events 33 * {@code context} 34 * @throws InvalidParameterException 35 * thrown if context or symbol is null 36 */ 37 double getProbability(List<Event> context, Event symbol); 37 38 38 /** 39 * <p> 40 * Returns the probabilitiy that a given sequence is generated by the 41 * stochastic process. 42 * </p> 43 * 44 * @param sequence 45 * sequences of which the probability is calculated 46 * @return probability of the sequences; 1.0 if sequence is empty or null 47 * @throws InvalidParameterException 48 * thrown if sequence is null 49 */ 50 double getProbability(List<Event> sequence); 39 /** 40 * <p> 41 * Returns the probabilitiy that a given sequence is generated by the stochastic process. 42 * </p> 43 * 44 * @param sequence 45 * sequences of which the probability is calculated 46 * @return probability of the sequences; 1.0 if sequence is empty or null 47 * @throws InvalidParameterException 48 * thrown if sequence is null 49 */ 50 double getProbability(List<Event> sequence); 51 51 52 53 54 * Generates a random sequence of events. The sequence starts with 55 * {@link Event#STARTEVENT} andfinishes with {@link Event#ENDEVENT}.56 57 58 59 60 52 /** 53 * <p> 54 * Generates a random sequence of events. The sequence starts with {@link Event#STARTEVENT} and 55 * finishes with {@link Event#ENDEVENT}. 56 * </p> 57 * 58 * @return randomly generated sequence 59 */ 60 public List<Event> randomSequence(); 61 61 62 /** 63 * <p> 64 * Generates a random sequence of events. The sequence starts with 65 * {@link Event#STARTEVENT} and finishes with 66 * <ul> 67 * <li>{@link Event#ENDEVENT} if validEnd==true.</li> 68 * <li>b) if a generated sequences reaches {@link Event#ENDEVENT} before 69 * maxLength, the sequence finishes and is shorter than maxLenght. 70 * Otherwise, the sequence finishes as soon as maxLength is reached and the 71 * final event of the sequence must not be {@link Event#ENDEVENT}.</li> 72 * </ul> 73 * </p> 74 * 75 * @param maxLength 76 * maximum length of the generated sequence 77 * @param validEnd 78 * if true, only sequences that finish with 79 * {@link Event#ENDEVENT} are generated 80 * @return randomly generated sequence 81 * 82 */ 83 public List<Event> randomSequence(int maxLength, 84 boolean validEnd); 62 /** 63 * <p> 64 * Generates a random sequence of events. The sequence starts with {@link Event#STARTEVENT} and 65 * finishes with 66 * <ul> 67 * <li>{@link Event#ENDEVENT} if validEnd==true.</li> 68 * <li>b) if a generated sequences reaches {@link Event#ENDEVENT} before maxLength, the sequence 69 * finishes and is shorter than maxLenght. Otherwise, the sequence finishes as soon as maxLength 70 * is reached and the final event of the sequence must not be {@link Event#ENDEVENT}.</li> 71 * </ul> 72 * </p> 73 * 74 * @param maxLength 75 * maximum length of the generated sequence 76 * @param validEnd 77 * if true, only sequences that finish with {@link Event#ENDEVENT} are generated 78 * @return randomly generated sequence 79 * 80 */ 81 public List<Event> randomSequence(int maxLength, boolean validEnd); 85 82 86 /** 87 * <p> 88 * Generates all sequences of a given length are possible, i.e., have a 89 * positive probability.<br> 90 * All states are used as possible starting states. 91 * </p> 92 * 93 * @param length 94 * length of the generated sequences 95 * @return generated sequences 96 * @see #generateSequences(int, boolean) 97 * @throws InvalidParameterException 98 * thrown if length is less than or equal to 0 99 */ 100 public Collection<List<Event>> generateSequences(int length); 83 /** 84 * <p> 85 * Generates all sequences of a given length are possible, i.e., have a positive probability.<br> 86 * All states are used as possible starting states. 87 * </p> 88 * 89 * @param length 90 * length of the generated sequences 91 * @return generated sequences 92 * @see #generateSequences(int, boolean) 93 * @throws InvalidParameterException 94 * thrown if length is less than or equal to 0 95 */ 96 public Collection<List<Event>> generateSequences(int length); 101 97 102 /** 103 * <p> 104 * Generates all sequences of given length that can are possible, i.e., have 105 * positive probability.<br> 106 * If {@code fromStart==true}, all generated sequences start in 107 * {@link Event#STARTEVENT}. Otherwise this method is the same as 108 * {@link #generateSequences(int)}. 109 * </p> 110 * 111 * @param length 112 * length of the generated sequences 113 * @param fromStart 114 * if true, all generated sequences start with 115 * {@link Event#STARTEVENT} 116 * @return generated sequences 117 * @throws InvalidParameterException 118 * thrown if length is less than or equal to 0 119 */ 120 public Collection<List<Event>> generateSequences(int length, 121 boolean fromStart); 98 /** 99 * <p> 100 * Generates all sequences of given length that can are possible, i.e., have positive 101 * probability.<br> 102 * If {@code fromStart==true}, all generated sequences start in {@link Event#STARTEVENT}. 103 * Otherwise this method is the same as {@link #generateSequences(int)}. 104 * </p> 105 * 106 * @param length 107 * length of the generated sequences 108 * @param fromStart 109 * if true, all generated sequences start with {@link Event#STARTEVENT} 110 * @return generated sequences 111 * @throws InvalidParameterException 112 * thrown if length is less than or equal to 0 113 */ 114 public Collection<List<Event>> generateSequences(int length, boolean fromStart); 122 115 123 /** 124 * <p> 125 * Generates all sequences starting with {@link Event#STARTEVENT} and 126 * finishing with {@link Event#ENDEVENT} of a given length. It is possible 127 * that no such sequence exists with the defined length and the returned set 128 * is empty. If {@code length} is less than 2 the returned set is always 129 * empty. 130 * </p> 131 * 132 * @param length 133 * @return generated sequences 134 * @throws InvalidParameterException 135 * thrown if length is less than or equal to 0 136 */ 137 public Collection<List<Event>> generateValidSequences( 138 int length); 116 /** 117 * <p> 118 * Generates all sequences starting with {@link Event#STARTEVENT} and finishing with 119 * {@link Event#ENDEVENT} of a given length. It is possible that no such sequence exists with 120 * the defined length and the returned set is empty. If {@code length} is less than 2 the 121 * returned set is always empty. 122 * </p> 123 * 124 * @param length 125 * @return generated sequences 126 * @throws InvalidParameterException 127 * thrown if length is less than or equal to 0 128 */ 129 public Collection<List<Event>> generateValidSequences(int length); 139 130 140 /** 141 * <p> 142 * Returns the number of states known by the stochastic process, i.e., the 143 * size of its alphabet. 144 * </p> 145 * 146 * @return number of states 147 */ 148 public int getNumSymbols(); 131 /** 132 * <p> 133 * Returns the number of states known by the stochastic process, i.e., the size of its alphabet. 134 * </p> 135 * 136 * @return number of states 137 */ 138 public int getNumSymbols(); 149 139 150 151 152 153 154 155 156 157 140 /** 141 * <p> 142 * Returns a string representation of all known states. 143 * </p> 144 * 145 * @return string representation for all known states 146 */ 147 public String[] getSymbolStrings(); 158 148 159 /** 160 * <p> 161 * Returns the number of states the process would have if it would be 162 * flattened through state-splitting to a first-order Markov model. 163 * </p> 164 * <p> 165 * If it is not possible to flatten the model, -1 is returned. 166 * </p> 167 * 168 * @return number of states an equivalent FOM would have; -1 if not 169 * available 170 */ 171 public int getNumFOMStates(); 149 /** 150 * <p> 151 * Returns the number of states the process would have if it would be flattened through 152 * state-splitting to a first-order Markov model. 153 * </p> 154 * <p> 155 * If it is not possible to flatten the model, -1 is returned. 156 * </p> 157 * 158 * @return number of states an equivalent FOM would have; -1 if not available 159 */ 160 public int getNumFOMStates(); 172 161 173 /** 174 * <p> 175 * Returns the number of transitions the process would have if it would be 176 * flattened through state-splitting to a first-order Markov model. 177 * </p> 178 * 179 * @return number of transitions an equivalent FOM would have; -1 if not 180 * available 181 */ 182 public int getNumTransitions(); 162 /** 163 * <p> 164 * Returns the number of transitions the process would have if it would be flattened through 165 * state-splitting to a first-order Markov model. 166 * </p> 167 * 168 * @return number of transitions an equivalent FOM would have; -1 if not available 169 */ 170 public int getNumTransitions(); 183 171 184 /** 185 * <p> 186 * Returns all states known by the stochastic process, i.e., its 187 * {@link Event}s. 188 * </p> 189 * 190 * @return events known by the process 191 */ 192 public Collection<Event> getEvents(); 172 /** 173 * <p> 174 * Returns all states known by the stochastic process, i.e., its {@link Event}s. 175 * </p> 176 * 177 * @return events known by the process 178 */ 179 public Collection<Event> getEvents(); 193 180 194 181 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/IncompleteMemory.java
r518 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 7 8 /** 8 9 * <p> 9 * Implements a round-trip buffered memory of a specified length that can be 10 * used to remember the recent history. Every event that happend longer ago than11 * the length of the memory is forgotten,hence the memory is incomplete.10 * Implements a round-trip buffered memory of a specified length that can be used to remember the 11 * recent history. Every event that happend longer ago than the length of the memory is forgotten, 12 * hence the memory is incomplete. 12 13 * </p> 13 14 * … … 20 21 public class IncompleteMemory<T> implements IMemory<T> { 21 22 22 23 24 25 26 27 23 /** 24 * <p> 25 * Maximum length of the memory. 26 * </p> 27 */ 28 private int length; 28 29 29 30 31 32 33 34 30 /** 31 * <p> 32 * Internal storage of the history. 33 * </p> 34 */ 35 private List<T> history; 35 36 36 /** 37 * <p> 38 * Constructor. Creates a new IncompleteMemory. 39 * </p> 40 * 41 * @param length 42 * number of recent events that are remembered 43 * @throws InvalidParameterException 44 * This exception is thrown if the length is smaller than 1 45 */ 46 public IncompleteMemory(int length) { 47 if (length < 1) { 48 throw new InvalidParameterException( 49 "Length of IncompleteMemory must be at least 1."); 50 } 51 this.length = length; 52 history = new LinkedList<T>(); 53 } 37 /** 38 * <p> 39 * Constructor. Creates a new IncompleteMemory. 40 * </p> 41 * 42 * @param length 43 * number of recent events that are remembered 44 * @throws InvalidParameterException 45 * This exception is thrown if the length is smaller than 1 46 */ 47 public IncompleteMemory(int length) { 48 if (length < 1) { 49 throw new InvalidParameterException("Length of IncompleteMemory must be at least 1."); 50 } 51 this.length = length; 52 history = new LinkedList<T>(); 53 } 54 54 55 56 57 58 59 60 61 62 63 64 65 66 55 /* 56 * (non-Javadoc) 57 * 58 * @see de.ugoe.cs.quest.usageprofiles.IMemory#add(java.lang.Object) 59 */ 60 @Override 61 public void add(T state) { 62 if (history.size() == length) { 63 history.remove(0); 64 } 65 history.add(state); 66 } 67 67 68 69 70 71 72 73 74 75 if( num<1) {76 77 } else { 78 return new LinkedList<T>(history.subList( 79 80 81 82 68 /* 69 * (non-Javadoc) 70 * 71 * @see de.ugoe.cs.quest.usageprofiles.IMemory#getLast(int) 72 */ 73 @Override 74 public List<T> getLast(int num) { 75 if (num < 1) { 76 return new LinkedList<T>(); 77 } 78 else { 79 return new LinkedList<T>(history.subList(Math.max(0, history.size() - num), 80 history.size())); // defensive copy 81 } 82 } 83 83 84 85 86 * Returns the current length of the memory. This can be less than 87 * {@link #length}, if theoverall history is less than {@link #length}.88 89 90 91 92 93 94 84 /** 85 * <p> 86 * Returns the current length of the memory. This can be less than {@link #length}, if the 87 * overall history is less than {@link #length}. 88 * </p> 89 * 90 * @return length of the current memory 91 */ 92 public int getLength() { 93 return history.size(); 94 } 95 95 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/ModelFlattener.java
r553 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 10 11 /** 11 12 * <p> 12 * This class provides functions to create flattened first-order Markov models 13 * from {@link HighOrderMarkovModel}s and {@link PredictionByPartialMatch}14 * models through statesplitting.13 * This class provides functions to create flattened first-order Markov models from 14 * {@link HighOrderMarkovModel}s and {@link PredictionByPartialMatch} models through state 15 * splitting. 15 16 * </p> 16 17 * <p> 17 * If possible, the normal high-order markov model should be used, as the Events 18 * may be broken by the flattener, as, e.g., the information 19 * {@link ReplayableEvent}s contain is not preserved. 18 * If possible, the normal high-order markov model should be used, as the Events may be broken by 19 * the flattener, as, e.g., the information {@link ReplayableEvent}s contain is not preserved. 20 20 * </p> 21 21 * … … 25 25 public class ModelFlattener { 26 26 27 27 private static final String EVENT_SEPARATOR = "-=-"; 28 28 29 29 Trie<Event> firstOrderTrie; 30 30 31 /** 32 * <p> 33 * Takes a {@link HighOrderMarkovModel} and returns a 34 * {@link FirstOrderMarkovModel} that conserves the high-order memory 35 * through state splitting. 36 * </p> 37 * 38 * @param model 39 * model that is flattened 40 * @return flattened first-order Markov model 41 */ 42 public FirstOrderMarkovModel flattenHighOrderMarkovModel( 43 HighOrderMarkovModel model) { 44 int markovOrder = model.trieOrder - 1; 45 FirstOrderMarkovModel firstOrderModel = new FirstOrderMarkovModel( 46 new Random()); 47 firstOrderModel.trieOrder = 2; 48 if (markovOrder == 1) { 49 firstOrderModel.trie = new Trie<Event>(model.trie); 50 firstOrderModel.trieOrder = 2; 51 } else { 52 firstOrderTrie = new Trie<Event>(); 53 TrieNode<Event> rootNode = model.trie.find(null); 54 generateFirstOrderTrie(rootNode, new LinkedList<String>(), markovOrder); 55 firstOrderTrie.updateKnownSymbols(); 56 firstOrderModel.trie = firstOrderTrie; 57 } 31 /** 32 * <p> 33 * Takes a {@link HighOrderMarkovModel} and returns a {@link FirstOrderMarkovModel} that 34 * conserves the high-order memory through state splitting. 35 * </p> 36 * 37 * @param model 38 * model that is flattened 39 * @return flattened first-order Markov model 40 */ 41 public FirstOrderMarkovModel flattenHighOrderMarkovModel(HighOrderMarkovModel model) { 42 int markovOrder = model.trieOrder - 1; 43 FirstOrderMarkovModel firstOrderModel = new FirstOrderMarkovModel(new Random()); 44 firstOrderModel.trieOrder = 2; 45 if (markovOrder == 1) { 46 firstOrderModel.trie = new Trie<Event>(model.trie); 47 firstOrderModel.trieOrder = 2; 48 } 49 else { 50 firstOrderTrie = new Trie<Event>(); 51 TrieNode<Event> rootNode = model.trie.find(null); 52 generateFirstOrderTrie(rootNode, new LinkedList<String>(), markovOrder); 53 firstOrderTrie.updateKnownSymbols(); 54 firstOrderModel.trie = firstOrderTrie; 55 } 58 56 59 60 57 return firstOrderModel; 58 } 61 59 62 /** 63 * <p> 64 * <b>This method is not available yet and always return null!</b> 65 * </p> 66 * <p> 67 * Takes a {@link PredictionByPartialMatch} model and returns a 68 * {@link FirstOrderMarkovModel} that conserves the high-order memory 69 * through state splitting. 70 * </p> 71 * 72 * @param model 73 * model that is flattened 74 * @return flattened first-order Markov model 75 */ 76 public FirstOrderMarkovModel flattenPredictionByPartialMatch( 77 PredictionByPartialMatch model) { 78 // TODO implement method 79 return null; 80 } 60 /** 61 * <p> 62 * <b>This method is not available yet and always return null!</b> 63 * </p> 64 * <p> 65 * Takes a {@link PredictionByPartialMatch} model and returns a {@link FirstOrderMarkovModel} 66 * that conserves the high-order memory through state splitting. 67 * </p> 68 * 69 * @param model 70 * model that is flattened 71 * @return flattened first-order Markov model 72 */ 73 public FirstOrderMarkovModel flattenPredictionByPartialMatch(PredictionByPartialMatch model) { 74 // TODO implement method 75 return null; 76 } 81 77 82 /** 83 * <p> 84 * Converts all nodes of the given depth into first-order node through 85 * state-splitting. For each node at the given depth a new node is created 86 * and appropriate transitions will be added. 87 * </p> 88 * <p> 89 * This method traverses through the tree recursively. If the recursion 90 * reaches the desired depth in the tree, node are added. 91 * </p> 92 * 93 * @param currentNode 94 * node whose sub-trie is currently traversed 95 * @param parentIDs 96 * ID strings of the ancestors of the currentNode 97 * @param depth 98 * depth to go - NOT the current depth. 99 */ 100 private void generateFirstOrderTrie(TrieNode<Event> currentNode, 101 List<String> parentIDs, int depth) { 102 for (TrieNode<Event> child : currentNode.getChildren()) { 103 String currentId = child.getSymbol().getId(); 104 if (depth > 1) { 105 List<String> childParentIDs = new LinkedList<String>(parentIDs); 106 childParentIDs.add(currentId); 107 generateFirstOrderTrie(child, childParentIDs, depth - 1); 78 /** 79 * <p> 80 * Converts all nodes of the given depth into first-order node through state-splitting. For each 81 * node at the given depth a new node is created and appropriate transitions will be added. 82 * </p> 83 * <p> 84 * This method traverses through the tree recursively. If the recursion reaches the desired 85 * depth in the tree, node are added. 86 * </p> 87 * 88 * @param currentNode 89 * node whose sub-trie is currently traversed 90 * @param parentIDs 91 * ID strings of the ancestors of the currentNode 92 * @param depth 93 * depth to go - NOT the current depth. 94 */ 95 private void generateFirstOrderTrie(TrieNode<Event> currentNode, 96 List<String> parentIDs, 97 int depth) 98 { 99 for (TrieNode<Event> child : currentNode.getChildren()) { 100 String currentId = child.getSymbol().getId(); 101 if (depth > 1) { 102 List<String> childParentIDs = new LinkedList<String>(parentIDs); 103 childParentIDs.add(currentId); 104 generateFirstOrderTrie(child, childParentIDs, depth - 1); 108 105 109 } else { 110 StringBuilder firstOrderID = new StringBuilder(); 111 for (String parentID : parentIDs) { 112 firstOrderID.append(parentID + EVENT_SEPARATOR); 113 } 114 firstOrderID.append(currentId); 115 TrieNode<Event> firstOrderNode = firstOrderTrie 116 .getChildCreate(new Event(new StringEventType(firstOrderID 117 .toString()))); 118 firstOrderNode.setCount(child.getCount()); 119 for (TrieNode<Event> transitionChild : child.getChildren()) { 120 StringBuilder transitionID = new StringBuilder(); 121 for (String parentID : parentIDs.subList(1, 122 parentIDs.size())) { 123 transitionID.append(parentID + EVENT_SEPARATOR); 124 } 125 transitionID.append(currentId + EVENT_SEPARATOR); 126 transitionID.append(transitionChild.getSymbol() 127 .getId()); 128 TrieNode<Event> firstOrderTransitionChild = firstOrderNode 129 .getChildCreate(new Event(new StringEventType(transitionID 130 .toString()))); 131 firstOrderTransitionChild.setCount(transitionChild 132 .getCount()); 133 } 134 } 135 } 136 } 106 } 107 else { 108 StringBuilder firstOrderID = new StringBuilder(); 109 for (String parentID : parentIDs) { 110 firstOrderID.append(parentID + EVENT_SEPARATOR); 111 } 112 firstOrderID.append(currentId); 113 TrieNode<Event> firstOrderNode = 114 firstOrderTrie.getChildCreate(new Event(new StringEventType(firstOrderID 115 .toString()))); 116 firstOrderNode.setCount(child.getCount()); 117 for (TrieNode<Event> transitionChild : child.getChildren()) { 118 StringBuilder transitionID = new StringBuilder(); 119 for (String parentID : parentIDs.subList(1, parentIDs.size())) { 120 transitionID.append(parentID + EVENT_SEPARATOR); 121 } 122 transitionID.append(currentId + EVENT_SEPARATOR); 123 transitionID.append(transitionChild.getSymbol().getId()); 124 TrieNode<Event> firstOrderTransitionChild = 125 firstOrderNode.getChildCreate(new Event(new StringEventType(transitionID 126 .toString()))); 127 firstOrderTransitionChild.setCount(transitionChild.getCount()); 128 } 129 } 130 } 131 } 137 132 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/PredictionByPartialMatch.java
r547 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 11 12 /** 12 13 * <p> 13 * Implements Prediction by Partial Match (PPM) based on the following formula 14 * (LaTeX-stylenotation):<br>14 * Implements Prediction by Partial Match (PPM) based on the following formula (LaTeX-style 15 * notation):<br> 15 16 * P_{PPM}(X_n|X_{n-1},...,X_{n-k}) = \sum_{i=k}^min escape^{k-i} P_{MM^i}(X_n 16 17 * |X_{n-1},...,X_{n-i})(1-escape)+escape^(k-min)P(X_n|X_{n-i},... ,X_{n-min})<br> … … 23 24 public class PredictionByPartialMatch extends TrieBasedModel { 24 25 25 /** 26 * <p> 27 * Id for object serialization. 28 * </p> 29 */ 30 private static final long serialVersionUID = 1L; 31 32 /** 33 * <p> 34 * Minimum order of the Markov model. 35 * </p> 36 */ 37 protected int minOrder; 38 39 /** 40 * <p> 41 * Probability to use a lower-order Markov model 42 * </p> 43 */ 44 protected double probEscape; 45 46 /** 47 * <p> 48 * Constructor. Creates a new PredictionByPartialMatch model with a given 49 * Markov order and a default escape probability of 0.1. 50 * </p> 51 * 52 * @param markovOrder 53 * Markov order of the model 54 * @param r 55 * random number generator used by probabilistic methods of the 56 * class 57 */ 58 public PredictionByPartialMatch(int markovOrder, Random r) { 59 this(markovOrder, r, 0.1); 60 } 61 62 /** 63 * <p> 64 * Creates a new PredictionByPartialMatch model with a given Markov order 65 * and escape probability. 66 * </p> 67 * 68 * @param markovOrder 69 * Markov order of the model 70 * @param r 71 * random number generator used by probabilistic methods of the 72 * class 73 * @param probEscape 74 * escape probability used by the model 75 */ 76 public PredictionByPartialMatch(int markovOrder, Random r, double probEscape) { 77 this(markovOrder, 0, r, probEscape); 78 } 79 80 /** 81 * <p> 82 * Creates a new PredictionByPartialMatch model with a given Markov order 83 * and escape probability. 84 * </p> 85 * 86 * @param markovOrder 87 * Markov order of the model 88 * @param minOrder 89 * minimum order of the model; if this order is reached, there is 90 * no escape 91 * @param r 92 * random number generator used by probabilistic methods of the 93 * class 94 * @param probEscape 95 * escape probability used by the model 96 * @throws InvalidParameterException thrown if minOrder is less than 0 or greater than markovOrder or probEscape is not in the interval (0,1) 97 */ 98 public PredictionByPartialMatch(int markovOrder, int minOrder, Random r, 99 double probEscape) { 100 super(markovOrder, r); 101 if( minOrder< 0 ) { 102 throw new InvalidParameterException("minOrder must be greather than or equal to 0"); 103 } 104 if( minOrder>markovOrder) { 105 throw new InvalidParameterException("minOrder must be less than or equal to markovOrder"); 106 } 107 if( probEscape<=0.0 || probEscape>=1.0) { 108 throw new InvalidParameterException("probEscape must be in the interval (0,1)"); 109 } 110 this.probEscape = probEscape; 111 this.minOrder = minOrder; 112 } 113 114 /** 115 * <p> 116 * Sets the escape probability of the model. 117 * </p> 118 * 119 * @param probEscape 120 * new escape probability 121 */ 122 public void setProbEscape(double probEscape) { 123 this.probEscape = probEscape; 124 } 125 126 /** 127 * <p> 128 * Returns the escape probability of the model. 129 * </p> 130 * 131 * @return escape probability of the model 132 */ 133 public double getProbEscape() { 134 return probEscape; 135 } 136 137 /** 138 * <p> 139 * Calculates the probability of the next event based on the formula:<br> 140 * P_{PPM}(X_n|X_{n-1},...,X_{n-k}) = \sum_{i=k}^min escape^{k-i} 141 * P_{MM^i}(X_n 142 * |X_{n-1},...,X_{n-i})(1-escape)+escape^(k-min)P(X_n|X_{n-i},... 143 * ,X_{n-min})<br> 144 * P_{MM^i} denotes the probability in an i-th order Markov model. 145 * </p> 146 * 147 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 148 * de.ugoe.cs.quest.eventcore.Event) 149 */ 150 @Override 151 public double getProbability(List<Event> context, 152 Event symbol) { 153 if (context == null) { 154 throw new InvalidParameterException("context must not be null"); 155 } 156 if (symbol == null) { 157 throw new InvalidParameterException("symbol must not be null"); 158 } 159 double result = 0.0d; 160 double resultCurrentContex = 0.0d; 161 double resultShorterContex = 0.0d; 162 163 List<Event> contextCopy; 164 if (context.size() >= trieOrder) { 165 contextCopy = new LinkedList<Event>(context.subList( 166 context.size() - trieOrder + 1, context.size())); 167 } else { 168 contextCopy = new LinkedList<Event>(context); 169 } 170 171 Collection<Event> followers = trie.getFollowingSymbols(contextCopy); // \Sigma' 172 int sumCountFollowers = 0; // N(s\sigma') 173 for (Event follower : followers) { 174 sumCountFollowers += trie.getCount(contextCopy, follower); 175 } 176 177 int countSymbol = trie.getCount(contextCopy, symbol); // N(s\sigma) 178 if (sumCountFollowers == 0) { 179 resultCurrentContex = 0.0; 180 } else { 181 resultCurrentContex = (double) countSymbol / sumCountFollowers; 182 } 183 if (contextCopy.size() > minOrder) { 184 resultCurrentContex *= (1 - probEscape); 185 contextCopy.remove(0); 186 if (contextCopy.size() >= minOrder) { 187 double probSuffix = getProbability(contextCopy, symbol); 188 189 if (followers.size() == 0) { 190 resultShorterContex = probSuffix; 191 } else { 192 resultShorterContex = probEscape * probSuffix; 193 } 194 } 195 } 196 result = resultCurrentContex + resultShorterContex; 197 198 return result; 199 } 26 /** 27 * <p> 28 * Id for object serialization. 29 * </p> 30 */ 31 private static final long serialVersionUID = 1L; 32 33 /** 34 * <p> 35 * Minimum order of the Markov model. 36 * </p> 37 */ 38 protected int minOrder; 39 40 /** 41 * <p> 42 * Probability to use a lower-order Markov model 43 * </p> 44 */ 45 protected double probEscape; 46 47 /** 48 * <p> 49 * Constructor. Creates a new PredictionByPartialMatch model with a given Markov order and a 50 * default escape probability of 0.1. 51 * </p> 52 * 53 * @param markovOrder 54 * Markov order of the model 55 * @param r 56 * random number generator used by probabilistic methods of the class 57 */ 58 public PredictionByPartialMatch(int markovOrder, Random r) { 59 this(markovOrder, r, 0.1); 60 } 61 62 /** 63 * <p> 64 * Creates a new PredictionByPartialMatch model with a given Markov order and escape 65 * probability. 66 * </p> 67 * 68 * @param markovOrder 69 * Markov order of the model 70 * @param r 71 * random number generator used by probabilistic methods of the class 72 * @param probEscape 73 * escape probability used by the model 74 */ 75 public PredictionByPartialMatch(int markovOrder, Random r, double probEscape) { 76 this(markovOrder, 0, r, probEscape); 77 } 78 79 /** 80 * <p> 81 * Creates a new PredictionByPartialMatch model with a given Markov order and escape 82 * probability. 83 * </p> 84 * 85 * @param markovOrder 86 * Markov order of the model 87 * @param minOrder 88 * minimum order of the model; if this order is reached, there is no escape 89 * @param r 90 * random number generator used by probabilistic methods of the class 91 * @param probEscape 92 * escape probability used by the model 93 * @throws InvalidParameterException 94 * thrown if minOrder is less than 0 or greater than markovOrder or probEscape is 95 * not in the interval (0,1) 96 */ 97 public PredictionByPartialMatch(int markovOrder, int minOrder, Random r, double probEscape) { 98 super(markovOrder, r); 99 if (minOrder < 0) { 100 throw new InvalidParameterException("minOrder must be greather than or equal to 0"); 101 } 102 if (minOrder > markovOrder) { 103 throw new InvalidParameterException( 104 "minOrder must be less than or equal to markovOrder"); 105 } 106 if (probEscape <= 0.0 || probEscape >= 1.0) { 107 throw new InvalidParameterException("probEscape must be in the interval (0,1)"); 108 } 109 this.probEscape = probEscape; 110 this.minOrder = minOrder; 111 } 112 113 /** 114 * <p> 115 * Sets the escape probability of the model. 116 * </p> 117 * 118 * @param probEscape 119 * new escape probability 120 */ 121 public void setProbEscape(double probEscape) { 122 this.probEscape = probEscape; 123 } 124 125 /** 126 * <p> 127 * Returns the escape probability of the model. 128 * </p> 129 * 130 * @return escape probability of the model 131 */ 132 public double getProbEscape() { 133 return probEscape; 134 } 135 136 /** 137 * <p> 138 * Calculates the probability of the next event based on the formula:<br> 139 * P_{PPM}(X_n|X_{n-1},...,X_{n-k}) = \sum_{i=k}^min escape^{k-i} P_{MM^i}(X_n 140 * |X_{n-1},...,X_{n-i})(1-escape)+escape^(k-min)P(X_n|X_{n-i},... ,X_{n-min})<br> 141 * P_{MM^i} denotes the probability in an i-th order Markov model. 142 * </p> 143 * 144 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util.List, 145 * de.ugoe.cs.quest.eventcore.Event) 146 */ 147 @Override 148 public double getProbability(List<Event> context, Event symbol) { 149 if (context == null) { 150 throw new InvalidParameterException("context must not be null"); 151 } 152 if (symbol == null) { 153 throw new InvalidParameterException("symbol must not be null"); 154 } 155 double result = 0.0d; 156 double resultCurrentContex = 0.0d; 157 double resultShorterContex = 0.0d; 158 159 List<Event> contextCopy; 160 if (context.size() >= trieOrder) { 161 contextCopy = 162 new LinkedList<Event>(context.subList(context.size() - trieOrder + 1, 163 context.size())); 164 } 165 else { 166 contextCopy = new LinkedList<Event>(context); 167 } 168 169 Collection<Event> followers = trie.getFollowingSymbols(contextCopy); // \Sigma' 170 int sumCountFollowers = 0; // N(s\sigma') 171 for (Event follower : followers) { 172 sumCountFollowers += trie.getCount(contextCopy, follower); 173 } 174 175 int countSymbol = trie.getCount(contextCopy, symbol); // N(s\sigma) 176 if (sumCountFollowers == 0) { 177 resultCurrentContex = 0.0; 178 } 179 else { 180 resultCurrentContex = (double) countSymbol / sumCountFollowers; 181 } 182 if (contextCopy.size() > minOrder) { 183 resultCurrentContex *= (1 - probEscape); 184 contextCopy.remove(0); 185 if (contextCopy.size() >= minOrder) { 186 double probSuffix = getProbability(contextCopy, symbol); 187 188 if (followers.size() == 0) { 189 resultShorterContex = probSuffix; 190 } 191 else { 192 resultShorterContex = probEscape * probSuffix; 193 } 194 } 195 } 196 result = resultCurrentContex + resultShorterContex; 197 198 return result; 199 } 200 200 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/Trie.java
r518 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 17 18 /** 18 19 * <p> 19 * This class implements a <it>trie</it>, i.e., a tree of sequences that the 20 * occurence of subsequences up to a predefined length. This length is the trie 21 * order. 20 * This class implements a <it>trie</it>, i.e., a tree of sequences that the occurence of 21 * subsequences up to a predefined length. This length is the trie order. 22 22 * </p> 23 23 * … … 31 31 public class Trie<T> implements IDotCompatible, Serializable { 32 32 33 /** 34 * <p> 35 * Id for object serialization. 36 * </p> 37 */ 38 private static final long serialVersionUID = 1L; 39 40 /** 41 * <p> 42 * Collection of all symbols occuring in the trie. 43 * </p> 44 */ 45 private Collection<T> knownSymbols; 46 47 /** 48 * <p> 49 * Reference to the root of the trie. 50 * </p> 51 */ 52 private final TrieNode<T> rootNode; 53 54 /** 55 * <p> 56 * Contructor. Creates a new Trie. 57 * </p> 58 */ 59 public Trie() { 60 rootNode = new TrieNode<T>(); 61 knownSymbols = new LinkedHashSet<T>(); 62 } 63 64 /** 65 * <p> 66 * Copy-Constructor. Creates a new Trie as the copy of other. The other trie 67 * must noch be null. 68 * </p> 69 * 70 * @param other 71 * Trie that is copied 72 */ 73 public Trie(Trie<T> other) { 74 if (other == null) { 75 throw new InvalidParameterException("other trie must not be null"); 76 } 77 rootNode = new TrieNode<T>(other.rootNode); 78 knownSymbols = new LinkedHashSet<T>(other.knownSymbols); 79 } 80 81 /** 82 * <p> 83 * Returns a collection of all symbols occuring in the trie. 84 * </p> 85 * 86 * @return symbols occuring in the trie 87 */ 88 public Collection<T> getKnownSymbols() { 89 return new LinkedHashSet<T>(knownSymbols); 90 } 91 92 /** 93 * <p> 94 * Trains the current trie using the given sequence and adds all subsequence 95 * of length {@code maxOrder}. 96 * </p> 97 * 98 * @param sequence 99 * sequence whose subsequences are added to the trie 100 * @param maxOrder 101 * maximum length of the subsequences added to the trie 102 */ 103 public void train(List<T> sequence, int maxOrder) { 104 if (maxOrder < 1) { 105 return; 106 } 107 IncompleteMemory<T> latestActions = new IncompleteMemory<T>(maxOrder); 108 int i = 0; 109 for (T currentEvent : sequence) { 110 latestActions.add(currentEvent); 111 knownSymbols.add(currentEvent); 112 i++; 113 if (i >= maxOrder) { 114 add(latestActions.getLast(maxOrder)); 115 } 116 } 117 int sequenceLength = sequence.size(); 118 for (int j = maxOrder - 1; j > 0; j--) { 119 add(sequence.subList(sequenceLength - j, sequenceLength)); 120 } 121 } 122 123 /** 124 * <p> 125 * Adds a given subsequence to the trie and increases the counters 126 * accordingly. 127 * </p> 128 * 129 * @param subsequence 130 * subsequence whose counters are increased 131 * @see TrieNode#add(List) 132 */ 133 protected void add(List<T> subsequence) { 134 if (subsequence != null && !subsequence.isEmpty()) { 135 knownSymbols.addAll(subsequence); 136 subsequence = new LinkedList<T>(subsequence); // defensive copy! 137 T firstSymbol = subsequence.get(0); 138 TrieNode<T> node = getChildCreate(firstSymbol); 139 node.add(subsequence); 140 } 141 } 142 143 /** 144 * <p> 145 * Returns the child of the root node associated with the given symbol or 146 * creates it if it does not exist yet. 147 * </p> 148 * 149 * @param symbol 150 * symbol whose node is required 151 * @return node associated with the symbol 152 * @see TrieNode#getChildCreate(Object) 153 */ 154 protected TrieNode<T> getChildCreate(T symbol) { 155 return rootNode.getChildCreate(symbol); 156 } 157 158 /** 159 * <p> 160 * Returns the child of the root node associated with the given symbol or 161 * null if it does not exist. 162 * </p> 163 * 164 * @param symbol 165 * symbol whose node is required 166 * @return node associated with the symbol; null if no such node exists 167 * @see TrieNode#getChild(Object) 168 */ 169 protected TrieNode<T> getChild(T symbol) { 170 return rootNode.getChild(symbol); 171 } 172 173 /** 174 * <p> 175 * Returns the number of occurences of the given sequence. 176 * </p> 177 * 178 * @param sequence 179 * sequence whose number of occurences is required 180 * @return number of occurences of the sequence 181 */ 182 public int getCount(List<T> sequence) { 183 int count = 0; 184 TrieNode<T> node = find(sequence); 185 if (node != null) { 186 count = node.getCount(); 187 } 188 return count; 189 } 190 191 /** 192 * <p> 193 * Returns the number of occurences of the given prefix and a symbol that 194 * follows it.<br> 195 * Convenience function to simplify usage of {@link #getCount(List)}. 196 * </p> 197 * 198 * @param sequence 199 * prefix of the sequence 200 * @param follower 201 * suffix of the sequence 202 * @return number of occurences of the sequence 203 * @see #getCount(List) 204 */ 205 public int getCount(List<T> sequence, T follower) { 206 List<T> tmpSequence = new LinkedList<T>(sequence); 207 tmpSequence.add(follower); 208 return getCount(tmpSequence); 209 210 } 211 212 /** 213 * <p> 214 * Searches the trie for a given sequence and returns the node associated 215 * with the sequence or null if no such node is found. 216 * </p> 217 * 218 * @param sequence 219 * sequence that is searched for 220 * @return node associated with the sequence 221 * @see TrieNode#find(List) 222 */ 223 public TrieNode<T> find(List<T> sequence) { 224 if (sequence == null || sequence.isEmpty()) { 225 return rootNode; 226 } 227 List<T> sequenceCopy = new LinkedList<T>(sequence); 228 TrieNode<T> result = null; 229 TrieNode<T> node = getChild(sequenceCopy.get(0)); 230 if (node != null) { 231 sequenceCopy.remove(0); 232 result = node.find(sequenceCopy); 233 } 234 return result; 235 } 236 237 /** 238 * <p> 239 * Returns a collection of all symbols that follow a given sequence in the 240 * trie. In case the sequence is not found or no symbols follow the sequence 241 * the result will be empty. 242 * </p> 243 * 244 * @param sequence 245 * sequence whose followers are returned 246 * @return symbols following the given sequence 247 * @see TrieNode#getFollowingSymbols() 248 */ 249 public Collection<T> getFollowingSymbols(List<T> sequence) { 250 Collection<T> result = new LinkedList<T>(); 251 TrieNode<T> node = find(sequence); 252 if (node != null) { 253 result = node.getFollowingSymbols(); 254 } 255 return result; 256 } 257 258 /** 259 * <p> 260 * Returns the longest suffix of the given context that is contained in the 261 * tree and whose children are leaves. 262 * </p> 263 * 264 * @param context 265 * context whose suffix is searched for 266 * @return longest suffix of the context 267 */ 268 public List<T> getContextSuffix(List<T> context) { 269 List<T> contextSuffix; 270 if (context != null) { 271 contextSuffix = new LinkedList<T>(context); // defensive copy 272 } else { 273 contextSuffix = new LinkedList<T>(); 274 } 275 boolean suffixFound = false; 276 277 while (!suffixFound) { 278 if (contextSuffix.isEmpty()) { 279 suffixFound = true; // suffix is the empty word 280 } else { 281 TrieNode<T> node = find(contextSuffix); 282 if (node != null) { 283 if (!node.getFollowingSymbols().isEmpty()) { 284 suffixFound = true; 285 } 286 } 287 if (!suffixFound) { 288 contextSuffix.remove(0); 289 } 290 } 291 } 292 293 return contextSuffix; 294 } 295 296 /** 297 * <p> 298 * Helper class for graph visualization of a trie. 299 * </p> 300 * 301 * @author Steffen Herbold 302 * @version 1.0 303 */ 304 static public class Edge { 305 } 306 307 /** 308 * <p> 309 * Helper class for graph visualization of a trie. 310 * </p> 311 * 312 * @author Steffen Herbold 313 * @version 1.0 314 */ 315 static public class TrieVertex { 316 317 /** 318 * <p> 319 * Id of the vertex. 320 * </p> 321 */ 322 private String id; 323 324 /** 325 * <p> 326 * Contructor. Creates a new TrieVertex. 327 * </p> 328 * 329 * @param id 330 * id of the vertex 331 */ 332 protected TrieVertex(String id) { 333 this.id = id; 334 } 335 336 /** 337 * <p> 338 * Returns the id of the vertex. 339 * </p> 340 * 341 * @see java.lang.Object#toString() 342 */ 343 @Override 344 public String toString() { 345 return id; 346 } 347 } 348 349 /** 350 * <p> 351 * Returns a {@link Graph} representation of the trie. 352 * </p> 353 * 354 * @return {@link Graph} representation of the trie 355 */ 356 protected Tree<TrieVertex, Edge> getGraph() { 357 DelegateTree<TrieVertex, Edge> graph = new DelegateTree<TrieVertex, Edge>(); 358 rootNode.getGraph(null, graph); 359 return graph; 360 } 361 362 /* 363 * (non-Javadoc) 364 * 365 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 366 */ 367 public String getDotRepresentation() { 368 StringBuilder stringBuilder = new StringBuilder(); 369 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 370 rootNode.appendDotRepresentation(stringBuilder); 371 stringBuilder.append('}' + StringTools.ENDLINE); 372 return stringBuilder.toString(); 373 } 374 375 /** 376 * <p> 377 * Returns the string representation of the root node. 378 * </p> 379 * 380 * @see TrieNode#toString() 381 * @see java.lang.Object#toString() 382 */ 383 @Override 384 public String toString() { 385 return rootNode.toString(); 386 } 387 388 /** 389 * <p> 390 * Returns the number of symbols contained in the trie. 391 * </p> 392 * 393 * @return number of symbols contained in the trie 394 */ 395 public int getNumSymbols() { 396 return knownSymbols.size(); 397 } 398 399 /** 400 * <p> 401 * Returns the number of trie nodes that are ancestors of a leaf. This is 402 * the equivalent to the number of states a first-order markov model would 403 * have. 404 * <p> 405 * 406 * @return number of trie nodes that are ancestors of leafs. 407 */ 408 public int getNumLeafAncestors() { 409 List<TrieNode<T>> ancestors = new LinkedList<TrieNode<T>>(); 410 rootNode.getLeafAncestors(ancestors); 411 return ancestors.size(); 412 } 413 414 /** 415 * <p> 416 * Returns the number of trie nodes that are leafs. 417 * </p> 418 * 419 * @return number of leafs in the trie 420 */ 421 public int getNumLeafs() { 422 return rootNode.getNumLeafs(); 423 } 424 425 /** 426 * <p> 427 * Updates the list of known symbols by replacing it with all symbols that 428 * are found in the child nodes of the root node. This should be the same as 429 * all symbols that are contained in the trie. 430 * </p> 431 */ 432 public void updateKnownSymbols() { 433 knownSymbols = new HashSet<T>(); 434 for (TrieNode<T> node : rootNode.getChildren()) { 435 knownSymbols.add(node.getSymbol()); 436 } 437 } 438 439 /** 440 * <p> 441 * Two Tries are defined as equal, if their {@link #rootNode} are equal. 442 * </p> 443 * 444 * @see java.lang.Object#equals(java.lang.Object) 445 */ 446 @SuppressWarnings("rawtypes") 447 @Override 448 public boolean equals(Object other) { 449 if (other == this) { 450 return true; 451 } 452 if (other instanceof Trie) { 453 return rootNode.equals(((Trie) other).rootNode); 454 } 455 return false; 456 } 457 458 /* 459 * (non-Javadoc) 460 * 461 * @see java.lang.Object#hashCode() 462 */ 463 @Override 464 public int hashCode() { 465 int multiplier = 17; 466 int hash = 42; 467 if (rootNode != null) { 468 hash = multiplier * hash + rootNode.hashCode(); 469 } 470 return hash; 471 } 33 /** 34 * <p> 35 * Id for object serialization. 36 * </p> 37 */ 38 private static final long serialVersionUID = 1L; 39 40 /** 41 * <p> 42 * Collection of all symbols occuring in the trie. 43 * </p> 44 */ 45 private Collection<T> knownSymbols; 46 47 /** 48 * <p> 49 * Reference to the root of the trie. 50 * </p> 51 */ 52 private final TrieNode<T> rootNode; 53 54 /** 55 * <p> 56 * Contructor. Creates a new Trie. 57 * </p> 58 */ 59 public Trie() { 60 rootNode = new TrieNode<T>(); 61 knownSymbols = new LinkedHashSet<T>(); 62 } 63 64 /** 65 * <p> 66 * Copy-Constructor. Creates a new Trie as the copy of other. The other trie must noch be null. 67 * </p> 68 * 69 * @param other 70 * Trie that is copied 71 */ 72 public Trie(Trie<T> other) { 73 if (other == null) { 74 throw new InvalidParameterException("other trie must not be null"); 75 } 76 rootNode = new TrieNode<T>(other.rootNode); 77 knownSymbols = new LinkedHashSet<T>(other.knownSymbols); 78 } 79 80 /** 81 * <p> 82 * Returns a collection of all symbols occuring in the trie. 83 * </p> 84 * 85 * @return symbols occuring in the trie 86 */ 87 public Collection<T> getKnownSymbols() { 88 return new LinkedHashSet<T>(knownSymbols); 89 } 90 91 /** 92 * <p> 93 * Trains the current trie using the given sequence and adds all subsequence of length 94 * {@code maxOrder}. 95 * </p> 96 * 97 * @param sequence 98 * sequence whose subsequences are added to the trie 99 * @param maxOrder 100 * maximum length of the subsequences added to the trie 101 */ 102 public void train(List<T> sequence, int maxOrder) { 103 if (maxOrder < 1) { 104 return; 105 } 106 IncompleteMemory<T> latestActions = new IncompleteMemory<T>(maxOrder); 107 int i = 0; 108 for (T currentEvent : sequence) { 109 latestActions.add(currentEvent); 110 knownSymbols.add(currentEvent); 111 i++; 112 if (i >= maxOrder) { 113 add(latestActions.getLast(maxOrder)); 114 } 115 } 116 int sequenceLength = sequence.size(); 117 for (int j = maxOrder - 1; j > 0; j--) { 118 add(sequence.subList(sequenceLength - j, sequenceLength)); 119 } 120 } 121 122 /** 123 * <p> 124 * Adds a given subsequence to the trie and increases the counters accordingly. 125 * </p> 126 * 127 * @param subsequence 128 * subsequence whose counters are increased 129 * @see TrieNode#add(List) 130 */ 131 protected void add(List<T> subsequence) { 132 if (subsequence != null && !subsequence.isEmpty()) { 133 knownSymbols.addAll(subsequence); 134 subsequence = new LinkedList<T>(subsequence); // defensive copy! 135 T firstSymbol = subsequence.get(0); 136 TrieNode<T> node = getChildCreate(firstSymbol); 137 node.add(subsequence); 138 } 139 } 140 141 /** 142 * <p> 143 * Returns the child of the root node associated with the given symbol or creates it if it does 144 * not exist yet. 145 * </p> 146 * 147 * @param symbol 148 * symbol whose node is required 149 * @return node associated with the symbol 150 * @see TrieNode#getChildCreate(Object) 151 */ 152 protected TrieNode<T> getChildCreate(T symbol) { 153 return rootNode.getChildCreate(symbol); 154 } 155 156 /** 157 * <p> 158 * Returns the child of the root node associated with the given symbol or null if it does not 159 * exist. 160 * </p> 161 * 162 * @param symbol 163 * symbol whose node is required 164 * @return node associated with the symbol; null if no such node exists 165 * @see TrieNode#getChild(Object) 166 */ 167 protected TrieNode<T> getChild(T symbol) { 168 return rootNode.getChild(symbol); 169 } 170 171 /** 172 * <p> 173 * Returns the number of occurences of the given sequence. 174 * </p> 175 * 176 * @param sequence 177 * sequence whose number of occurences is required 178 * @return number of occurences of the sequence 179 */ 180 public int getCount(List<T> sequence) { 181 int count = 0; 182 TrieNode<T> node = find(sequence); 183 if (node != null) { 184 count = node.getCount(); 185 } 186 return count; 187 } 188 189 /** 190 * <p> 191 * Returns the number of occurences of the given prefix and a symbol that follows it.<br> 192 * Convenience function to simplify usage of {@link #getCount(List)}. 193 * </p> 194 * 195 * @param sequence 196 * prefix of the sequence 197 * @param follower 198 * suffix of the sequence 199 * @return number of occurences of the sequence 200 * @see #getCount(List) 201 */ 202 public int getCount(List<T> sequence, T follower) { 203 List<T> tmpSequence = new LinkedList<T>(sequence); 204 tmpSequence.add(follower); 205 return getCount(tmpSequence); 206 207 } 208 209 /** 210 * <p> 211 * Searches the trie for a given sequence and returns the node associated with the sequence or 212 * null if no such node is found. 213 * </p> 214 * 215 * @param sequence 216 * sequence that is searched for 217 * @return node associated with the sequence 218 * @see TrieNode#find(List) 219 */ 220 public TrieNode<T> find(List<T> sequence) { 221 if (sequence == null || sequence.isEmpty()) { 222 return rootNode; 223 } 224 List<T> sequenceCopy = new LinkedList<T>(sequence); 225 TrieNode<T> result = null; 226 TrieNode<T> node = getChild(sequenceCopy.get(0)); 227 if (node != null) { 228 sequenceCopy.remove(0); 229 result = node.find(sequenceCopy); 230 } 231 return result; 232 } 233 234 /** 235 * <p> 236 * Returns a collection of all symbols that follow a given sequence in the trie. In case the 237 * sequence is not found or no symbols follow the sequence the result will be empty. 238 * </p> 239 * 240 * @param sequence 241 * sequence whose followers are returned 242 * @return symbols following the given sequence 243 * @see TrieNode#getFollowingSymbols() 244 */ 245 public Collection<T> getFollowingSymbols(List<T> sequence) { 246 Collection<T> result = new LinkedList<T>(); 247 TrieNode<T> node = find(sequence); 248 if (node != null) { 249 result = node.getFollowingSymbols(); 250 } 251 return result; 252 } 253 254 /** 255 * <p> 256 * Returns the longest suffix of the given context that is contained in the tree and whose 257 * children are leaves. 258 * </p> 259 * 260 * @param context 261 * context whose suffix is searched for 262 * @return longest suffix of the context 263 */ 264 public List<T> getContextSuffix(List<T> context) { 265 List<T> contextSuffix; 266 if (context != null) { 267 contextSuffix = new LinkedList<T>(context); // defensive copy 268 } 269 else { 270 contextSuffix = new LinkedList<T>(); 271 } 272 boolean suffixFound = false; 273 274 while (!suffixFound) { 275 if (contextSuffix.isEmpty()) { 276 suffixFound = true; // suffix is the empty word 277 } 278 else { 279 TrieNode<T> node = find(contextSuffix); 280 if (node != null) { 281 if (!node.getFollowingSymbols().isEmpty()) { 282 suffixFound = true; 283 } 284 } 285 if (!suffixFound) { 286 contextSuffix.remove(0); 287 } 288 } 289 } 290 291 return contextSuffix; 292 } 293 294 /** 295 * <p> 296 * Helper class for graph visualization of a trie. 297 * </p> 298 * 299 * @author Steffen Herbold 300 * @version 1.0 301 */ 302 static public class Edge {} 303 304 /** 305 * <p> 306 * Helper class for graph visualization of a trie. 307 * </p> 308 * 309 * @author Steffen Herbold 310 * @version 1.0 311 */ 312 static public class TrieVertex { 313 314 /** 315 * <p> 316 * Id of the vertex. 317 * </p> 318 */ 319 private String id; 320 321 /** 322 * <p> 323 * Contructor. Creates a new TrieVertex. 324 * </p> 325 * 326 * @param id 327 * id of the vertex 328 */ 329 protected TrieVertex(String id) { 330 this.id = id; 331 } 332 333 /** 334 * <p> 335 * Returns the id of the vertex. 336 * </p> 337 * 338 * @see java.lang.Object#toString() 339 */ 340 @Override 341 public String toString() { 342 return id; 343 } 344 } 345 346 /** 347 * <p> 348 * Returns a {@link Graph} representation of the trie. 349 * </p> 350 * 351 * @return {@link Graph} representation of the trie 352 */ 353 protected Tree<TrieVertex, Edge> getGraph() { 354 DelegateTree<TrieVertex, Edge> graph = new DelegateTree<TrieVertex, Edge>(); 355 rootNode.getGraph(null, graph); 356 return graph; 357 } 358 359 /* 360 * (non-Javadoc) 361 * 362 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 363 */ 364 public String getDotRepresentation() { 365 StringBuilder stringBuilder = new StringBuilder(); 366 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 367 rootNode.appendDotRepresentation(stringBuilder); 368 stringBuilder.append('}' + StringTools.ENDLINE); 369 return stringBuilder.toString(); 370 } 371 372 /** 373 * <p> 374 * Returns the string representation of the root node. 375 * </p> 376 * 377 * @see TrieNode#toString() 378 * @see java.lang.Object#toString() 379 */ 380 @Override 381 public String toString() { 382 return rootNode.toString(); 383 } 384 385 /** 386 * <p> 387 * Returns the number of symbols contained in the trie. 388 * </p> 389 * 390 * @return number of symbols contained in the trie 391 */ 392 public int getNumSymbols() { 393 return knownSymbols.size(); 394 } 395 396 /** 397 * <p> 398 * Returns the number of trie nodes that are ancestors of a leaf. This is the equivalent to the 399 * number of states a first-order markov model would have. 400 * <p> 401 * 402 * @return number of trie nodes that are ancestors of leafs. 403 */ 404 public int getNumLeafAncestors() { 405 List<TrieNode<T>> ancestors = new LinkedList<TrieNode<T>>(); 406 rootNode.getLeafAncestors(ancestors); 407 return ancestors.size(); 408 } 409 410 /** 411 * <p> 412 * Returns the number of trie nodes that are leafs. 413 * </p> 414 * 415 * @return number of leafs in the trie 416 */ 417 public int getNumLeafs() { 418 return rootNode.getNumLeafs(); 419 } 420 421 /** 422 * <p> 423 * Updates the list of known symbols by replacing it with all symbols that are found in the 424 * child nodes of the root node. This should be the same as all symbols that are contained in 425 * the trie. 426 * </p> 427 */ 428 public void updateKnownSymbols() { 429 knownSymbols = new HashSet<T>(); 430 for (TrieNode<T> node : rootNode.getChildren()) { 431 knownSymbols.add(node.getSymbol()); 432 } 433 } 434 435 /** 436 * <p> 437 * Two Tries are defined as equal, if their {@link #rootNode} are equal. 438 * </p> 439 * 440 * @see java.lang.Object#equals(java.lang.Object) 441 */ 442 @SuppressWarnings("rawtypes") 443 @Override 444 public boolean equals(Object other) { 445 if (other == this) { 446 return true; 447 } 448 if (other instanceof Trie) { 449 return rootNode.equals(((Trie) other).rootNode); 450 } 451 return false; 452 } 453 454 /* 455 * (non-Javadoc) 456 * 457 * @see java.lang.Object#hashCode() 458 */ 459 @Override 460 public int hashCode() { 461 int multiplier = 17; 462 int hash = 42; 463 if (rootNode != null) { 464 hash = multiplier * hash + rootNode.hashCode(); 465 } 466 return hash; 467 } 472 468 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/TrieBasedModel.java
r547 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 18 19 /** 19 20 * <p> 20 * Implements a skeleton for stochastic processes that can calculate 21 * probabilities based on a trie. The skeleton provides all functionalities of 22 * {@link IStochasticProcess} except 21 * Implements a skeleton for stochastic processes that can calculate probabilities based on a trie. 22 * The skeleton provides all functionalities of {@link IStochasticProcess} except 23 23 * {@link IStochasticProcess#getProbability(List, Event)}. 24 24 * </p> … … 29 29 public abstract class TrieBasedModel implements IStochasticProcess { 30 30 31 /** 32 * <p> 33 * Id for object serialization. 34 * </p> 35 */ 36 private static final long serialVersionUID = 1L; 37 38 /** 39 * <p> 40 * The order of the trie, i.e., the maximum length of subsequences stored in 41 * the trie. 42 * </p> 43 */ 44 protected int trieOrder; 45 46 /** 47 * <p> 48 * Trie on which the probability calculations are based. 49 * </p> 50 */ 51 protected Trie<Event> trie = null; 52 53 /** 54 * <p> 55 * Random number generator used by probabilistic sequence generation 56 * methods. 57 * </p> 58 */ 59 protected final Random r; 60 61 /** 62 * <p> 63 * Constructor. Creates a new TrieBasedModel that can be used for stochastic 64 * processes with a Markov order less than or equal to {@code markovOrder}. 65 * </p> 66 * 67 * @param markovOrder 68 * Markov order of the model 69 * @param r 70 * random number generator used by probabilistic methods of the 71 * class 72 * @throws InvalidParameterException 73 * thrown if markovOrder is less than 0 or the random number 74 * generator r is null 75 */ 76 public TrieBasedModel(int markovOrder, Random r) { 77 super(); 78 if (markovOrder < 0) { 79 throw new InvalidParameterException( 80 "markov order must not be less than 0"); 81 } 82 if (r == null) { 83 throw new InvalidParameterException( 84 "random number generator r must not be null"); 85 } 86 this.trieOrder = markovOrder + 1; 87 this.r = r; 88 } 89 90 /** 91 * <p> 92 * Trains the model by generating a trie from which probabilities are 93 * calculated. The trie is newly generated based solely on the passed 94 * sequences. If an existing model should only be updated, use 95 * {@link #update(Collection)} instead. 96 * </p> 97 * 98 * @param sequences 99 * training data 100 * @throws InvalidParameterException 101 * thrown is sequences is null 102 */ 103 public void train(Collection<List<Event>> sequences) { 104 trie = null; 105 update(sequences); 106 } 107 108 /** 109 * <p> 110 * Trains the model by updating the trie from which the probabilities are 111 * calculated. This function updates an existing trie. In case no trie 112 * exists yet, a new trie is generated and the function behaves like 113 * {@link #train(Collection)}. 114 * </p> 115 * 116 * @param sequences 117 * training data 118 * @throws InvalidParameterException 119 * thrown is sequences is null 120 */ 121 public void update(Collection<List<Event>> sequences) { 122 if (sequences == null) { 123 throw new InvalidParameterException("sequences must not be null"); 124 } 125 if (trie == null) { 126 trie = new Trie<Event>(); 127 } 128 for (List<Event> sequence : sequences) { 129 List<Event> currentSequence = new LinkedList<Event>(sequence); // defensive 130 // copy 131 currentSequence.add(0, Event.STARTEVENT); 132 currentSequence.add(Event.ENDEVENT); 133 134 trie.train(currentSequence, trieOrder); 135 } 136 } 137 138 /* 139 * (non-Javadoc) 140 * 141 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#randomSequence() 142 */ 143 @Override 144 public List<Event> randomSequence() { 145 return randomSequence(Integer.MAX_VALUE, true); 146 } 147 148 /* 149 * (non-Javadoc) 150 * 151 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#randomSequence() 152 */ 153 @Override 154 public List<Event> randomSequence(int maxLength, 155 boolean validEnd) { 156 List<Event> sequence = new LinkedList<Event>(); 157 if (trie != null) { 158 boolean endFound = false; 159 while (!endFound) { // outer loop for length checking 160 sequence = new LinkedList<Event>(); 161 IncompleteMemory<Event> context = new IncompleteMemory<Event>( 162 trieOrder - 1); 163 context.add(Event.STARTEVENT); 164 165 while (!endFound && sequence.size() <= maxLength) { 166 double randVal = r.nextDouble(); 167 double probSum = 0.0; 168 List<Event> currentContext = context.getLast(trieOrder); 169 for (Event symbol : trie.getKnownSymbols()) { 170 probSum += getProbability(currentContext, symbol); 171 if (probSum >= randVal) { 172 if (!(Event.STARTEVENT.equals(symbol) || Event.ENDEVENT 173 .equals(symbol))) { 174 // only add the symbol the sequence if it is not 175 // START or END 176 context.add(symbol); 177 sequence.add(symbol); 178 } 179 endFound = (Event.ENDEVENT.equals(symbol)) 180 || (!validEnd && sequence.size() == maxLength); 181 break; 182 } 183 } 184 } 185 } 186 } 187 return sequence; 188 } 189 190 /** 191 * <p> 192 * Returns a Dot representation of the internal trie. 193 * </p> 194 * 195 * @return dot representation of the internal trie 196 */ 197 public String getTrieDotRepresentation() { 198 if (trie == null) { 199 return ""; 200 } else { 201 return trie.getDotRepresentation(); 202 } 203 } 204 205 /** 206 * <p> 207 * Returns a {@link Tree} of the internal trie that can be used for 208 * visualization. 209 * </p> 210 * 211 * @return {@link Tree} depicting the internal trie 212 */ 213 public Tree<TrieVertex, Edge> getTrieGraph() { 214 if (trie == null) { 215 return null; 216 } else { 217 return trie.getGraph(); 218 } 219 } 220 221 /** 222 * <p> 223 * The string representation of the model is {@link Trie#toString()} of 224 * {@link #trie}. 225 * </p> 226 * 227 * @see java.lang.Object#toString() 228 */ 229 @Override 230 public String toString() { 231 if (trie == null) { 232 return ""; 233 } else { 234 return trie.toString(); 235 } 236 } 237 238 /* 239 * (non-Javadoc) 240 * 241 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumStates() 242 */ 243 @Override 244 public int getNumSymbols() { 245 if (trie == null) { 246 return 0; 247 } else { 248 return trie.getNumSymbols(); 249 } 250 } 251 252 /* 253 * (non-Javadoc) 254 * 255 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getStateStrings() 256 */ 257 @Override 258 public String[] getSymbolStrings() { 259 if (trie == null) { 260 return new String[0]; 261 } 262 String[] stateStrings = new String[getNumSymbols()]; 263 int i = 0; 264 for (Event symbol : trie.getKnownSymbols()) { 265 if (symbol.toString() == null) { 266 stateStrings[i] = "null"; 267 } else { 268 stateStrings[i] = symbol.toString(); 269 } 270 i++; 271 } 272 return stateStrings; 273 } 274 275 /* 276 * (non-Javadoc) 277 * 278 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getEvents() 279 */ 280 @Override 281 public Collection<Event> getEvents() { 282 if (trie == null) { 283 return new HashSet<Event>(); 284 } else { 285 return trie.getKnownSymbols(); 286 } 287 } 288 289 /* 290 * (non-Javadoc) 291 * 292 * @see 293 * de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateSequences(int) 294 */ 295 @Override 296 public Collection<List<Event>> generateSequences(int length) { 297 return generateSequences(length, false); 298 } 299 300 /* 301 * (non-Javadoc) 302 * 303 * @see 304 * de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateSequences(int, 305 * boolean) 306 */ 307 @Override 308 public Set<List<Event>> generateSequences(int length, 309 boolean fromStart) { 310 Set<List<Event>> sequenceSet = new LinkedHashSet<List<Event>>(); 311 if (length < 1) { 312 throw new InvalidParameterException( 313 "Length of generated subsequences must be at least 1."); 314 } 315 if (length == 1) { 316 if (fromStart) { 317 List<Event> subSeq = new LinkedList<Event>(); 318 subSeq.add(Event.STARTEVENT); 319 sequenceSet.add(subSeq); 320 } else { 321 for (Event event : getEvents()) { 322 List<Event> subSeq = new LinkedList<Event>(); 323 subSeq.add(event); 324 sequenceSet.add(subSeq); 325 } 326 } 327 return sequenceSet; 328 } 329 Collection<Event> events = getEvents(); 330 Collection<List<Event>> seqsShorter = generateSequences( 331 length - 1, fromStart); 332 for (Event event : events) { 333 for (List<Event> seqShorter : seqsShorter) { 334 Event lastEvent = event; 335 if (getProbability(seqShorter, lastEvent) > 0.0) { 336 List<Event> subSeq = new ArrayList<Event>(seqShorter); 337 subSeq.add(lastEvent); 338 sequenceSet.add(subSeq); 339 } 340 } 341 } 342 return sequenceSet; 343 } 344 345 /* 346 * (non-Javadoc) 347 * 348 * @see 349 * de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateValidSequences 350 * (int) 351 */ 352 @Override 353 public Collection<List<Event>> generateValidSequences( 354 int length) { 355 // check for min-length implicitly done by generateSequences 356 Collection<List<Event>> allSequences = generateSequences( 357 length, true); 358 Collection<List<Event>> validSequences = new LinkedHashSet<List<Event>>(); 359 for (List<Event> sequence : allSequences) { 360 if (sequence.size() == length 361 && Event.ENDEVENT.equals(sequence.get(sequence.size() - 1))) { 362 validSequences.add(sequence); 363 } 364 } 365 return validSequences; 366 } 367 368 /* 369 * (non-Javadoc) 370 * 371 * @see 372 * de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util 373 * .List) 374 */ 375 @Override 376 public double getProbability(List<Event> sequence) { 377 if (sequence == null) { 378 throw new InvalidParameterException("sequence must not be null"); 379 } 380 double prob = 1.0; 381 List<Event> context = new LinkedList<Event>(); 382 for (Event event : sequence) { 383 prob *= getProbability(context, event); 384 context.add(event); 385 } 386 return prob; 387 } 388 389 /* 390 * (non-Javadoc) 391 * 392 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumFOMStates() 393 */ 394 @Override 395 public int getNumFOMStates() { 396 if (trie == null) { 397 return 0; 398 } else { 399 return trie.getNumLeafAncestors(); 400 } 401 } 402 403 /* 404 * (non-Javadoc) 405 * 406 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumTransitions() 407 */ 408 @Override 409 public int getNumTransitions() { 410 if (trie == null) { 411 return 0; 412 } else { 413 return trie.getNumLeafs(); 414 } 415 } 31 /** 32 * <p> 33 * Id for object serialization. 34 * </p> 35 */ 36 private static final long serialVersionUID = 1L; 37 38 /** 39 * <p> 40 * The order of the trie, i.e., the maximum length of subsequences stored in the trie. 41 * </p> 42 */ 43 protected int trieOrder; 44 45 /** 46 * <p> 47 * Trie on which the probability calculations are based. 48 * </p> 49 */ 50 protected Trie<Event> trie = null; 51 52 /** 53 * <p> 54 * Random number generator used by probabilistic sequence generation methods. 55 * </p> 56 */ 57 protected final Random r; 58 59 /** 60 * <p> 61 * Constructor. Creates a new TrieBasedModel that can be used for stochastic processes with a 62 * Markov order less than or equal to {@code markovOrder}. 63 * </p> 64 * 65 * @param markovOrder 66 * Markov order of the model 67 * @param r 68 * random number generator used by probabilistic methods of the class 69 * @throws InvalidParameterException 70 * thrown if markovOrder is less than 0 or the random number generator r is null 71 */ 72 public TrieBasedModel(int markovOrder, Random r) { 73 super(); 74 if (markovOrder < 0) { 75 throw new InvalidParameterException("markov order must not be less than 0"); 76 } 77 if (r == null) { 78 throw new InvalidParameterException("random number generator r must not be null"); 79 } 80 this.trieOrder = markovOrder + 1; 81 this.r = r; 82 } 83 84 /** 85 * <p> 86 * Trains the model by generating a trie from which probabilities are calculated. The trie is 87 * newly generated based solely on the passed sequences. If an existing model should only be 88 * updated, use {@link #update(Collection)} instead. 89 * </p> 90 * 91 * @param sequences 92 * training data 93 * @throws InvalidParameterException 94 * thrown is sequences is null 95 */ 96 public void train(Collection<List<Event>> sequences) { 97 trie = null; 98 update(sequences); 99 } 100 101 /** 102 * <p> 103 * Trains the model by updating the trie from which the probabilities are calculated. This 104 * function updates an existing trie. In case no trie exists yet, a new trie is generated and 105 * the function behaves like {@link #train(Collection)}. 106 * </p> 107 * 108 * @param sequences 109 * training data 110 * @throws InvalidParameterException 111 * thrown is sequences is null 112 */ 113 public void update(Collection<List<Event>> sequences) { 114 if (sequences == null) { 115 throw new InvalidParameterException("sequences must not be null"); 116 } 117 if (trie == null) { 118 trie = new Trie<Event>(); 119 } 120 for (List<Event> sequence : sequences) { 121 List<Event> currentSequence = new LinkedList<Event>(sequence); // defensive 122 // copy 123 currentSequence.add(0, Event.STARTEVENT); 124 currentSequence.add(Event.ENDEVENT); 125 126 trie.train(currentSequence, trieOrder); 127 } 128 } 129 130 /* 131 * (non-Javadoc) 132 * 133 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#randomSequence() 134 */ 135 @Override 136 public List<Event> randomSequence() { 137 return randomSequence(Integer.MAX_VALUE, true); 138 } 139 140 /* 141 * (non-Javadoc) 142 * 143 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#randomSequence() 144 */ 145 @Override 146 public List<Event> randomSequence(int maxLength, boolean validEnd) { 147 List<Event> sequence = new LinkedList<Event>(); 148 if (trie != null) { 149 boolean endFound = false; 150 while (!endFound) { // outer loop for length checking 151 sequence = new LinkedList<Event>(); 152 IncompleteMemory<Event> context = new IncompleteMemory<Event>(trieOrder - 1); 153 context.add(Event.STARTEVENT); 154 155 while (!endFound && sequence.size() <= maxLength) { 156 double randVal = r.nextDouble(); 157 double probSum = 0.0; 158 List<Event> currentContext = context.getLast(trieOrder); 159 for (Event symbol : trie.getKnownSymbols()) { 160 probSum += getProbability(currentContext, symbol); 161 if (probSum >= randVal) { 162 if (!(Event.STARTEVENT.equals(symbol) || Event.ENDEVENT.equals(symbol))) 163 { 164 // only add the symbol the sequence if it is not 165 // START or END 166 context.add(symbol); 167 sequence.add(symbol); 168 } 169 endFound = 170 (Event.ENDEVENT.equals(symbol)) || 171 (!validEnd && sequence.size() == maxLength); 172 break; 173 } 174 } 175 } 176 } 177 } 178 return sequence; 179 } 180 181 /** 182 * <p> 183 * Returns a Dot representation of the internal trie. 184 * </p> 185 * 186 * @return dot representation of the internal trie 187 */ 188 public String getTrieDotRepresentation() { 189 if (trie == null) { 190 return ""; 191 } 192 else { 193 return trie.getDotRepresentation(); 194 } 195 } 196 197 /** 198 * <p> 199 * Returns a {@link Tree} of the internal trie that can be used for visualization. 200 * </p> 201 * 202 * @return {@link Tree} depicting the internal trie 203 */ 204 public Tree<TrieVertex, Edge> getTrieGraph() { 205 if (trie == null) { 206 return null; 207 } 208 else { 209 return trie.getGraph(); 210 } 211 } 212 213 /** 214 * <p> 215 * The string representation of the model is {@link Trie#toString()} of {@link #trie}. 216 * </p> 217 * 218 * @see java.lang.Object#toString() 219 */ 220 @Override 221 public String toString() { 222 if (trie == null) { 223 return ""; 224 } 225 else { 226 return trie.toString(); 227 } 228 } 229 230 /* 231 * (non-Javadoc) 232 * 233 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumStates() 234 */ 235 @Override 236 public int getNumSymbols() { 237 if (trie == null) { 238 return 0; 239 } 240 else { 241 return trie.getNumSymbols(); 242 } 243 } 244 245 /* 246 * (non-Javadoc) 247 * 248 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getStateStrings() 249 */ 250 @Override 251 public String[] getSymbolStrings() { 252 if (trie == null) { 253 return new String[0]; 254 } 255 String[] stateStrings = new String[getNumSymbols()]; 256 int i = 0; 257 for (Event symbol : trie.getKnownSymbols()) { 258 if (symbol.toString() == null) { 259 stateStrings[i] = "null"; 260 } 261 else { 262 stateStrings[i] = symbol.toString(); 263 } 264 i++; 265 } 266 return stateStrings; 267 } 268 269 /* 270 * (non-Javadoc) 271 * 272 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getEvents() 273 */ 274 @Override 275 public Collection<Event> getEvents() { 276 if (trie == null) { 277 return new HashSet<Event>(); 278 } 279 else { 280 return trie.getKnownSymbols(); 281 } 282 } 283 284 /* 285 * (non-Javadoc) 286 * 287 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateSequences(int) 288 */ 289 @Override 290 public Collection<List<Event>> generateSequences(int length) { 291 return generateSequences(length, false); 292 } 293 294 /* 295 * (non-Javadoc) 296 * 297 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateSequences(int, boolean) 298 */ 299 @Override 300 public Set<List<Event>> generateSequences(int length, boolean fromStart) { 301 Set<List<Event>> sequenceSet = new LinkedHashSet<List<Event>>(); 302 if (length < 1) { 303 throw new InvalidParameterException( 304 "Length of generated subsequences must be at least 1."); 305 } 306 if (length == 1) { 307 if (fromStart) { 308 List<Event> subSeq = new LinkedList<Event>(); 309 subSeq.add(Event.STARTEVENT); 310 sequenceSet.add(subSeq); 311 } 312 else { 313 for (Event event : getEvents()) { 314 List<Event> subSeq = new LinkedList<Event>(); 315 subSeq.add(event); 316 sequenceSet.add(subSeq); 317 } 318 } 319 return sequenceSet; 320 } 321 Collection<Event> events = getEvents(); 322 Collection<List<Event>> seqsShorter = generateSequences(length - 1, fromStart); 323 for (Event event : events) { 324 for (List<Event> seqShorter : seqsShorter) { 325 Event lastEvent = event; 326 if (getProbability(seqShorter, lastEvent) > 0.0) { 327 List<Event> subSeq = new ArrayList<Event>(seqShorter); 328 subSeq.add(lastEvent); 329 sequenceSet.add(subSeq); 330 } 331 } 332 } 333 return sequenceSet; 334 } 335 336 /* 337 * (non-Javadoc) 338 * 339 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#generateValidSequences (int) 340 */ 341 @Override 342 public Collection<List<Event>> generateValidSequences(int length) { 343 // check for min-length implicitly done by generateSequences 344 Collection<List<Event>> allSequences = generateSequences(length, true); 345 Collection<List<Event>> validSequences = new LinkedHashSet<List<Event>>(); 346 for (List<Event> sequence : allSequences) { 347 if (sequence.size() == length && 348 Event.ENDEVENT.equals(sequence.get(sequence.size() - 1))) 349 { 350 validSequences.add(sequence); 351 } 352 } 353 return validSequences; 354 } 355 356 /* 357 * (non-Javadoc) 358 * 359 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getProbability(java.util .List) 360 */ 361 @Override 362 public double getProbability(List<Event> sequence) { 363 if (sequence == null) { 364 throw new InvalidParameterException("sequence must not be null"); 365 } 366 double prob = 1.0; 367 List<Event> context = new LinkedList<Event>(); 368 for (Event event : sequence) { 369 prob *= getProbability(context, event); 370 context.add(event); 371 } 372 return prob; 373 } 374 375 /* 376 * (non-Javadoc) 377 * 378 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumFOMStates() 379 */ 380 @Override 381 public int getNumFOMStates() { 382 if (trie == null) { 383 return 0; 384 } 385 else { 386 return trie.getNumLeafAncestors(); 387 } 388 } 389 390 /* 391 * (non-Javadoc) 392 * 393 * @see de.ugoe.cs.quest.usageprofiles.IStochasticProcess#getNumTransitions() 394 */ 395 @Override 396 public int getNumTransitions() { 397 if (trie == null) { 398 return 0; 399 } 400 else { 401 return trie.getNumLeafs(); 402 } 403 } 416 404 } -
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/TrieNode.java
r518 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 15 16 /** 16 17 * <p> 17 * This class implements a node of a trie. Each node is associated with a symbol 18 * and has a counter. The counter marks the number of occurences of the sequence19 * defined by the path from the root ofthe trie to this node.18 * This class implements a node of a trie. Each node is associated with a symbol and has a counter. 19 * The counter marks the number of occurences of the sequence defined by the path from the root of 20 * the trie to this node. 20 21 * </p> 21 22 * … … 28 29 class TrieNode<T> implements Serializable { 29 30 30 /** 31 * <p> 32 * Id for object serialization. 33 * </p> 34 */ 35 private static final long serialVersionUID = 1L; 36 37 /** 38 * <p> 39 * Counter for the number of occurences of the sequence. 40 * </p> 41 */ 42 private int count; 43 44 /** 45 * <p> 46 * Symbol associated with the node. 47 * </p> 48 */ 49 private final T symbol; 50 51 /** 52 * <p> 53 * Child nodes of this node. If the node is a leaf this collection is empty. 54 * </p> 55 */ 56 private Collection<TrieNode<T>> children; 57 58 /** 59 * <p> 60 * Constructor. Creates a new TrieNode without a symbol associated.<br> 61 * <b>This constructor should only be used to create the root node of the 62 * trie!</b> 63 * </p> 64 */ 65 TrieNode() { 66 this.symbol = null; 67 count = 0; 68 children = new LinkedList<TrieNode<T>>(); 69 } 70 71 /** 72 * <p> 73 * Constructor. Creates a new TrieNode. The symbol must not be null. 74 * </p> 75 * 76 * @param symbol 77 * symbol associated with the trie node 78 */ 79 TrieNode(T symbol) { 80 if (symbol == null) { 81 throw new InvalidParameterException( 82 "symbol must not be null. null is reserved for root node!"); 83 } 84 this.symbol = symbol; 85 count = 0; 86 children = new LinkedList<TrieNode<T>>(); 87 } 88 89 /** 90 * <p> 91 * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not 92 * be null. 93 * </p> 94 * 95 * @param other 96 */ 97 TrieNode(TrieNode<T> other) { 98 if (other == null) { 99 throw new InvalidParameterException("other must not be null"); 100 } 101 symbol = other.symbol; 102 count = other.count; 103 children = new LinkedList<TrieNode<T>>(); 104 for (TrieNode<T> child : other.children) { 105 children.add(new TrieNode<T>(child)); 106 } 107 } 108 109 /** 110 * <p> 111 * Adds a given subsequence to the trie and increases the counters 112 * accordingly. 113 * </p> 114 * 115 * @param subsequence 116 * subsequence whose counters are increased 117 * @see Trie#add(List) 118 */ 119 public void add(List<T> subsequence) { 120 if (!subsequence.isEmpty()) { 121 if (!symbol.equals(subsequence.get(0))) { // should be guaranteed by 122 // the 123 // recursion/TrieRoot! 124 throw new AssertionError("Invalid trie operation!"); 125 } 126 count++; 127 subsequence.remove(0); 128 if (!subsequence.isEmpty()) { 129 T nextSymbol = subsequence.get(0); 130 getChildCreate(nextSymbol).add(subsequence); 131 } 132 } 133 } 134 135 /** 136 * <p> 137 * Returns the symbol associated with the node. 138 * </p> 139 * 140 * @return symbol associated with the node 141 */ 142 public T getSymbol() { 143 return symbol; 144 } 145 146 /** 147 * <p> 148 * Returns the number of occurences of the sequence represented by the node. 149 * </p> 150 * 151 * @return number of occurences of the sequence represented by the node 152 */ 153 public int getCount() { 154 return count; 155 } 156 157 /** 158 * <p> 159 * Returns the child of the node associated with the given symbol or creates 160 * it if it does not exist yet. 161 * </p> 162 * 163 * @param symbol 164 * symbol whose node is required 165 * @return node associated with the symbol 166 * @see Trie#getChildCreate(Object) 167 */ 168 protected TrieNode<T> getChildCreate(T symbol) { 169 TrieNode<T> node = getChild(symbol); 170 if (node == null) { 171 node = new TrieNode<T>(symbol); 172 children.add(node); 173 } 174 return node; 175 } 176 177 /** 178 * <p> 179 * Returns the child of the node associated with the given symbol or null if 180 * it does not exist. 181 * </p> 182 * 183 * @param symbol 184 * symbol whose node is required 185 * @return node associated with the symbol; null if no such node exists 186 * @see Trie#getChild(Object) 187 */ 188 protected TrieNode<T> getChild(T symbol) { 189 for (TrieNode<T> child : children) { 190 if (child.getSymbol().equals(symbol)) { 191 return child; 192 } 193 } 194 return null; 195 } 196 197 /** 198 * <p> 199 * Returns all children of this node. 200 * </p> 201 * 202 * @return children of this node 203 */ 204 protected Collection<TrieNode<T>> getChildren() { 205 return children; 206 } 207 208 /** 209 * <p> 210 * Searches the sub-trie of this trie node for a given sequence and returns 211 * the node associated with the sequence or null if no such node is found. 212 * </p> 213 * 214 * @param sequence 215 * sequence that is searched for 216 * @return node associated with the sequence 217 * @see Trie#find(List) 218 */ 219 public TrieNode<T> find(List<T> subsequence) { 220 TrieNode<T> result = null; 221 if (subsequence.isEmpty()) { 222 result = this; 223 } else { 224 TrieNode<T> node = getChild(subsequence.get(0)); 225 if (node != null) { 226 subsequence.remove(0); 227 result = node.find(subsequence); 228 } 229 } 230 return result; 231 } 232 233 /** 234 * <p> 235 * Returns a collection of all symbols that follow a this node, i.e., the 236 * symbols associated with the children of this node. 237 * </p> 238 * 239 * @return symbols follow this node 240 * @see TrieNode#getFollowingSymbols() 241 */ 242 public Collection<T> getFollowingSymbols() { 243 Collection<T> followingSymbols = new LinkedList<T>(); 244 for (TrieNode<T> child : children) { 245 followingSymbols.add(child.getSymbol()); 246 } 247 return followingSymbols; 248 } 249 250 /** 251 * <p> 252 * The string representation of a node is {@code symbol.toString()#count} 253 * </p> 254 * 255 * @see java.lang.Object#toString() 256 */ 257 @Override 258 public String toString() { 259 String str = symbol.toString() + " #" + count; 260 if (!children.isEmpty()) { 261 str += StringTools.ENDLINE + children.toString(); 262 } 263 return str; 264 } 265 266 /** 267 * <p> 268 * Generates a {@link Tree} represenation of the trie. 269 * </p> 270 * 271 * @param parent 272 * parent vertex in the generated tree 273 * @param graph 274 * complete tree 275 */ 276 void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) { 277 TrieVertex currentVertex; 278 if (isRoot()) { 279 currentVertex = new TrieVertex("root"); 280 graph.addVertex(currentVertex); 281 } else { 282 currentVertex = new TrieVertex(getSymbol().toString() + "#" 283 + getCount()); 284 graph.addChild(new Edge(), parent, currentVertex); 285 } 286 for (TrieNode<T> node : children) { 287 node.getGraph(currentVertex, graph); 288 } 289 } 290 291 /** 292 * <p> 293 * Appends the current node to the dot representation of the trie. 294 * </p> 295 * 296 * @param stringBuilder 297 * {@link StringBuilder} to which the dot representation is 298 * appended 299 */ 300 void appendDotRepresentation(StringBuilder stringBuilder) { 301 String thisSaneId; 302 if (isRoot()) { 303 thisSaneId = "root"; 304 } else { 305 thisSaneId = symbol.toString().replace("\"", "\\\"") 306 .replaceAll("[\r\n]", "") 307 + "#" + count; 308 } 309 stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId 310 + "\"];" + StringTools.ENDLINE); 311 for (TrieNode<T> childNode : children) { 312 stringBuilder.append(" " + hashCode() + " -> " 313 + childNode.hashCode() + ";" + StringTools.ENDLINE); 314 } 315 for (TrieNode<T> childNode : children) { 316 childNode.appendDotRepresentation(stringBuilder); 317 } 318 } 319 320 /** 321 * <p> 322 * Checks if the node is a leaf. 323 * </p> 324 * 325 * @return true if the node is a leaf, false otherwise. 326 */ 327 protected boolean isLeaf() { 328 return children.isEmpty(); 329 } 330 331 /** 332 * <p> 333 * Checks if the node is the root. 334 * </p> 335 * 336 * @return true if the node is the root of the trie, false otherwise 337 */ 338 protected boolean isRoot() { 339 return symbol == null; 340 } 341 342 /** 343 * <p> 344 * Recursive methods that collects all nodes that are ancestors of leafs and 345 * stores them in the set. 346 * </p> 347 * 348 * @param ancestors 349 * set of all ancestors of leafs 350 */ 351 protected void getLeafAncestors(List<TrieNode<T>> ancestors) { 352 boolean isAncestor = false; 353 for (TrieNode<T> child : children) { 354 child.getLeafAncestors(ancestors); 355 isAncestor |= child.isLeaf(); 356 } 357 if (isAncestor) { 358 ancestors.add(this); 359 } 360 } 361 362 /** 363 * <p> 364 * Returns the number of descendants of this node that are leafs. This does 365 * not only include direct children of this node, but all leafs in the 366 * sub-trie with this node as root. 367 * </p> 368 * 369 * @return number of leafs in this sub-trie 370 */ 371 protected int getNumLeafs() { 372 int numLeafs = 0; 373 for (TrieNode<T> child : children) { 374 if (child.isLeaf()) { 375 numLeafs++; 376 } else { 377 numLeafs += child.getNumLeafs(); 378 } 379 } 380 return numLeafs; 381 } 382 383 /** 384 * <p> 385 * Sets the {@link #count} of this node. 386 * </p> 387 * <p> 388 * This function should only be used sparingly and very carefully! The count 389 * is usually maintained automatically by the training procedures. 390 * </p> 391 * 392 * @param count 393 * new count 394 */ 395 protected void setCount(int count) { 396 this.count = count; 397 } 398 399 /** 400 * <p> 401 * Two TrieNodes are defined as equal, if their {@link #count}, 402 * {@link #symbol}, and {@link #children} are equal. 403 * </p> 404 * 405 * @see java.lang.Object#equals(java.lang.Object) 406 */ 407 @SuppressWarnings("rawtypes") 408 @Override 409 public boolean equals(Object other) { 410 if (other == this) { 411 return true; 412 } 413 if (other instanceof TrieNode) { 414 TrieNode otherNode = (TrieNode) other; 415 if (symbol == null) { 416 return count == otherNode.count && otherNode.symbol == null 417 && children.equals(otherNode.children); 418 } else { 419 return count == otherNode.count 420 && symbol.equals(((TrieNode) other).symbol) 421 && children.equals(((TrieNode) other).children); 422 } 423 } 424 return false; 425 } 426 427 /* 428 * (non-Javadoc) 429 * 430 * @see java.lang.Object#hashCode() 431 */ 432 @Override 433 public int hashCode() { 434 int multiplier = 17; 435 int hash = 42; 436 if (symbol != null) { 437 hash = multiplier * hash + symbol.hashCode(); 438 } 439 hash = multiplier * hash + count; 440 hash = multiplier * hash + children.hashCode(); 441 return hash; 442 } 31 /** 32 * <p> 33 * Id for object serialization. 34 * </p> 35 */ 36 private static final long serialVersionUID = 1L; 37 38 /** 39 * <p> 40 * Counter for the number of occurences of the sequence. 41 * </p> 42 */ 43 private int count; 44 45 /** 46 * <p> 47 * Symbol associated with the node. 48 * </p> 49 */ 50 private final T symbol; 51 52 /** 53 * <p> 54 * Child nodes of this node. If the node is a leaf this collection is empty. 55 * </p> 56 */ 57 private Collection<TrieNode<T>> children; 58 59 /** 60 * <p> 61 * Constructor. Creates a new TrieNode without a symbol associated.<br> 62 * <b>This constructor should only be used to create the root node of the trie!</b> 63 * </p> 64 */ 65 TrieNode() { 66 this.symbol = null; 67 count = 0; 68 children = new LinkedList<TrieNode<T>>(); 69 } 70 71 /** 72 * <p> 73 * Constructor. Creates a new TrieNode. The symbol must not be null. 74 * </p> 75 * 76 * @param symbol 77 * symbol associated with the trie node 78 */ 79 TrieNode(T symbol) { 80 if (symbol == null) { 81 throw new InvalidParameterException( 82 "symbol must not be null. null is reserved for root node!"); 83 } 84 this.symbol = symbol; 85 count = 0; 86 children = new LinkedList<TrieNode<T>>(); 87 } 88 89 /** 90 * <p> 91 * Copy-Constructor. Creates a new TrieNode as copy of other. Other must not be null. 92 * </p> 93 * 94 * @param other 95 */ 96 TrieNode(TrieNode<T> other) { 97 if (other == null) { 98 throw new InvalidParameterException("other must not be null"); 99 } 100 symbol = other.symbol; 101 count = other.count; 102 children = new LinkedList<TrieNode<T>>(); 103 for (TrieNode<T> child : other.children) { 104 children.add(new TrieNode<T>(child)); 105 } 106 } 107 108 /** 109 * <p> 110 * Adds a given subsequence to the trie and increases the counters accordingly. 111 * </p> 112 * 113 * @param subsequence 114 * subsequence whose counters are increased 115 * @see Trie#add(List) 116 */ 117 public void add(List<T> subsequence) { 118 if (!subsequence.isEmpty()) { 119 if (!symbol.equals(subsequence.get(0))) { // should be guaranteed by 120 // the 121 // recursion/TrieRoot! 122 throw new AssertionError("Invalid trie operation!"); 123 } 124 count++; 125 subsequence.remove(0); 126 if (!subsequence.isEmpty()) { 127 T nextSymbol = subsequence.get(0); 128 getChildCreate(nextSymbol).add(subsequence); 129 } 130 } 131 } 132 133 /** 134 * <p> 135 * Returns the symbol associated with the node. 136 * </p> 137 * 138 * @return symbol associated with the node 139 */ 140 public T getSymbol() { 141 return symbol; 142 } 143 144 /** 145 * <p> 146 * Returns the number of occurences of the sequence represented by the node. 147 * </p> 148 * 149 * @return number of occurences of the sequence represented by the node 150 */ 151 public int getCount() { 152 return count; 153 } 154 155 /** 156 * <p> 157 * Returns the child of the node associated with the given symbol or creates it if it does not 158 * exist yet. 159 * </p> 160 * 161 * @param symbol 162 * symbol whose node is required 163 * @return node associated with the symbol 164 * @see Trie#getChildCreate(Object) 165 */ 166 protected TrieNode<T> getChildCreate(T symbol) { 167 TrieNode<T> node = getChild(symbol); 168 if (node == null) { 169 node = new TrieNode<T>(symbol); 170 children.add(node); 171 } 172 return node; 173 } 174 175 /** 176 * <p> 177 * Returns the child of the node associated with the given symbol or null if it does not exist. 178 * </p> 179 * 180 * @param symbol 181 * symbol whose node is required 182 * @return node associated with the symbol; null if no such node exists 183 * @see Trie#getChild(Object) 184 */ 185 protected TrieNode<T> getChild(T symbol) { 186 for (TrieNode<T> child : children) { 187 if (child.getSymbol().equals(symbol)) { 188 return child; 189 } 190 } 191 return null; 192 } 193 194 /** 195 * <p> 196 * Returns all children of this node. 197 * </p> 198 * 199 * @return children of this node 200 */ 201 protected Collection<TrieNode<T>> getChildren() { 202 return children; 203 } 204 205 /** 206 * <p> 207 * Searches the sub-trie of this trie node for a given sequence and returns the node associated 208 * with the sequence or null if no such node is found. 209 * </p> 210 * 211 * @param sequence 212 * sequence that is searched for 213 * @return node associated with the sequence 214 * @see Trie#find(List) 215 */ 216 public TrieNode<T> find(List<T> subsequence) { 217 TrieNode<T> result = null; 218 if (subsequence.isEmpty()) { 219 result = this; 220 } 221 else { 222 TrieNode<T> node = getChild(subsequence.get(0)); 223 if (node != null) { 224 subsequence.remove(0); 225 result = node.find(subsequence); 226 } 227 } 228 return result; 229 } 230 231 /** 232 * <p> 233 * Returns a collection of all symbols that follow a this node, i.e., the symbols associated 234 * with the children of this node. 235 * </p> 236 * 237 * @return symbols follow this node 238 * @see TrieNode#getFollowingSymbols() 239 */ 240 public Collection<T> getFollowingSymbols() { 241 Collection<T> followingSymbols = new LinkedList<T>(); 242 for (TrieNode<T> child : children) { 243 followingSymbols.add(child.getSymbol()); 244 } 245 return followingSymbols; 246 } 247 248 /** 249 * <p> 250 * The string representation of a node is {@code symbol.toString()#count} 251 * </p> 252 * 253 * @see java.lang.Object#toString() 254 */ 255 @Override 256 public String toString() { 257 String str = symbol.toString() + " #" + count; 258 if (!children.isEmpty()) { 259 str += StringTools.ENDLINE + children.toString(); 260 } 261 return str; 262 } 263 264 /** 265 * <p> 266 * Generates a {@link Tree} represenation of the trie. 267 * </p> 268 * 269 * @param parent 270 * parent vertex in the generated tree 271 * @param graph 272 * complete tree 273 */ 274 void getGraph(TrieVertex parent, DelegateTree<TrieVertex, Edge> graph) { 275 TrieVertex currentVertex; 276 if (isRoot()) { 277 currentVertex = new TrieVertex("root"); 278 graph.addVertex(currentVertex); 279 } 280 else { 281 currentVertex = new TrieVertex(getSymbol().toString() + "#" + getCount()); 282 graph.addChild(new Edge(), parent, currentVertex); 283 } 284 for (TrieNode<T> node : children) { 285 node.getGraph(currentVertex, graph); 286 } 287 } 288 289 /** 290 * <p> 291 * Appends the current node to the dot representation of the trie. 292 * </p> 293 * 294 * @param stringBuilder 295 * {@link StringBuilder} to which the dot representation is appended 296 */ 297 void appendDotRepresentation(StringBuilder stringBuilder) { 298 String thisSaneId; 299 if (isRoot()) { 300 thisSaneId = "root"; 301 } 302 else { 303 thisSaneId = 304 symbol.toString().replace("\"", "\\\"").replaceAll("[\r\n]", "") + "#" + count; 305 } 306 stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" + 307 StringTools.ENDLINE); 308 for (TrieNode<T> childNode : children) { 309 stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" + 310 StringTools.ENDLINE); 311 } 312 for (TrieNode<T> childNode : children) { 313 childNode.appendDotRepresentation(stringBuilder); 314 } 315 } 316 317 /** 318 * <p> 319 * Checks if the node is a leaf. 320 * </p> 321 * 322 * @return true if the node is a leaf, false otherwise. 323 */ 324 protected boolean isLeaf() { 325 return children.isEmpty(); 326 } 327 328 /** 329 * <p> 330 * Checks if the node is the root. 331 * </p> 332 * 333 * @return true if the node is the root of the trie, false otherwise 334 */ 335 protected boolean isRoot() { 336 return symbol == null; 337 } 338 339 /** 340 * <p> 341 * Recursive methods that collects all nodes that are ancestors of leafs and stores them in the 342 * set. 343 * </p> 344 * 345 * @param ancestors 346 * set of all ancestors of leafs 347 */ 348 protected void getLeafAncestors(List<TrieNode<T>> ancestors) { 349 boolean isAncestor = false; 350 for (TrieNode<T> child : children) { 351 child.getLeafAncestors(ancestors); 352 isAncestor |= child.isLeaf(); 353 } 354 if (isAncestor) { 355 ancestors.add(this); 356 } 357 } 358 359 /** 360 * <p> 361 * Returns the number of descendants of this node that are leafs. This does not only include 362 * direct children of this node, but all leafs in the sub-trie with this node as root. 363 * </p> 364 * 365 * @return number of leafs in this sub-trie 366 */ 367 protected int getNumLeafs() { 368 int numLeafs = 0; 369 for (TrieNode<T> child : children) { 370 if (child.isLeaf()) { 371 numLeafs++; 372 } 373 else { 374 numLeafs += child.getNumLeafs(); 375 } 376 } 377 return numLeafs; 378 } 379 380 /** 381 * <p> 382 * Sets the {@link #count} of this node. 383 * </p> 384 * <p> 385 * This function should only be used sparingly and very carefully! The count is usually 386 * maintained automatically by the training procedures. 387 * </p> 388 * 389 * @param count 390 * new count 391 */ 392 protected void setCount(int count) { 393 this.count = count; 394 } 395 396 /** 397 * <p> 398 * Two TrieNodes are defined as equal, if their {@link #count}, {@link #symbol}, and 399 * {@link #children} are equal. 400 * </p> 401 * 402 * @see java.lang.Object#equals(java.lang.Object) 403 */ 404 @SuppressWarnings("rawtypes") 405 @Override 406 public boolean equals(Object other) { 407 if (other == this) { 408 return true; 409 } 410 if (other instanceof TrieNode) { 411 TrieNode otherNode = (TrieNode) other; 412 if (symbol == null) { 413 return count == otherNode.count && otherNode.symbol == null && 414 children.equals(otherNode.children); 415 } 416 else { 417 return count == otherNode.count && symbol.equals(((TrieNode) other).symbol) && 418 children.equals(((TrieNode) other).children); 419 } 420 } 421 return false; 422 } 423 424 /* 425 * (non-Javadoc) 426 * 427 * @see java.lang.Object#hashCode() 428 */ 429 @Override 430 public int hashCode() { 431 int multiplier = 17; 432 int hash = 42; 433 if (symbol != null) { 434 hash = multiplier * hash + symbol.hashCode(); 435 } 436 hash = multiplier * hash + count; 437 hash = multiplier * hash + children.hashCode(); 438 return hash; 439 } 443 440 }
Note: See TracChangeset
for help on using the changeset viewer.