Changeset 559 for trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java
- Timestamp:
- 08/17/12 09:05:19 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java
r553 r559 1 1 2 package de.ugoe.cs.quest.usageprofiles; 2 3 … … 18 19 /** 19 20 * <p> 20 * Implements first-order Markov models. The implementation is based on 21 * {@link HighOrderMarkovModel} and restricts the Markov order to 1. In 22 * comparison to {@link HighOrderMarkovModel}, more calculations are possible 23 * with first-order models, e.g., the calculation of the entropy ( 21 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel} 22 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more 23 * calculations are possible with first-order models, e.g., the calculation of the entropy ( 24 24 * {@link #calcEntropy()}). 25 25 * </p> … … 28 28 * @version 1.0 29 29 */ 30 public class FirstOrderMarkovModel extends HighOrderMarkovModel implements 31 IDotCompatible { 32 33 /** 34 * <p> 35 * Id for object serialization. 36 * </p> 37 */ 38 private static final long serialVersionUID = 1L; 39 40 /** 41 * <p> 42 * Maximum number of iterations when calculating the stationary distribution 43 * as the limit of multiplying the transmission matrix with itself. 44 * </p> 45 */ 46 final static int MAX_STATDIST_ITERATIONS = 1000; 47 48 /** 49 * <p> 50 * Constructor. Creates a new FirstOrderMarkovModel. 51 * </p> 52 * 53 * @param r 54 * random number generator used by probabilistic methods of the 55 * class 56 */ 57 public FirstOrderMarkovModel(Random r) { 58 super(1, r); 59 } 60 61 /** 62 * <p> 63 * Generates the transmission matrix of the Markov model. 64 * </p> 65 * 66 * @return transmission matrix 67 */ 68 private Matrix getTransmissionMatrix() { 69 List<Event> knownSymbols = new ArrayList<Event>( 70 trie.getKnownSymbols()); 71 int numStates = knownSymbols.size(); 72 Matrix transmissionMatrix = new Matrix(numStates, numStates); 73 74 for (int i = 0; i < numStates; i++) { 75 Event currentSymbol = knownSymbols.get(i); 76 List<Event> context = new ArrayList<Event>(); 77 context.add(currentSymbol); 78 for (int j = 0; j < numStates; j++) { 79 Event follower = knownSymbols.get(j); 80 double prob = getProbability(context, follower); 81 transmissionMatrix.set(i, j, prob); 82 } 83 } 84 return transmissionMatrix; 85 } 86 87 /** 88 * <p> 89 * Calculates the entropy of the model. To make it possible that the model 90 * is stationary, a transition from {@link Event#ENDEVENT} to 91 * {@link Event#STARTEVENT} is added. 92 * </p> 93 * 94 * @return entropy of the model or NaN if it could not be calculated 95 */ 96 public double calcEntropy() { 97 Matrix transmissionMatrix = getTransmissionMatrix(); 98 List<Event> knownSymbols = new ArrayList<Event>( 99 trie.getKnownSymbols()); 100 int numStates = knownSymbols.size(); 101 102 List<Integer> startIndexList = new LinkedList<Integer>(); 103 List<Integer> endIndexList = new LinkedList<Integer>(); 104 for( int i=0 ; i<knownSymbols.size() ; i++ ) { 105 String id = knownSymbols.get(i).getId(); 106 if( id.equals(Event.STARTEVENT.getId()) || id.contains(Event.STARTEVENT.getId()+"-=-") ) { 107 startIndexList.add(i); 108 } 109 if( id.equals(Event.ENDEVENT.getId()) || id.contains("-=-"+Event.ENDEVENT.getId()) ) { 110 endIndexList.add(i); 111 } 112 } 113 114 if (startIndexList.isEmpty()) { 115 Console.printerrln("Error calculating entropy. Initial state of markov chain not found."); 116 return Double.NaN; 117 } 118 if (endIndexList.isEmpty()) { 119 Console.printerrln("Error calculating entropy. End state of markov chain not found."); 120 return Double.NaN; 121 } 122 for( Integer i : endIndexList ) { 123 for(Integer j : startIndexList ) { 124 transmissionMatrix.set(i, j, 1); 125 } 126 } 127 128 // Calculate stationary distribution by raising the power of the 129 // transmission matrix. 130 // The rank of the matrix should fall to 1 and each two should be the 131 // vector of the stationory distribution. 132 int iter = 0; 133 int rank = transmissionMatrix.rank(); 134 Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone(); 135 while (iter < MAX_STATDIST_ITERATIONS && rank > 1) { 136 stationaryMatrix = stationaryMatrix.times(stationaryMatrix); 137 rank = stationaryMatrix.rank(); 138 iter++; 139 } 140 141 if (rank != 1) { 142 Console.traceln("rank: " + rank); 143 Console.printerrln("Unable to calculate stationary distribution."); 144 return Double.NaN; 145 } 146 147 double entropy = 0.0; 148 for (int i = 0; i < numStates; i++) { 149 for (int j = 0; j < numStates; j++) { 150 if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j)!=1) { 151 double tmp = stationaryMatrix.get(0, i); 152 tmp *= transmissionMatrix.get(i, j); 153 tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2); 154 entropy -= tmp; 155 } 156 } 157 } 158 return entropy; 159 } 160 161 /** 162 * <p> 163 * The dot represenation of {@link FirstOrderMarkovModel}s is its graph 164 * representation with the states as nodes and directed edges weighted with 165 * transition probabilities. 166 * </p> 167 * 168 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 169 */ 170 @Override 171 public String getDotRepresentation() { 172 StringBuilder stringBuilder = new StringBuilder(); 173 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 174 175 List<Event> knownSymbols = new ArrayList<Event>( 176 trie.getKnownSymbols()); 177 for (Event symbol : knownSymbols) { 178 final String thisSaneId = symbol.getId().replace("\"", "\\\"") 179 .replaceAll("[\r\n]", ""); 180 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" 181 + thisSaneId + "\"];" + StringTools.ENDLINE); 182 List<Event> context = new ArrayList<Event>(); 183 context.add(symbol); 184 Collection<Event> followers = trie.getFollowingSymbols(context); 185 for (Event follower : followers) { 186 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " 187 + knownSymbols.indexOf(follower) + " "); 188 stringBuilder.append("[label=\"" 189 + getProbability(context, follower) + "\"];" 190 + StringTools.ENDLINE); 191 } 192 } 193 stringBuilder.append('}' + StringTools.ENDLINE); 194 return stringBuilder.toString(); 195 } 196 197 /** 198 * <p> 199 * Returns a {@link Graph} representation of the model with the states as 200 * nodes and directed edges weighted with transition probabilities. 201 * </p> 202 * 203 * @return {@link Graph} of the model 204 */ 205 public Graph<String, MarkovEdge> getGraph() { 206 Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>(); 207 208 List<Event> knownSymbols = new ArrayList<Event>( 209 trie.getKnownSymbols()); 210 211 for (Event symbol : knownSymbols) { 212 String from = symbol.getId(); 213 List<Event> context = new ArrayList<Event>(); 214 context.add(symbol); 215 216 Collection<Event> followers = trie.getFollowingSymbols(context); 217 218 for (Event follower : followers) { 219 String to = follower.getId(); 220 MarkovEdge prob = new MarkovEdge(getProbability(context, 221 follower)); 222 graph.addEdge(prob, from, to, EdgeType.DIRECTED); 223 } 224 } 225 return graph; 226 } 227 228 /** 229 * Inner class used for the {@link Graph} representation of the model. 230 * 231 * @author Steffen Herbold 232 * @version 1.0 233 */ 234 static public class MarkovEdge { 235 /** 236 * <p> 237 * Weight of the edge, i.e., its transition probability. 238 * </p> 239 */ 240 double weight; 241 242 /** 243 * <p> 244 * Constructor. Creates a new MarkovEdge. 245 * </p> 246 * 247 * @param weight 248 * weight of the edge, i.e., its transition probability 249 */ 250 MarkovEdge(double weight) { 251 this.weight = weight; 252 } 253 254 /** 255 * <p> 256 * The weight of the edge as {@link String}. 257 * </p> 258 */ 259 public String toString() { 260 return "" + weight; 261 } 262 } 30 public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible { 31 32 /** 33 * <p> 34 * Id for object serialization. 35 * </p> 36 */ 37 private static final long serialVersionUID = 1L; 38 39 /** 40 * <p> 41 * Maximum number of iterations when calculating the stationary distribution as the limit of 42 * multiplying the transmission matrix with itself. 43 * </p> 44 */ 45 final static int MAX_STATDIST_ITERATIONS = 1000; 46 47 /** 48 * <p> 49 * Constructor. Creates a new FirstOrderMarkovModel. 50 * </p> 51 * 52 * @param r 53 * random number generator used by probabilistic methods of the class 54 */ 55 public FirstOrderMarkovModel(Random r) { 56 super(1, r); 57 } 58 59 /** 60 * <p> 61 * Generates the transmission matrix of the Markov model. 62 * </p> 63 * 64 * @return transmission matrix 65 */ 66 private Matrix getTransmissionMatrix() { 67 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 68 int numStates = knownSymbols.size(); 69 Matrix transmissionMatrix = new Matrix(numStates, numStates); 70 71 for (int i = 0; i < numStates; i++) { 72 Event currentSymbol = knownSymbols.get(i); 73 List<Event> context = new ArrayList<Event>(); 74 context.add(currentSymbol); 75 for (int j = 0; j < numStates; j++) { 76 Event follower = knownSymbols.get(j); 77 double prob = getProbability(context, follower); 78 transmissionMatrix.set(i, j, prob); 79 } 80 } 81 return transmissionMatrix; 82 } 83 84 /** 85 * <p> 86 * Calculates the entropy of the model. To make it possible that the model is stationary, a 87 * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added. 88 * </p> 89 * 90 * @return entropy of the model or NaN if it could not be calculated 91 */ 92 public double calcEntropy() { 93 Matrix transmissionMatrix = getTransmissionMatrix(); 94 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 95 int numStates = knownSymbols.size(); 96 97 List<Integer> startIndexList = new LinkedList<Integer>(); 98 List<Integer> endIndexList = new LinkedList<Integer>(); 99 for (int i = 0; i < knownSymbols.size(); i++) { 100 String id = knownSymbols.get(i).getId(); 101 if (id.equals(Event.STARTEVENT.getId()) || 102 id.contains(Event.STARTEVENT.getId() + "-=-")) 103 { 104 startIndexList.add(i); 105 } 106 if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) { 107 endIndexList.add(i); 108 } 109 } 110 111 if (startIndexList.isEmpty()) { 112 Console 113 .printerrln("Error calculating entropy. Initial state of markov chain not found."); 114 return Double.NaN; 115 } 116 if (endIndexList.isEmpty()) { 117 Console.printerrln("Error calculating entropy. End state of markov chain not found."); 118 return Double.NaN; 119 } 120 for (Integer i : endIndexList) { 121 for (Integer j : startIndexList) { 122 transmissionMatrix.set(i, j, 1); 123 } 124 } 125 126 // Calculate stationary distribution by raising the power of the 127 // transmission matrix. 128 // The rank of the matrix should fall to 1 and each two should be the 129 // vector of the stationory distribution. 130 int iter = 0; 131 int rank = transmissionMatrix.rank(); 132 Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone(); 133 while (iter < MAX_STATDIST_ITERATIONS && rank > 1) { 134 stationaryMatrix = stationaryMatrix.times(stationaryMatrix); 135 rank = stationaryMatrix.rank(); 136 iter++; 137 } 138 139 if (rank != 1) { 140 Console.traceln("rank: " + rank); 141 Console.printerrln("Unable to calculate stationary distribution."); 142 return Double.NaN; 143 } 144 145 double entropy = 0.0; 146 for (int i = 0; i < numStates; i++) { 147 for (int j = 0; j < numStates; j++) { 148 if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) { 149 double tmp = stationaryMatrix.get(0, i); 150 tmp *= transmissionMatrix.get(i, j); 151 tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2); 152 entropy -= tmp; 153 } 154 } 155 } 156 return entropy; 157 } 158 159 /** 160 * <p> 161 * The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the 162 * states as nodes and directed edges weighted with transition probabilities. 163 * </p> 164 * 165 * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation() 166 */ 167 @Override 168 public String getDotRepresentation() { 169 StringBuilder stringBuilder = new StringBuilder(); 170 stringBuilder.append("digraph model {" + StringTools.ENDLINE); 171 172 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 173 for (Event symbol : knownSymbols) { 174 final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", ""); 175 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId + 176 "\"];" + StringTools.ENDLINE); 177 List<Event> context = new ArrayList<Event>(); 178 context.add(symbol); 179 Collection<Event> followers = trie.getFollowingSymbols(context); 180 for (Event follower : followers) { 181 stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " + 182 knownSymbols.indexOf(follower) + " "); 183 stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" + 184 StringTools.ENDLINE); 185 } 186 } 187 stringBuilder.append('}' + StringTools.ENDLINE); 188 return stringBuilder.toString(); 189 } 190 191 /** 192 * <p> 193 * Returns a {@link Graph} representation of the model with the states as nodes and directed 194 * edges weighted with transition probabilities. 195 * </p> 196 * 197 * @return {@link Graph} of the model 198 */ 199 public Graph<String, MarkovEdge> getGraph() { 200 Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>(); 201 202 List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols()); 203 204 for (Event symbol : knownSymbols) { 205 String from = symbol.getId(); 206 List<Event> context = new ArrayList<Event>(); 207 context.add(symbol); 208 209 Collection<Event> followers = trie.getFollowingSymbols(context); 210 211 for (Event follower : followers) { 212 String to = follower.getId(); 213 MarkovEdge prob = new MarkovEdge(getProbability(context, follower)); 214 graph.addEdge(prob, from, to, EdgeType.DIRECTED); 215 } 216 } 217 return graph; 218 } 219 220 /** 221 * Inner class used for the {@link Graph} representation of the model. 222 * 223 * @author Steffen Herbold 224 * @version 1.0 225 */ 226 static public class MarkovEdge { 227 /** 228 * <p> 229 * Weight of the edge, i.e., its transition probability. 230 * </p> 231 */ 232 double weight; 233 234 /** 235 * <p> 236 * Constructor. Creates a new MarkovEdge. 237 * </p> 238 * 239 * @param weight 240 * weight of the edge, i.e., its transition probability 241 */ 242 MarkovEdge(double weight) { 243 this.weight = weight; 244 } 245 246 /** 247 * <p> 248 * The weight of the edge as {@link String}. 249 * </p> 250 */ 251 public String toString() { 252 return "" + weight; 253 } 254 } 263 255 264 256 }
Note: See TracChangeset
for help on using the changeset viewer.