// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.usageprofiles; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.Random; import java.util.logging.Level; import de.ugoe.cs.autoquest.eventcore.Event; import de.ugoe.cs.util.StringTools; import de.ugoe.cs.util.console.Console; import edu.uci.ics.jung.graph.Graph; import edu.uci.ics.jung.graph.SparseMultigraph; import edu.uci.ics.jung.graph.util.EdgeType; import Jama.Matrix; /** *

* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel} * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more * calculations are possible with first-order models, e.g., the calculation of the entropy ( * {@link #calcEntropy()}). *

* * @author Steffen Herbold * @version 1.0 */ public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible { /** *

* Id for object serialization. *

*/ private static final long serialVersionUID = 1L; /** *

* Maximum number of iterations when calculating the stationary distribution as the limit of * multiplying the transmission matrix with itself. *

*/ final static int MAX_STATDIST_ITERATIONS = 1000; /** *

* Constructor. Creates a new FirstOrderMarkovModel. *

* * @param r * random number generator used by probabilistic methods of the class */ public FirstOrderMarkovModel(Random r) { super(1, r); } /** *

* Generates the transmission matrix of the Markov model. *

* * @return transmission matrix */ private Matrix getTransmissionMatrix() { List knownSymbols = new ArrayList(trie.getKnownSymbols()); int numStates = knownSymbols.size(); Matrix transmissionMatrix = new Matrix(numStates, numStates); for (int i = 0; i < numStates; i++) { Event currentSymbol = knownSymbols.get(i); List context = new ArrayList(); context.add(currentSymbol); for (int j = 0; j < numStates; j++) { Event follower = knownSymbols.get(j); double prob = getProbability(context, follower); transmissionMatrix.set(i, j, prob); } } return transmissionMatrix; } /** *

* Calculates the entropy of the model. To make it possible that the model is stationary, a * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added. *

* * @return entropy of the model or NaN if it could not be calculated */ public double calcEntropy() { Matrix transmissionMatrix = getTransmissionMatrix(); List knownSymbols = new ArrayList(trie.getKnownSymbols()); int numStates = knownSymbols.size(); List startIndexList = new LinkedList(); List endIndexList = new LinkedList(); for (int i = 0; i < knownSymbols.size(); i++) { String id = knownSymbols.get(i).getId(); if (id.equals(Event.STARTEVENT.getId()) || id.contains(Event.STARTEVENT.getId() + "-=-")) { startIndexList.add(i); } if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) { endIndexList.add(i); } } if (startIndexList.isEmpty()) { Console .printerrln("Error calculating entropy. Initial state of markov chain not found."); return Double.NaN; } if (endIndexList.isEmpty()) { Console.printerrln("Error calculating entropy. End state of markov chain not found."); return Double.NaN; } for (Integer i : endIndexList) { for (Integer j : startIndexList) { transmissionMatrix.set(i, j, 1); } } // Calculate stationary distribution by raising the power of the // transmission matrix. // The rank of the matrix should fall to 1 and each two should be the // vector of the stationory distribution. int iter = 0; int rank = transmissionMatrix.rank(); Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone(); while (iter < MAX_STATDIST_ITERATIONS && rank > 1) { stationaryMatrix = stationaryMatrix.times(stationaryMatrix); rank = stationaryMatrix.rank(); iter++; } if (rank != 1) { Console.traceln(Level.FINE, "rank: " + rank); Console.printerrln("Unable to calculate stationary distribution."); return Double.NaN; } double entropy = 0.0; for (int i = 0; i < numStates; i++) { for (int j = 0; j < numStates; j++) { if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) { double tmp = stationaryMatrix.get(0, i); tmp *= transmissionMatrix.get(i, j); tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2); entropy -= tmp; } } } return entropy; } /** *

* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the * states as nodes and directed edges weighted with transition probabilities. *

* * @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation() */ @Override public String getDotRepresentation() { StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("digraph model {" + StringTools.ENDLINE); List knownSymbols = new ArrayList(trie.getKnownSymbols()); for (Event symbol : knownSymbols) { final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", ""); stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId + "\"];" + StringTools.ENDLINE); List context = new ArrayList(); context.add(symbol); Collection followers = trie.getFollowingSymbols(context); for (Event follower : followers) { stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " + knownSymbols.indexOf(follower) + " "); stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" + StringTools.ENDLINE); } } stringBuilder.append('}' + StringTools.ENDLINE); return stringBuilder.toString(); } /** *

* Returns a {@link Graph} representation of the model with the states as nodes and directed * edges weighted with transition probabilities. *

* * @return {@link Graph} of the model */ public Graph getGraph() { Graph graph = new SparseMultigraph(); List knownSymbols = new ArrayList(trie.getKnownSymbols()); for (Event symbol : knownSymbols) { String from = symbol.getId(); List context = new ArrayList(); context.add(symbol); Collection followers = trie.getFollowingSymbols(context); for (Event follower : followers) { String to = follower.getId(); MarkovEdge prob = new MarkovEdge(getProbability(context, follower)); graph.addEdge(prob, from, to, EdgeType.DIRECTED); } } return graph; } /** * Inner class used for the {@link Graph} representation of the model. * * @author Steffen Herbold * @version 1.0 */ static public class MarkovEdge { /** *

* Weight of the edge, i.e., its transition probability. *

*/ double weight; /** *

* Constructor. Creates a new MarkovEdge. *

* * @param weight * weight of the edge, i.e., its transition probability */ MarkovEdge(double weight) { this.weight = weight; } /** *

* The weight of the edge as {@link String}. *

*/ public String toString() { return "" + weight; } } }