// Copyright 2012 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.autoquest.usageprofiles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import de.ugoe.cs.autoquest.eventcore.Event;
import de.ugoe.cs.util.StringTools;
import de.ugoe.cs.util.console.Console;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.SparseMultigraph;
import edu.uci.ics.jung.graph.util.EdgeType;
import Jama.Matrix;
/**
*
* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
* calculations are possible with first-order models, e.g., the calculation of the entropy (
* {@link #calcEntropy()}).
*
*
* @author Steffen Herbold
* @version 1.0
*/
public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
/**
*
* Id for object serialization.
*
*/
private static final long serialVersionUID = 1L;
/**
*
* Maximum number of iterations when calculating the stationary distribution as the limit of
* multiplying the transmission matrix with itself.
*
*/
final static int MAX_STATDIST_ITERATIONS = 1000;
/**
*
* Constructor. Creates a new FirstOrderMarkovModel.
*
*
* @param r
* random number generator used by probabilistic methods of the class
*/
public FirstOrderMarkovModel(Random r) {
super(1, r);
}
/**
*
* Generates the transmission matrix of the Markov model.
*
*
* @return transmission matrix
*/
private Matrix getTransmissionMatrix() {
List knownSymbols = new ArrayList(trie.getKnownSymbols());
int numStates = knownSymbols.size();
Matrix transmissionMatrix = new Matrix(numStates, numStates);
for (int i = 0; i < numStates; i++) {
Event currentSymbol = knownSymbols.get(i);
List context = new ArrayList();
context.add(currentSymbol);
for (int j = 0; j < numStates; j++) {
Event follower = knownSymbols.get(j);
double prob = getProbability(context, follower);
transmissionMatrix.set(i, j, prob);
}
}
return transmissionMatrix;
}
/**
*
* Calculates the entropy of the model. To make it possible that the model is stationary, a
* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
*
*
* @return entropy of the model or NaN if it could not be calculated
*/
public double calcEntropy() {
Matrix transmissionMatrix = getTransmissionMatrix();
List knownSymbols = new ArrayList(trie.getKnownSymbols());
int numStates = knownSymbols.size();
List startIndexList = new LinkedList();
List endIndexList = new LinkedList();
for (int i = 0; i < knownSymbols.size(); i++) {
String id = knownSymbols.get(i).getId();
if (id.equals(Event.STARTEVENT.getId()) ||
id.contains(Event.STARTEVENT.getId() + "-=-"))
{
startIndexList.add(i);
}
if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) {
endIndexList.add(i);
}
}
if (startIndexList.isEmpty()) {
Console
.printerrln("Error calculating entropy. Initial state of markov chain not found.");
return Double.NaN;
}
if (endIndexList.isEmpty()) {
Console.printerrln("Error calculating entropy. End state of markov chain not found.");
return Double.NaN;
}
for (Integer i : endIndexList) {
for (Integer j : startIndexList) {
transmissionMatrix.set(i, j, 1);
}
}
// Calculate stationary distribution by raising the power of the
// transmission matrix.
// The rank of the matrix should fall to 1 and each two should be the
// vector of the stationory distribution.
int iter = 0;
int rank = transmissionMatrix.rank();
Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
rank = stationaryMatrix.rank();
iter++;
}
if (rank != 1) {
Console.traceln(Level.FINE, "rank: " + rank);
Console.printerrln("Unable to calculate stationary distribution.");
return Double.NaN;
}
double entropy = 0.0;
for (int i = 0; i < numStates; i++) {
for (int j = 0; j < numStates; j++) {
if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
double tmp = stationaryMatrix.get(0, i);
tmp *= transmissionMatrix.get(i, j);
tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
entropy -= tmp;
}
}
}
return entropy;
}
/**
*
* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
* states as nodes and directed edges weighted with transition probabilities.
*
*
* @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
*/
@Override
public String getDotRepresentation() {
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append("digraph model {" + StringTools.ENDLINE);
List knownSymbols = new ArrayList(trie.getKnownSymbols());
for (Event symbol : knownSymbols) {
final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
"\"];" + StringTools.ENDLINE);
List context = new ArrayList();
context.add(symbol);
Collection followers = trie.getFollowingSymbols(context);
for (Event follower : followers) {
stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
knownSymbols.indexOf(follower) + " ");
stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
StringTools.ENDLINE);
}
}
stringBuilder.append('}' + StringTools.ENDLINE);
return stringBuilder.toString();
}
/**
*
* Returns a {@link Graph} representation of the model with the states as nodes and directed
* edges weighted with transition probabilities.
*
*
* @return {@link Graph} of the model
*/
public Graph getGraph() {
Graph graph = new SparseMultigraph();
List knownSymbols = new ArrayList(trie.getKnownSymbols());
for (Event symbol : knownSymbols) {
String from = symbol.getId();
List context = new ArrayList();
context.add(symbol);
Collection followers = trie.getFollowingSymbols(context);
for (Event follower : followers) {
String to = follower.getId();
MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
graph.addEdge(prob, from, to, EdgeType.DIRECTED);
}
}
return graph;
}
/**
* Inner class used for the {@link Graph} representation of the model.
*
* @author Steffen Herbold
* @version 1.0
*/
static public class MarkovEdge {
/**
*
* Weight of the edge, i.e., its transition probability.
*
*/
double weight;
/**
*
* Constructor. Creates a new MarkovEdge.
*
*
* @param weight
* weight of the edge, i.e., its transition probability
*/
MarkovEdge(double weight) {
this.weight = weight;
}
/**
*
* The weight of the edge as {@link String}.
*
*/
public String toString() {
return "" + weight;
}
}
}