Context Navigation

FirstOrderMarkovModel.java @ 102

Last change on this file since 102 was 102, checked in by sherbold, 13 years ago
replaced java.util.Set and Java.util.List with java.util.Collection where possible
Property svn:mime-type set to `text/plain`
File size: 7.2 KB

Rev	Line
[16]	1	package de.ugoe.cs.eventbench.models;
	2
	3	import java.util.ArrayList;
[102]	4	import java.util.Collection;
[16]	5	import java.util.List;
	6	import java.util.Random;
	7
	8	import de.ugoe.cs.eventbench.data.Event;
[29]	9	import de.ugoe.cs.util.StringTools;
[16]	10	import de.ugoe.cs.util.console.Console;
	11	import edu.uci.ics.jung.graph.Graph;
	12	import edu.uci.ics.jung.graph.SparseMultigraph;
	13	import edu.uci.ics.jung.graph.util.EdgeType;
	14
	15	import Jama.Matrix;
	16
[101]	17	/**
	18	* <p>
	19	* Implements first-order Markov models. The implementation is based on
	20	* {@link HighOrderMarkovModel} and restricts the Markov order to 1. In
	21	* comparison to {@link HighOrderMarkovModel}, more calculations are possible
	22	* with first-order models, e.g., the calculation of the entropy (
	23	* {@link #calcEntropy()}).
	24	* </p>
	25	*
	26	* @author Steffen Herbold
	27	* @version 1.0
	28	*/
	29	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements
	30	IDotCompatible {
[16]	31
[86]	32	/**
[101]	33	* <p>
[86]	34	* Id for object serialization.
[101]	35	* </p>
[86]	36	*/
	37	private static final long serialVersionUID = 1L;
[101]	38
	39	/**
	40	* <p>
	41	* Maximum number of iterations when calculating the stationary distribution
	42	* as the limit of multiplying the transmission matrix with itself.
	43	* </p>
	44	*/
[16]	45	final static int MAX_STATDIST_ITERATIONS = 1000;
[101]	46
	47	/**
	48	* <p>
	49	* Constructor. Creates a new FirstOrderMarkovModel.
	50	* </p>
	51	*
	52	* @param r
	53	* random number generator used by probabilistic methods of the
	54	* class
	55	*/
[16]	56	public FirstOrderMarkovModel(Random r) {
	57	super(1, r);
	58	}
[101]	59
	60	/**
	61	* <p>
	62	* Generates the transmission matrix of the Markov model.
	63	* </p>
	64	*
	65	* @return transmission matrix
	66	*/
[16]	67	private Matrix getTransmissionMatrix() {
[101]	68	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
	69	trie.getKnownSymbols());
[16]	70	int numStates = knownSymbols.size();
	71	Matrix transmissionMatrix = new Matrix(numStates, numStates);
[101]	72
	73	for (int i = 0; i < numStates; i++) {
[16]	74	Event<?> currentSymbol = knownSymbols.get(i);
	75	List<Event<?>> context = new ArrayList<Event<?>>();
	76	context.add(currentSymbol);
[101]	77	for (int j = 0; j < numStates; j++) {
[16]	78	Event<?> follower = knownSymbols.get(j);
	79	double prob = getProbability(context, follower);
	80	transmissionMatrix.set(i, j, prob);
	81	}
	82	}
	83	return transmissionMatrix;
	84	}
	85
[101]	86	/**
	87	* <p>
	88	* Calculates the entropy of the model. To make it possible that the model
	89	* is stationary, a transition from {@link Event#ENDEVENT} to
	90	* {@link Event#STARTEVENT} is added.
	91	* </p>
	92	*
	93	* @return entropy of the model or NaN if it could not be calculated
	94	*/
[16]	95	public double calcEntropy() {
	96	Matrix transmissionMatrix = getTransmissionMatrix();
[101]	97	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
	98	trie.getKnownSymbols());
[16]	99	int numStates = knownSymbols.size();
[101]	100
[16]	101	int startStateIndex = knownSymbols.indexOf(Event.STARTEVENT);
	102	int endStateIndex = knownSymbols.indexOf(Event.ENDEVENT);
[101]	103	if (startStateIndex == -1) {
[16]	104	Console.printerrln("Error calculating entropy. Initial state of markov chain not found.");
	105	return Double.NaN;
	106	}
[101]	107	if (endStateIndex == -1) {
[16]	108	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
	109	return Double.NaN;
	110	}
	111	transmissionMatrix.set(endStateIndex, startStateIndex, 1);
[101]	112
	113	// Calculate stationary distribution by raising the power of the
	114	// transmission matrix.
	115	// The rank of the matrix should fall to 1 and each two should be the
	116	// vector of the stationory distribution.
[16]	117	int iter = 0;
	118	int rank = transmissionMatrix.rank();
	119	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
[101]	120	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
[16]	121	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
	122	rank = stationaryMatrix.rank();
	123	iter++;
	124	}
[101]	125
	126	if (rank != 1) {
[16]	127	Console.traceln("rank: " + rank);
	128	Console.printerrln("Unable to calculate stationary distribution.");
	129	return Double.NaN;
	130	}
[101]	131
[16]	132	double entropy = 0.0;
[101]	133	for (int i = 0; i < numStates; i++) {
	134	for (int j = 0; j < numStates; j++) {
	135	if (transmissionMatrix.get(i, j) != 0) {
[16]	136	double tmp = stationaryMatrix.get(i, 0);
	137	tmp *= transmissionMatrix.get(i, j);
[101]	138	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
[16]	139	entropy -= tmp;
	140	}
	141	}
	142	}
	143	return entropy;
	144	}
	145
[101]	146	/**
	147	* <p>
	148	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph
	149	* representation with the states as nodes and directed edges weighted with
	150	* transition probabilities.
	151	* </p>
	152	*
	153	* @see de.ugoe.cs.eventbench.models.IDotCompatible#getDotRepresentation()
	154	*/
	155	@Override
	156	public String getDotRepresentation() {
	157	StringBuilder stringBuilder = new StringBuilder();
	158	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
	159
	160	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
	161	trie.getKnownSymbols());
	162
	163	for (Event<?> symbol : knownSymbols) {
	164	final String thisSaneId = symbol.getShortId().replace("\"", "\\\"")
	165	.replaceAll("[\r\n]", "");
	166	stringBuilder.append(" " + symbol.hashCode() + " [label=\""
	167	+ thisSaneId + "\"];" + StringTools.ENDLINE);
	168	List<Event<?>> context = new ArrayList<Event<?>>();
	169	context.add(symbol);
[102]	170	Collection<Event<?>> followers = trie.getFollowingSymbols(context);
[101]	171	for (Event<?> follower : followers) {
	172	stringBuilder.append(" " + symbol.hashCode() + " -> "
	173	+ follower.hashCode() + " ");
	174	stringBuilder.append("[label=\""
	175	+ getProbability(context, follower) + "\"];"
	176	+ StringTools.ENDLINE);
	177	}
	178	}
	179	stringBuilder.append('}' + StringTools.ENDLINE);
	180	return stringBuilder.toString();
	181	}
	182
	183	/**
	184	* <p>
	185	* Returns a {@link Graph} represenation of the model with the states as
	186	* nodes and directed edges weighted with transition probabilities.
	187	* </p>
	188	*
	189	* @return {@link Graph} of the model
	190	*/
	191	public Graph<String, MarkovEdge> getGraph() {
	192	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
	193
	194	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
	195	trie.getKnownSymbols());
	196
	197	for (Event<?> symbol : knownSymbols) {
	198	String from = symbol.getShortId();
	199	List<Event<?>> context = new ArrayList<Event<?>>();
	200	context.add(symbol);
	201
[102]	202	Collection<Event<?>> followers = trie.getFollowingSymbols(context);
[101]	203
	204	for (Event<?> follower : followers) {
	205	String to = follower.getShortId();
	206	MarkovEdge prob = new MarkovEdge(getProbability(context,
	207	follower));
	208	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
	209	}
	210	}
	211	return graph;
	212	}
	213
	214	/**
	215	* Inner class used for the {@link Graph} representation of the model.
	216	*
	217	* @author Steffen Herbold
	218	* @version 1.0
	219	*/
	220	static public class MarkovEdge {
	221	/**
	222	* <p>
	223	* Weight of the edge, i.e., its transition probability.
	224	* </p>
	225	*/
	226	double weight;
	227
	228	/**
	229	* <p>
	230	* Constructor. Creates a new MarkovEdge.
	231	* </p>
	232	*
	233	* @param weight
	234	* weight of the edge, i.e., its transition probability
	235	*/
	236	MarkovEdge(double weight) {
	237	this.weight = weight;
	238	}
	239
	240	/**
	241	* <p>
	242	* The weight of the edge as {@link String}.
	243	* </p>
	244	*/
	245	public String toString() {
	246	return "" + weight;
	247	}
	248	}
	249
[16]	250	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format