Context Navigation

FirstOrderMarkovModel.java @ 922

Last change on this file since 922 was 922, checked in by sherbold, 12 years ago
renaming of packages from de.ugoe.cs.quest to de.ugoe.cs.autoquest
Property svn:mime-type set to `text/plain`
File size: 8.8 KB

Rev	Line
[922]	1	package de.ugoe.cs.autoquest.usageprofiles;
[518]	2
	3	import java.util.ArrayList;
	4	import java.util.Collection;
	5	import java.util.LinkedList;
	6	import java.util.List;
	7	import java.util.Random;
[639]	8	import java.util.logging.Level;
[518]	9
[922]	10	import de.ugoe.cs.autoquest.eventcore.Event;
[518]	11	import de.ugoe.cs.util.StringTools;
	12	import de.ugoe.cs.util.console.Console;
	13	import edu.uci.ics.jung.graph.Graph;
	14	import edu.uci.ics.jung.graph.SparseMultigraph;
	15	import edu.uci.ics.jung.graph.util.EdgeType;
	16
	17	import Jama.Matrix;
	18
	19	/**
	20	* <p>
[559]	21	* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
	22	* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
	23	* calculations are possible with first-order models, e.g., the calculation of the entropy (
[518]	24	* {@link #calcEntropy()}).
	25	* </p>
	26	*
	27	* @author Steffen Herbold
	28	* @version 1.0
	29	*/
[559]	30	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
[518]	31
[559]	32	/**
	33	* <p>
	34	* Id for object serialization.
	35	* </p>
	36	*/
	37	private static final long serialVersionUID = 1L;
[518]	38
[559]	39	/**
	40	* <p>
	41	* Maximum number of iterations when calculating the stationary distribution as the limit of
	42	* multiplying the transmission matrix with itself.
	43	* </p>
	44	*/
	45	final static int MAX_STATDIST_ITERATIONS = 1000;
[518]	46
[559]	47	/**
	48	* <p>
	49	* Constructor. Creates a new FirstOrderMarkovModel.
	50	* </p>
	51	*
	52	* @param r
	53	* random number generator used by probabilistic methods of the class
	54	*/
	55	public FirstOrderMarkovModel(Random r) {
	56	super(1, r);
	57	}
[518]	58
[559]	59	/**
	60	* <p>
	61	* Generates the transmission matrix of the Markov model.
	62	* </p>
	63	*
	64	* @return transmission matrix
	65	*/
	66	private Matrix getTransmissionMatrix() {
	67	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	68	int numStates = knownSymbols.size();
	69	Matrix transmissionMatrix = new Matrix(numStates, numStates);
[518]	70
[559]	71	for (int i = 0; i < numStates; i++) {
	72	Event currentSymbol = knownSymbols.get(i);
	73	List<Event> context = new ArrayList<Event>();
	74	context.add(currentSymbol);
	75	for (int j = 0; j < numStates; j++) {
	76	Event follower = knownSymbols.get(j);
	77	double prob = getProbability(context, follower);
	78	transmissionMatrix.set(i, j, prob);
	79	}
	80	}
	81	return transmissionMatrix;
	82	}
[518]	83
[559]	84	/**
	85	* <p>
	86	* Calculates the entropy of the model. To make it possible that the model is stationary, a
	87	* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
	88	* </p>
	89	*
	90	* @return entropy of the model or NaN if it could not be calculated
	91	*/
	92	public double calcEntropy() {
	93	Matrix transmissionMatrix = getTransmissionMatrix();
	94	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	95	int numStates = knownSymbols.size();
[518]	96
[559]	97	List<Integer> startIndexList = new LinkedList<Integer>();
	98	List<Integer> endIndexList = new LinkedList<Integer>();
	99	for (int i = 0; i < knownSymbols.size(); i++) {
	100	String id = knownSymbols.get(i).getId();
	101	if (id.equals(Event.STARTEVENT.getId()) \|\|
	102	id.contains(Event.STARTEVENT.getId() + "-=-"))
	103	{
	104	startIndexList.add(i);
	105	}
	106	if (id.equals(Event.ENDEVENT.getId()) \|\| id.contains("-=-" + Event.ENDEVENT.getId())) {
	107	endIndexList.add(i);
	108	}
	109	}
[518]	110
[559]	111	if (startIndexList.isEmpty()) {
	112	Console
	113	.printerrln("Error calculating entropy. Initial state of markov chain not found.");
	114	return Double.NaN;
	115	}
	116	if (endIndexList.isEmpty()) {
	117	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
	118	return Double.NaN;
	119	}
	120	for (Integer i : endIndexList) {
	121	for (Integer j : startIndexList) {
	122	transmissionMatrix.set(i, j, 1);
	123	}
	124	}
[518]	125
[559]	126	// Calculate stationary distribution by raising the power of the
	127	// transmission matrix.
	128	// The rank of the matrix should fall to 1 and each two should be the
	129	// vector of the stationory distribution.
	130	int iter = 0;
	131	int rank = transmissionMatrix.rank();
	132	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
	133	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
	134	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
	135	rank = stationaryMatrix.rank();
	136	iter++;
	137	}
[518]	138
[559]	139	if (rank != 1) {
[639]	140	Console.traceln(Level.FINE, "rank: " + rank);
[559]	141	Console.printerrln("Unable to calculate stationary distribution.");
	142	return Double.NaN;
	143	}
[518]	144
[559]	145	double entropy = 0.0;
	146	for (int i = 0; i < numStates; i++) {
	147	for (int j = 0; j < numStates; j++) {
	148	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
	149	double tmp = stationaryMatrix.get(0, i);
	150	tmp *= transmissionMatrix.get(i, j);
	151	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
	152	entropy -= tmp;
	153	}
	154	}
	155	}
	156	return entropy;
	157	}
[518]	158
[559]	159	/**
	160	* <p>
	161	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
	162	* states as nodes and directed edges weighted with transition probabilities.
	163	* </p>
	164	*
[922]	165	* @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
[559]	166	*/
	167	@Override
	168	public String getDotRepresentation() {
	169	StringBuilder stringBuilder = new StringBuilder();
	170	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
[518]	171
[559]	172	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	173	for (Event symbol : knownSymbols) {
	174	final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
	175	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
	176	"\"];" + StringTools.ENDLINE);
	177	List<Event> context = new ArrayList<Event>();
	178	context.add(symbol);
	179	Collection<Event> followers = trie.getFollowingSymbols(context);
	180	for (Event follower : followers) {
	181	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
	182	knownSymbols.indexOf(follower) + " ");
	183	stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
	184	StringTools.ENDLINE);
	185	}
	186	}
	187	stringBuilder.append('}' + StringTools.ENDLINE);
	188	return stringBuilder.toString();
	189	}
[518]	190
[559]	191	/**
	192	* <p>
	193	* Returns a {@link Graph} representation of the model with the states as nodes and directed
	194	* edges weighted with transition probabilities.
	195	* </p>
	196	*
	197	* @return {@link Graph} of the model
	198	*/
	199	public Graph<String, MarkovEdge> getGraph() {
	200	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
[518]	201
[559]	202	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
[518]	203
[559]	204	for (Event symbol : knownSymbols) {
	205	String from = symbol.getId();
	206	List<Event> context = new ArrayList<Event>();
	207	context.add(symbol);
[518]	208
[559]	209	Collection<Event> followers = trie.getFollowingSymbols(context);
[518]	210
[559]	211	for (Event follower : followers) {
	212	String to = follower.getId();
	213	MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
	214	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
	215	}
	216	}
	217	return graph;
	218	}
[518]	219
[559]	220	/**
	221	* Inner class used for the {@link Graph} representation of the model.
	222	*
	223	* @author Steffen Herbold
	224	* @version 1.0
	225	*/
	226	static public class MarkovEdge {
	227	/**
	228	* <p>
	229	* Weight of the edge, i.e., its transition probability.
	230	* </p>
	231	*/
	232	double weight;
[518]	233
[559]	234	/**
	235	* <p>
	236	* Constructor. Creates a new MarkovEdge.
	237	* </p>
	238	*
	239	* @param weight
	240	* weight of the edge, i.e., its transition probability
	241	*/
	242	MarkovEdge(double weight) {
	243	this.weight = weight;
	244	}
[518]	245
[559]	246	/**
	247	* <p>
	248	* The weight of the edge as {@link String}.
	249	* </p>
	250	*/
	251	public String toString() {
	252	return "" + weight;
	253	}
	254	}
	255
[518]	256	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format