Context Navigation

FirstOrderMarkovModel.java @ 927

Last change on this file since 927 was 927, checked in by sherbold, 12 years ago
added copyright under the Apache License, Version 2.0
Property svn:mime-type set to `text/plain`
File size: 9.5 KB

Rev	Line
[927]	1	// Copyright 2012 Georg-August-Universität Göttingen, Germany
	2	//
	3	// Licensed under the Apache License, Version 2.0 (the "License");
	4	// you may not use this file except in compliance with the License.
	5	// You may obtain a copy of the License at
	6	//
	7	// http://www.apache.org/licenses/LICENSE-2.0
	8	//
	9	// Unless required by applicable law or agreed to in writing, software
	10	// distributed under the License is distributed on an "AS IS" BASIS,
	11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	12	// See the License for the specific language governing permissions and
	13	// limitations under the License.
	14
[922]	15	package de.ugoe.cs.autoquest.usageprofiles;
[518]	16
	17	import java.util.ArrayList;
	18	import java.util.Collection;
	19	import java.util.LinkedList;
	20	import java.util.List;
	21	import java.util.Random;
[639]	22	import java.util.logging.Level;
[518]	23
[922]	24	import de.ugoe.cs.autoquest.eventcore.Event;
[518]	25	import de.ugoe.cs.util.StringTools;
	26	import de.ugoe.cs.util.console.Console;
	27	import edu.uci.ics.jung.graph.Graph;
	28	import edu.uci.ics.jung.graph.SparseMultigraph;
	29	import edu.uci.ics.jung.graph.util.EdgeType;
	30
	31	import Jama.Matrix;
	32
	33	/**
	34	* <p>
[559]	35	* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
	36	* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
	37	* calculations are possible with first-order models, e.g., the calculation of the entropy (
[518]	38	* {@link #calcEntropy()}).
	39	* </p>
	40	*
	41	* @author Steffen Herbold
	42	* @version 1.0
	43	*/
[559]	44	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
[518]	45
[559]	46	/**
	47	* <p>
	48	* Id for object serialization.
	49	* </p>
	50	*/
	51	private static final long serialVersionUID = 1L;
[518]	52
[559]	53	/**
	54	* <p>
	55	* Maximum number of iterations when calculating the stationary distribution as the limit of
	56	* multiplying the transmission matrix with itself.
	57	* </p>
	58	*/
	59	final static int MAX_STATDIST_ITERATIONS = 1000;
[518]	60
[559]	61	/**
	62	* <p>
	63	* Constructor. Creates a new FirstOrderMarkovModel.
	64	* </p>
	65	*
	66	* @param r
	67	* random number generator used by probabilistic methods of the class
	68	*/
	69	public FirstOrderMarkovModel(Random r) {
	70	super(1, r);
	71	}
[518]	72
[559]	73	/**
	74	* <p>
	75	* Generates the transmission matrix of the Markov model.
	76	* </p>
	77	*
	78	* @return transmission matrix
	79	*/
	80	private Matrix getTransmissionMatrix() {
	81	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	82	int numStates = knownSymbols.size();
	83	Matrix transmissionMatrix = new Matrix(numStates, numStates);
[518]	84
[559]	85	for (int i = 0; i < numStates; i++) {
	86	Event currentSymbol = knownSymbols.get(i);
	87	List<Event> context = new ArrayList<Event>();
	88	context.add(currentSymbol);
	89	for (int j = 0; j < numStates; j++) {
	90	Event follower = knownSymbols.get(j);
	91	double prob = getProbability(context, follower);
	92	transmissionMatrix.set(i, j, prob);
	93	}
	94	}
	95	return transmissionMatrix;
	96	}
[518]	97
[559]	98	/**
	99	* <p>
	100	* Calculates the entropy of the model. To make it possible that the model is stationary, a
	101	* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
	102	* </p>
	103	*
	104	* @return entropy of the model or NaN if it could not be calculated
	105	*/
	106	public double calcEntropy() {
	107	Matrix transmissionMatrix = getTransmissionMatrix();
	108	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	109	int numStates = knownSymbols.size();
[518]	110
[559]	111	List<Integer> startIndexList = new LinkedList<Integer>();
	112	List<Integer> endIndexList = new LinkedList<Integer>();
	113	for (int i = 0; i < knownSymbols.size(); i++) {
	114	String id = knownSymbols.get(i).getId();
	115	if (id.equals(Event.STARTEVENT.getId()) \|\|
	116	id.contains(Event.STARTEVENT.getId() + "-=-"))
	117	{
	118	startIndexList.add(i);
	119	}
	120	if (id.equals(Event.ENDEVENT.getId()) \|\| id.contains("-=-" + Event.ENDEVENT.getId())) {
	121	endIndexList.add(i);
	122	}
	123	}
[518]	124
[559]	125	if (startIndexList.isEmpty()) {
	126	Console
	127	.printerrln("Error calculating entropy. Initial state of markov chain not found.");
	128	return Double.NaN;
	129	}
	130	if (endIndexList.isEmpty()) {
	131	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
	132	return Double.NaN;
	133	}
	134	for (Integer i : endIndexList) {
	135	for (Integer j : startIndexList) {
	136	transmissionMatrix.set(i, j, 1);
	137	}
	138	}
[518]	139
[559]	140	// Calculate stationary distribution by raising the power of the
	141	// transmission matrix.
	142	// The rank of the matrix should fall to 1 and each two should be the
	143	// vector of the stationory distribution.
	144	int iter = 0;
	145	int rank = transmissionMatrix.rank();
	146	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
	147	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
	148	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
	149	rank = stationaryMatrix.rank();
	150	iter++;
	151	}
[518]	152
[559]	153	if (rank != 1) {
[639]	154	Console.traceln(Level.FINE, "rank: " + rank);
[559]	155	Console.printerrln("Unable to calculate stationary distribution.");
	156	return Double.NaN;
	157	}
[518]	158
[559]	159	double entropy = 0.0;
	160	for (int i = 0; i < numStates; i++) {
	161	for (int j = 0; j < numStates; j++) {
	162	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
	163	double tmp = stationaryMatrix.get(0, i);
	164	tmp *= transmissionMatrix.get(i, j);
	165	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
	166	entropy -= tmp;
	167	}
	168	}
	169	}
	170	return entropy;
	171	}
[518]	172
[559]	173	/**
	174	* <p>
	175	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
	176	* states as nodes and directed edges weighted with transition probabilities.
	177	* </p>
	178	*
[922]	179	* @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
[559]	180	*/
	181	@Override
	182	public String getDotRepresentation() {
	183	StringBuilder stringBuilder = new StringBuilder();
	184	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
[518]	185
[559]	186	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
	187	for (Event symbol : knownSymbols) {
	188	final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
	189	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
	190	"\"];" + StringTools.ENDLINE);
	191	List<Event> context = new ArrayList<Event>();
	192	context.add(symbol);
	193	Collection<Event> followers = trie.getFollowingSymbols(context);
	194	for (Event follower : followers) {
	195	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
	196	knownSymbols.indexOf(follower) + " ");
	197	stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
	198	StringTools.ENDLINE);
	199	}
	200	}
	201	stringBuilder.append('}' + StringTools.ENDLINE);
	202	return stringBuilder.toString();
	203	}
[518]	204
[559]	205	/**
	206	* <p>
	207	* Returns a {@link Graph} representation of the model with the states as nodes and directed
	208	* edges weighted with transition probabilities.
	209	* </p>
	210	*
	211	* @return {@link Graph} of the model
	212	*/
	213	public Graph<String, MarkovEdge> getGraph() {
	214	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
[518]	215
[559]	216	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
[518]	217
[559]	218	for (Event symbol : knownSymbols) {
	219	String from = symbol.getId();
	220	List<Event> context = new ArrayList<Event>();
	221	context.add(symbol);
[518]	222
[559]	223	Collection<Event> followers = trie.getFollowingSymbols(context);
[518]	224
[559]	225	for (Event follower : followers) {
	226	String to = follower.getId();
	227	MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
	228	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
	229	}
	230	}
	231	return graph;
	232	}
[518]	233
[559]	234	/**
	235	* Inner class used for the {@link Graph} representation of the model.
	236	*
	237	* @author Steffen Herbold
	238	* @version 1.0
	239	*/
	240	static public class MarkovEdge {
	241	/**
	242	* <p>
	243	* Weight of the edge, i.e., its transition probability.
	244	* </p>
	245	*/
	246	double weight;
[518]	247
[559]	248	/**
	249	* <p>
	250	* Constructor. Creates a new MarkovEdge.
	251	* </p>
	252	*
	253	* @param weight
	254	* weight of the edge, i.e., its transition probability
	255	*/
	256	MarkovEdge(double weight) {
	257	this.weight = weight;
	258	}
[518]	259
[559]	260	/**
	261	* <p>
	262	* The weight of the edge as {@link String}.
	263	* </p>
	264	*/
	265	public String toString() {
	266	return "" + weight;
	267	}
	268	}
	269
[518]	270	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format