Context Navigation

FirstOrderMarkovModel.java @ 766

Last change on this file since 766 was 655, checked in by pharms, 12 years ago
removed old copyright file header
Property svn:mime-type set to `text/plain`
File size: 8.8 KB

Line
1	package de.ugoe.cs.quest.usageprofiles;
2
3	import java.util.ArrayList;
4	import java.util.Collection;
5	import java.util.LinkedList;
6	import java.util.List;
7	import java.util.Random;
8	import java.util.logging.Level;
9
10	import de.ugoe.cs.quest.eventcore.Event;
11	import de.ugoe.cs.util.StringTools;
12	import de.ugoe.cs.util.console.Console;
13	import edu.uci.ics.jung.graph.Graph;
14	import edu.uci.ics.jung.graph.SparseMultigraph;
15	import edu.uci.ics.jung.graph.util.EdgeType;
16
17	import Jama.Matrix;
18
19	/**
20	* <p>
21	* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
22	* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
23	* calculations are possible with first-order models, e.g., the calculation of the entropy (
24	* {@link #calcEntropy()}).
25	* </p>
26	*
27	* @author Steffen Herbold
28	* @version 1.0
29	*/
30	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
31
32	/**
33	* <p>
34	* Id for object serialization.
35	* </p>
36	*/
37	private static final long serialVersionUID = 1L;
38
39	/**
40	* <p>
41	* Maximum number of iterations when calculating the stationary distribution as the limit of
42	* multiplying the transmission matrix with itself.
43	* </p>
44	*/
45	final static int MAX_STATDIST_ITERATIONS = 1000;
46
47	/**
48	* <p>
49	* Constructor. Creates a new FirstOrderMarkovModel.
50	* </p>
51	*
52	* @param r
53	* random number generator used by probabilistic methods of the class
54	*/
55	public FirstOrderMarkovModel(Random r) {
56	super(1, r);
57	}
58
59	/**
60	* <p>
61	* Generates the transmission matrix of the Markov model.
62	* </p>
63	*
64	* @return transmission matrix
65	*/
66	private Matrix getTransmissionMatrix() {
67	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
68	int numStates = knownSymbols.size();
69	Matrix transmissionMatrix = new Matrix(numStates, numStates);
70
71	for (int i = 0; i < numStates; i++) {
72	Event currentSymbol = knownSymbols.get(i);
73	List<Event> context = new ArrayList<Event>();
74	context.add(currentSymbol);
75	for (int j = 0; j < numStates; j++) {
76	Event follower = knownSymbols.get(j);
77	double prob = getProbability(context, follower);
78	transmissionMatrix.set(i, j, prob);
79	}
80	}
81	return transmissionMatrix;
82	}
83
84	/**
85	* <p>
86	* Calculates the entropy of the model. To make it possible that the model is stationary, a
87	* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
88	* </p>
89	*
90	* @return entropy of the model or NaN if it could not be calculated
91	*/
92	public double calcEntropy() {
93	Matrix transmissionMatrix = getTransmissionMatrix();
94	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
95	int numStates = knownSymbols.size();
96
97	List<Integer> startIndexList = new LinkedList<Integer>();
98	List<Integer> endIndexList = new LinkedList<Integer>();
99	for (int i = 0; i < knownSymbols.size(); i++) {
100	String id = knownSymbols.get(i).getId();
101	if (id.equals(Event.STARTEVENT.getId()) \|\|
102	id.contains(Event.STARTEVENT.getId() + "-=-"))
103	{
104	startIndexList.add(i);
105	}
106	if (id.equals(Event.ENDEVENT.getId()) \|\| id.contains("-=-" + Event.ENDEVENT.getId())) {
107	endIndexList.add(i);
108	}
109	}
110
111	if (startIndexList.isEmpty()) {
112	Console
113	.printerrln("Error calculating entropy. Initial state of markov chain not found.");
114	return Double.NaN;
115	}
116	if (endIndexList.isEmpty()) {
117	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
118	return Double.NaN;
119	}
120	for (Integer i : endIndexList) {
121	for (Integer j : startIndexList) {
122	transmissionMatrix.set(i, j, 1);
123	}
124	}
125
126	// Calculate stationary distribution by raising the power of the
127	// transmission matrix.
128	// The rank of the matrix should fall to 1 and each two should be the
129	// vector of the stationory distribution.
130	int iter = 0;
131	int rank = transmissionMatrix.rank();
132	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
133	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
134	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
135	rank = stationaryMatrix.rank();
136	iter++;
137	}
138
139	if (rank != 1) {
140	Console.traceln(Level.FINE, "rank: " + rank);
141	Console.printerrln("Unable to calculate stationary distribution.");
142	return Double.NaN;
143	}
144
145	double entropy = 0.0;
146	for (int i = 0; i < numStates; i++) {
147	for (int j = 0; j < numStates; j++) {
148	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
149	double tmp = stationaryMatrix.get(0, i);
150	tmp *= transmissionMatrix.get(i, j);
151	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
152	entropy -= tmp;
153	}
154	}
155	}
156	return entropy;
157	}
158
159	/**
160	* <p>
161	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
162	* states as nodes and directed edges weighted with transition probabilities.
163	* </p>
164	*
165	* @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
166	*/
167	@Override
168	public String getDotRepresentation() {
169	StringBuilder stringBuilder = new StringBuilder();
170	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
171
172	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
173	for (Event symbol : knownSymbols) {
174	final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
175	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
176	"\"];" + StringTools.ENDLINE);
177	List<Event> context = new ArrayList<Event>();
178	context.add(symbol);
179	Collection<Event> followers = trie.getFollowingSymbols(context);
180	for (Event follower : followers) {
181	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
182	knownSymbols.indexOf(follower) + " ");
183	stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
184	StringTools.ENDLINE);
185	}
186	}
187	stringBuilder.append('}' + StringTools.ENDLINE);
188	return stringBuilder.toString();
189	}
190
191	/**
192	* <p>
193	* Returns a {@link Graph} representation of the model with the states as nodes and directed
194	* edges weighted with transition probabilities.
195	* </p>
196	*
197	* @return {@link Graph} of the model
198	*/
199	public Graph<String, MarkovEdge> getGraph() {
200	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
201
202	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
203
204	for (Event symbol : knownSymbols) {
205	String from = symbol.getId();
206	List<Event> context = new ArrayList<Event>();
207	context.add(symbol);
208
209	Collection<Event> followers = trie.getFollowingSymbols(context);
210
211	for (Event follower : followers) {
212	String to = follower.getId();
213	MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
214	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
215	}
216	}
217	return graph;
218	}
219
220	/**
221	* Inner class used for the {@link Graph} representation of the model.
222	*
223	* @author Steffen Herbold
224	* @version 1.0
225	*/
226	static public class MarkovEdge {
227	/**
228	* <p>
229	* Weight of the edge, i.e., its transition probability.
230	* </p>
231	*/
232	double weight;
233
234	/**
235	* <p>
236	* Constructor. Creates a new MarkovEdge.
237	* </p>
238	*
239	* @param weight
240	* weight of the edge, i.e., its transition probability
241	*/
242	MarkovEdge(double weight) {
243	this.weight = weight;
244	}
245
246	/**
247	* <p>
248	* The weight of the edge as {@link String}.
249	* </p>
250	*/
251	public String toString() {
252	return "" + weight;
253	}
254	}
255
256	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format