Context Navigation

FirstOrderMarkovModel.java @ 639

Last change on this file since 639 was 639, checked in by sherbold, 12 years ago
all usages of the Console tracing API now define log levels
Property svn:mime-type set to `text/plain`
File size: 8.8 KB

Line
1
2	package de.ugoe.cs.quest.usageprofiles;
3
4	import java.util.ArrayList;
5	import java.util.Collection;
6	import java.util.LinkedList;
7	import java.util.List;
8	import java.util.Random;
9	import java.util.logging.Level;
10
11	import de.ugoe.cs.quest.eventcore.Event;
12	import de.ugoe.cs.util.StringTools;
13	import de.ugoe.cs.util.console.Console;
14	import edu.uci.ics.jung.graph.Graph;
15	import edu.uci.ics.jung.graph.SparseMultigraph;
16	import edu.uci.ics.jung.graph.util.EdgeType;
17
18	import Jama.Matrix;
19
20	/**
21	* <p>
22	* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
23	* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
24	* calculations are possible with first-order models, e.g., the calculation of the entropy (
25	* {@link #calcEntropy()}).
26	* </p>
27	*
28	* @author Steffen Herbold
29	* @version 1.0
30	*/
31	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
32
33	/**
34	* <p>
35	* Id for object serialization.
36	* </p>
37	*/
38	private static final long serialVersionUID = 1L;
39
40	/**
41	* <p>
42	* Maximum number of iterations when calculating the stationary distribution as the limit of
43	* multiplying the transmission matrix with itself.
44	* </p>
45	*/
46	final static int MAX_STATDIST_ITERATIONS = 1000;
47
48	/**
49	* <p>
50	* Constructor. Creates a new FirstOrderMarkovModel.
51	* </p>
52	*
53	* @param r
54	* random number generator used by probabilistic methods of the class
55	*/
56	public FirstOrderMarkovModel(Random r) {
57	super(1, r);
58	}
59
60	/**
61	* <p>
62	* Generates the transmission matrix of the Markov model.
63	* </p>
64	*
65	* @return transmission matrix
66	*/
67	private Matrix getTransmissionMatrix() {
68	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
69	int numStates = knownSymbols.size();
70	Matrix transmissionMatrix = new Matrix(numStates, numStates);
71
72	for (int i = 0; i < numStates; i++) {
73	Event currentSymbol = knownSymbols.get(i);
74	List<Event> context = new ArrayList<Event>();
75	context.add(currentSymbol);
76	for (int j = 0; j < numStates; j++) {
77	Event follower = knownSymbols.get(j);
78	double prob = getProbability(context, follower);
79	transmissionMatrix.set(i, j, prob);
80	}
81	}
82	return transmissionMatrix;
83	}
84
85	/**
86	* <p>
87	* Calculates the entropy of the model. To make it possible that the model is stationary, a
88	* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
89	* </p>
90	*
91	* @return entropy of the model or NaN if it could not be calculated
92	*/
93	public double calcEntropy() {
94	Matrix transmissionMatrix = getTransmissionMatrix();
95	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
96	int numStates = knownSymbols.size();
97
98	List<Integer> startIndexList = new LinkedList<Integer>();
99	List<Integer> endIndexList = new LinkedList<Integer>();
100	for (int i = 0; i < knownSymbols.size(); i++) {
101	String id = knownSymbols.get(i).getId();
102	if (id.equals(Event.STARTEVENT.getId()) \|\|
103	id.contains(Event.STARTEVENT.getId() + "-=-"))
104	{
105	startIndexList.add(i);
106	}
107	if (id.equals(Event.ENDEVENT.getId()) \|\| id.contains("-=-" + Event.ENDEVENT.getId())) {
108	endIndexList.add(i);
109	}
110	}
111
112	if (startIndexList.isEmpty()) {
113	Console
114	.printerrln("Error calculating entropy. Initial state of markov chain not found.");
115	return Double.NaN;
116	}
117	if (endIndexList.isEmpty()) {
118	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
119	return Double.NaN;
120	}
121	for (Integer i : endIndexList) {
122	for (Integer j : startIndexList) {
123	transmissionMatrix.set(i, j, 1);
124	}
125	}
126
127	// Calculate stationary distribution by raising the power of the
128	// transmission matrix.
129	// The rank of the matrix should fall to 1 and each two should be the
130	// vector of the stationory distribution.
131	int iter = 0;
132	int rank = transmissionMatrix.rank();
133	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
134	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
135	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
136	rank = stationaryMatrix.rank();
137	iter++;
138	}
139
140	if (rank != 1) {
141	Console.traceln(Level.FINE, "rank: " + rank);
142	Console.printerrln("Unable to calculate stationary distribution.");
143	return Double.NaN;
144	}
145
146	double entropy = 0.0;
147	for (int i = 0; i < numStates; i++) {
148	for (int j = 0; j < numStates; j++) {
149	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
150	double tmp = stationaryMatrix.get(0, i);
151	tmp *= transmissionMatrix.get(i, j);
152	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
153	entropy -= tmp;
154	}
155	}
156	}
157	return entropy;
158	}
159
160	/**
161	* <p>
162	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
163	* states as nodes and directed edges weighted with transition probabilities.
164	* </p>
165	*
166	* @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
167	*/
168	@Override
169	public String getDotRepresentation() {
170	StringBuilder stringBuilder = new StringBuilder();
171	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
172
173	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
174	for (Event symbol : knownSymbols) {
175	final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
176	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
177	"\"];" + StringTools.ENDLINE);
178	List<Event> context = new ArrayList<Event>();
179	context.add(symbol);
180	Collection<Event> followers = trie.getFollowingSymbols(context);
181	for (Event follower : followers) {
182	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
183	knownSymbols.indexOf(follower) + " ");
184	stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
185	StringTools.ENDLINE);
186	}
187	}
188	stringBuilder.append('}' + StringTools.ENDLINE);
189	return stringBuilder.toString();
190	}
191
192	/**
193	* <p>
194	* Returns a {@link Graph} representation of the model with the states as nodes and directed
195	* edges weighted with transition probabilities.
196	* </p>
197	*
198	* @return {@link Graph} of the model
199	*/
200	public Graph<String, MarkovEdge> getGraph() {
201	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
202
203	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
204
205	for (Event symbol : knownSymbols) {
206	String from = symbol.getId();
207	List<Event> context = new ArrayList<Event>();
208	context.add(symbol);
209
210	Collection<Event> followers = trie.getFollowingSymbols(context);
211
212	for (Event follower : followers) {
213	String to = follower.getId();
214	MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
215	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
216	}
217	}
218	return graph;
219	}
220
221	/**
222	* Inner class used for the {@link Graph} representation of the model.
223	*
224	* @author Steffen Herbold
225	* @version 1.0
226	*/
227	static public class MarkovEdge {
228	/**
229	* <p>
230	* Weight of the edge, i.e., its transition probability.
231	* </p>
232	*/
233	double weight;
234
235	/**
236	* <p>
237	* Constructor. Creates a new MarkovEdge.
238	* </p>
239	*
240	* @param weight
241	* weight of the edge, i.e., its transition probability
242	*/
243	MarkovEdge(double weight) {
244	this.weight = weight;
245	}
246
247	/**
248	* <p>
249	* The weight of the edge as {@link String}.
250	* </p>
251	*/
252	public String toString() {
253	return "" + weight;
254	}
255	}
256
257	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format