Context Navigation

FirstOrderMarkovModel.java @ 432

Last change on this file since 432 was 432, checked in by sherbold, 12 years ago
renamed packages to fir QUEST project structure
Property svn:mime-type set to `text/plain`
File size: 7.7 KB

Line
1	package de.ugoe.cs.quest.models;
2
3	import java.util.ArrayList;
4	import java.util.Collection;
5	import java.util.LinkedList;
6	import java.util.List;
7	import java.util.Random;
8
9	import de.ugoe.cs.quest.data.Event;
10	import de.ugoe.cs.util.StringTools;
11	import de.ugoe.cs.util.console.Console;
12	import edu.uci.ics.jung.graph.Graph;
13	import edu.uci.ics.jung.graph.SparseMultigraph;
14	import edu.uci.ics.jung.graph.util.EdgeType;
15
16	import Jama.Matrix;
17
18	/**
19	* <p>
20	* Implements first-order Markov models. The implementation is based on
21	* {@link HighOrderMarkovModel} and restricts the Markov order to 1. In
22	* comparison to {@link HighOrderMarkovModel}, more calculations are possible
23	* with first-order models, e.g., the calculation of the entropy (
24	* {@link #calcEntropy()}).
25	* </p>
26	*
27	* @author Steffen Herbold
28	* @version 1.0
29	*/
30	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements
31	IDotCompatible {
32
33	/**
34	* <p>
35	* Id for object serialization.
36	* </p>
37	*/
38	private static final long serialVersionUID = 1L;
39
40	/**
41	* <p>
42	* Maximum number of iterations when calculating the stationary distribution
43	* as the limit of multiplying the transmission matrix with itself.
44	* </p>
45	*/
46	final static int MAX_STATDIST_ITERATIONS = 1000;
47
48	/**
49	* <p>
50	* Constructor. Creates a new FirstOrderMarkovModel.
51	* </p>
52	*
53	* @param r
54	* random number generator used by probabilistic methods of the
55	* class
56	*/
57	public FirstOrderMarkovModel(Random r) {
58	super(1, r);
59	}
60
61	/**
62	* <p>
63	* Generates the transmission matrix of the Markov model.
64	* </p>
65	*
66	* @return transmission matrix
67	*/
68	private Matrix getTransmissionMatrix() {
69	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
70	trie.getKnownSymbols());
71	int numStates = knownSymbols.size();
72	Matrix transmissionMatrix = new Matrix(numStates, numStates);
73
74	for (int i = 0; i < numStates; i++) {
75	Event<?> currentSymbol = knownSymbols.get(i);
76	List<Event<?>> context = new ArrayList<Event<?>>();
77	context.add(currentSymbol);
78	for (int j = 0; j < numStates; j++) {
79	Event<?> follower = knownSymbols.get(j);
80	double prob = getProbability(context, follower);
81	transmissionMatrix.set(i, j, prob);
82	}
83	}
84	return transmissionMatrix;
85	}
86
87	/**
88	* <p>
89	* Calculates the entropy of the model. To make it possible that the model
90	* is stationary, a transition from {@link Event#ENDEVENT} to
91	* {@link Event#STARTEVENT} is added.
92	* </p>
93	*
94	* @return entropy of the model or NaN if it could not be calculated
95	*/
96	public double calcEntropy() {
97	Matrix transmissionMatrix = getTransmissionMatrix();
98	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
99	trie.getKnownSymbols());
100	int numStates = knownSymbols.size();
101
102	List<Integer> startIndexList = new LinkedList<Integer>();
103	List<Integer> endIndexList = new LinkedList<Integer>();
104	for( int i=0 ; i<knownSymbols.size() ; i++ ) {
105	String id = knownSymbols.get(i).getStandardId();
106	if( id.equals(Event.STARTEVENT.getStandardId()) \|\| id.contains(Event.STARTEVENT.getStandardId()+"-=-") ) {
107	startIndexList.add(i);
108	}
109	if( id.equals(Event.ENDEVENT.getStandardId()) \|\| id.contains("-=-"+Event.ENDEVENT.getStandardId()) ) {
110	endIndexList.add(i);
111	}
112	}
113
114	if (startIndexList.isEmpty()) {
115	Console.printerrln("Error calculating entropy. Initial state of markov chain not found.");
116	return Double.NaN;
117	}
118	if (endIndexList.isEmpty()) {
119	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
120	return Double.NaN;
121	}
122	for( Integer i : endIndexList ) {
123	for(Integer j : startIndexList ) {
124	transmissionMatrix.set(i, j, 1);
125	}
126	}
127
128	// Calculate stationary distribution by raising the power of the
129	// transmission matrix.
130	// The rank of the matrix should fall to 1 and each two should be the
131	// vector of the stationory distribution.
132	int iter = 0;
133	int rank = transmissionMatrix.rank();
134	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
135	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
136	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
137	rank = stationaryMatrix.rank();
138	iter++;
139	}
140
141	if (rank != 1) {
142	Console.traceln("rank: " + rank);
143	Console.printerrln("Unable to calculate stationary distribution.");
144	return Double.NaN;
145	}
146
147	double entropy = 0.0;
148	for (int i = 0; i < numStates; i++) {
149	for (int j = 0; j < numStates; j++) {
150	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j)!=1) {
151	double tmp = stationaryMatrix.get(0, i);
152	tmp *= transmissionMatrix.get(i, j);
153	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
154	entropy -= tmp;
155	}
156	}
157	}
158	return entropy;
159	}
160
161	/**
162	* <p>
163	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph
164	* representation with the states as nodes and directed edges weighted with
165	* transition probabilities.
166	* </p>
167	*
168	* @see de.ugoe.cs.quest.models.IDotCompatible#getDotRepresentation()
169	*/
170	@Override
171	public String getDotRepresentation() {
172	StringBuilder stringBuilder = new StringBuilder();
173	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
174
175	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
176	trie.getKnownSymbols());
177	for (Event<?> symbol : knownSymbols) {
178	final String thisSaneId = symbol.getShortId().replace("\"", "\\\"")
179	.replaceAll("[\r\n]", "");
180	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\""
181	+ thisSaneId + "\"];" + StringTools.ENDLINE);
182	List<Event<?>> context = new ArrayList<Event<?>>();
183	context.add(symbol);
184	Collection<Event<?>> followers = trie.getFollowingSymbols(context);
185	for (Event<?> follower : followers) {
186	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> "
187	+ knownSymbols.indexOf(follower) + " ");
188	stringBuilder.append("[label=\""
189	+ getProbability(context, follower) + "\"];"
190	+ StringTools.ENDLINE);
191	}
192	}
193	stringBuilder.append('}' + StringTools.ENDLINE);
194	return stringBuilder.toString();
195	}
196
197	/**
198	* <p>
199	* Returns a {@link Graph} representation of the model with the states as
200	* nodes and directed edges weighted with transition probabilities.
201	* </p>
202	*
203	* @return {@link Graph} of the model
204	*/
205	public Graph<String, MarkovEdge> getGraph() {
206	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
207
208	List<Event<?>> knownSymbols = new ArrayList<Event<?>>(
209	trie.getKnownSymbols());
210
211	for (Event<?> symbol : knownSymbols) {
212	String from = symbol.getShortId();
213	List<Event<?>> context = new ArrayList<Event<?>>();
214	context.add(symbol);
215
216	Collection<Event<?>> followers = trie.getFollowingSymbols(context);
217
218	for (Event<?> follower : followers) {
219	String to = follower.getShortId();
220	MarkovEdge prob = new MarkovEdge(getProbability(context,
221	follower));
222	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
223	}
224	}
225	return graph;
226	}
227
228	/**
229	* Inner class used for the {@link Graph} representation of the model.
230	*
231	* @author Steffen Herbold
232	* @version 1.0
233	*/
234	static public class MarkovEdge {
235	/**
236	* <p>
237	* Weight of the edge, i.e., its transition probability.
238	* </p>
239	*/
240	double weight;
241
242	/**
243	* <p>
244	* Constructor. Creates a new MarkovEdge.
245	* </p>
246	*
247	* @param weight
248	* weight of the edge, i.e., its transition probability
249	*/
250	MarkovEdge(double weight) {
251	this.weight = weight;
252	}
253
254	/**
255	* <p>
256	* The weight of the edge as {@link String}.
257	* </p>
258	*/
259	public String toString() {
260	return "" + weight;
261	}
262	}
263
264	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format