Context Navigation

FirstOrderMarkovModel.java @ 1251

Last change on this file since 1251 was 927, checked in by sherbold, 12 years ago
added copyright under the Apache License, Version 2.0
Property svn:mime-type set to `text/plain`
File size: 9.5 KB

Line
1	// Copyright 2012 Georg-August-Universität Göttingen, Germany
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	package de.ugoe.cs.autoquest.usageprofiles;
16
17	import java.util.ArrayList;
18	import java.util.Collection;
19	import java.util.LinkedList;
20	import java.util.List;
21	import java.util.Random;
22	import java.util.logging.Level;
23
24	import de.ugoe.cs.autoquest.eventcore.Event;
25	import de.ugoe.cs.util.StringTools;
26	import de.ugoe.cs.util.console.Console;
27	import edu.uci.ics.jung.graph.Graph;
28	import edu.uci.ics.jung.graph.SparseMultigraph;
29	import edu.uci.ics.jung.graph.util.EdgeType;
30
31	import Jama.Matrix;
32
33	/**
34	* <p>
35	* Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
36	* and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
37	* calculations are possible with first-order models, e.g., the calculation of the entropy (
38	* {@link #calcEntropy()}).
39	* </p>
40	*
41	* @author Steffen Herbold
42	* @version 1.0
43	*/
44	public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
45
46	/**
47	* <p>
48	* Id for object serialization.
49	* </p>
50	*/
51	private static final long serialVersionUID = 1L;
52
53	/**
54	* <p>
55	* Maximum number of iterations when calculating the stationary distribution as the limit of
56	* multiplying the transmission matrix with itself.
57	* </p>
58	*/
59	final static int MAX_STATDIST_ITERATIONS = 1000;
60
61	/**
62	* <p>
63	* Constructor. Creates a new FirstOrderMarkovModel.
64	* </p>
65	*
66	* @param r
67	* random number generator used by probabilistic methods of the class
68	*/
69	public FirstOrderMarkovModel(Random r) {
70	super(1, r);
71	}
72
73	/**
74	* <p>
75	* Generates the transmission matrix of the Markov model.
76	* </p>
77	*
78	* @return transmission matrix
79	*/
80	private Matrix getTransmissionMatrix() {
81	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
82	int numStates = knownSymbols.size();
83	Matrix transmissionMatrix = new Matrix(numStates, numStates);
84
85	for (int i = 0; i < numStates; i++) {
86	Event currentSymbol = knownSymbols.get(i);
87	List<Event> context = new ArrayList<Event>();
88	context.add(currentSymbol);
89	for (int j = 0; j < numStates; j++) {
90	Event follower = knownSymbols.get(j);
91	double prob = getProbability(context, follower);
92	transmissionMatrix.set(i, j, prob);
93	}
94	}
95	return transmissionMatrix;
96	}
97
98	/**
99	* <p>
100	* Calculates the entropy of the model. To make it possible that the model is stationary, a
101	* transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
102	* </p>
103	*
104	* @return entropy of the model or NaN if it could not be calculated
105	*/
106	public double calcEntropy() {
107	Matrix transmissionMatrix = getTransmissionMatrix();
108	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
109	int numStates = knownSymbols.size();
110
111	List<Integer> startIndexList = new LinkedList<Integer>();
112	List<Integer> endIndexList = new LinkedList<Integer>();
113	for (int i = 0; i < knownSymbols.size(); i++) {
114	String id = knownSymbols.get(i).getId();
115	if (id.equals(Event.STARTEVENT.getId()) \|\|
116	id.contains(Event.STARTEVENT.getId() + "-=-"))
117	{
118	startIndexList.add(i);
119	}
120	if (id.equals(Event.ENDEVENT.getId()) \|\| id.contains("-=-" + Event.ENDEVENT.getId())) {
121	endIndexList.add(i);
122	}
123	}
124
125	if (startIndexList.isEmpty()) {
126	Console
127	.printerrln("Error calculating entropy. Initial state of markov chain not found.");
128	return Double.NaN;
129	}
130	if (endIndexList.isEmpty()) {
131	Console.printerrln("Error calculating entropy. End state of markov chain not found.");
132	return Double.NaN;
133	}
134	for (Integer i : endIndexList) {
135	for (Integer j : startIndexList) {
136	transmissionMatrix.set(i, j, 1);
137	}
138	}
139
140	// Calculate stationary distribution by raising the power of the
141	// transmission matrix.
142	// The rank of the matrix should fall to 1 and each two should be the
143	// vector of the stationory distribution.
144	int iter = 0;
145	int rank = transmissionMatrix.rank();
146	Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
147	while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
148	stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
149	rank = stationaryMatrix.rank();
150	iter++;
151	}
152
153	if (rank != 1) {
154	Console.traceln(Level.FINE, "rank: " + rank);
155	Console.printerrln("Unable to calculate stationary distribution.");
156	return Double.NaN;
157	}
158
159	double entropy = 0.0;
160	for (int i = 0; i < numStates; i++) {
161	for (int j = 0; j < numStates; j++) {
162	if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
163	double tmp = stationaryMatrix.get(0, i);
164	tmp *= transmissionMatrix.get(i, j);
165	tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
166	entropy -= tmp;
167	}
168	}
169	}
170	return entropy;
171	}
172
173	/**
174	* <p>
175	* The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
176	* states as nodes and directed edges weighted with transition probabilities.
177	* </p>
178	*
179	* @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
180	*/
181	@Override
182	public String getDotRepresentation() {
183	StringBuilder stringBuilder = new StringBuilder();
184	stringBuilder.append("digraph model {" + StringTools.ENDLINE);
185
186	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
187	for (Event symbol : knownSymbols) {
188	final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
189	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
190	"\"];" + StringTools.ENDLINE);
191	List<Event> context = new ArrayList<Event>();
192	context.add(symbol);
193	Collection<Event> followers = trie.getFollowingSymbols(context);
194	for (Event follower : followers) {
195	stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
196	knownSymbols.indexOf(follower) + " ");
197	stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
198	StringTools.ENDLINE);
199	}
200	}
201	stringBuilder.append('}' + StringTools.ENDLINE);
202	return stringBuilder.toString();
203	}
204
205	/**
206	* <p>
207	* Returns a {@link Graph} representation of the model with the states as nodes and directed
208	* edges weighted with transition probabilities.
209	* </p>
210	*
211	* @return {@link Graph} of the model
212	*/
213	public Graph<String, MarkovEdge> getGraph() {
214	Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
215
216	List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
217
218	for (Event symbol : knownSymbols) {
219	String from = symbol.getId();
220	List<Event> context = new ArrayList<Event>();
221	context.add(symbol);
222
223	Collection<Event> followers = trie.getFollowingSymbols(context);
224
225	for (Event follower : followers) {
226	String to = follower.getId();
227	MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
228	graph.addEdge(prob, from, to, EdgeType.DIRECTED);
229	}
230	}
231	return graph;
232	}
233
234	/**
235	* Inner class used for the {@link Graph} representation of the model.
236	*
237	* @author Steffen Herbold
238	* @version 1.0
239	*/
240	static public class MarkovEdge {
241	/**
242	* <p>
243	* Weight of the edge, i.e., its transition probability.
244	* </p>
245	*/
246	double weight;
247
248	/**
249	* <p>
250	* Constructor. Creates a new MarkovEdge.
251	* </p>
252	*
253	* @param weight
254	* weight of the edge, i.e., its transition probability
255	*/
256	MarkovEdge(double weight) {
257	this.weight = weight;
258	}
259
260	/**
261	* <p>
262	* The weight of the edge as {@link String}.
263	* </p>
264	*/
265	public String toString() {
266	return "" + weight;
267	}
268	}
269
270	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format