source: trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java @ 639

Last change on this file since 639 was 639, checked in by sherbold, 12 years ago
  • all usages of the Console tracing API now define log levels
  • Property svn:mime-type set to text/plain
File size: 8.8 KB
Line 
1
2package de.ugoe.cs.quest.usageprofiles;
3
4import java.util.ArrayList;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.List;
8import java.util.Random;
9import java.util.logging.Level;
10
11import de.ugoe.cs.quest.eventcore.Event;
12import de.ugoe.cs.util.StringTools;
13import de.ugoe.cs.util.console.Console;
14import edu.uci.ics.jung.graph.Graph;
15import edu.uci.ics.jung.graph.SparseMultigraph;
16import edu.uci.ics.jung.graph.util.EdgeType;
17
18import Jama.Matrix;
19
20/**
21 * <p>
22 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
23 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
24 * calculations are possible with first-order models, e.g., the calculation of the entropy (
25 * {@link #calcEntropy()}).
26 * </p>
27 *
28 * @author Steffen Herbold
29 * @version 1.0
30 */
31public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
32
33    /**
34     * <p>
35     * Id for object serialization.
36     * </p>
37     */
38    private static final long serialVersionUID = 1L;
39
40    /**
41     * <p>
42     * Maximum number of iterations when calculating the stationary distribution as the limit of
43     * multiplying the transmission matrix with itself.
44     * </p>
45     */
46    final static int MAX_STATDIST_ITERATIONS = 1000;
47
48    /**
49     * <p>
50     * Constructor. Creates a new FirstOrderMarkovModel.
51     * </p>
52     *
53     * @param r
54     *            random number generator used by probabilistic methods of the class
55     */
56    public FirstOrderMarkovModel(Random r) {
57        super(1, r);
58    }
59
60    /**
61     * <p>
62     * Generates the transmission matrix of the Markov model.
63     * </p>
64     *
65     * @return transmission matrix
66     */
67    private Matrix getTransmissionMatrix() {
68        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
69        int numStates = knownSymbols.size();
70        Matrix transmissionMatrix = new Matrix(numStates, numStates);
71
72        for (int i = 0; i < numStates; i++) {
73            Event currentSymbol = knownSymbols.get(i);
74            List<Event> context = new ArrayList<Event>();
75            context.add(currentSymbol);
76            for (int j = 0; j < numStates; j++) {
77                Event follower = knownSymbols.get(j);
78                double prob = getProbability(context, follower);
79                transmissionMatrix.set(i, j, prob);
80            }
81        }
82        return transmissionMatrix;
83    }
84
85    /**
86     * <p>
87     * Calculates the entropy of the model. To make it possible that the model is stationary, a
88     * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
89     * </p>
90     *
91     * @return entropy of the model or NaN if it could not be calculated
92     */
93    public double calcEntropy() {
94        Matrix transmissionMatrix = getTransmissionMatrix();
95        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
96        int numStates = knownSymbols.size();
97
98        List<Integer> startIndexList = new LinkedList<Integer>();
99        List<Integer> endIndexList = new LinkedList<Integer>();
100        for (int i = 0; i < knownSymbols.size(); i++) {
101            String id = knownSymbols.get(i).getId();
102            if (id.equals(Event.STARTEVENT.getId()) ||
103                id.contains(Event.STARTEVENT.getId() + "-=-"))
104            {
105                startIndexList.add(i);
106            }
107            if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) {
108                endIndexList.add(i);
109            }
110        }
111
112        if (startIndexList.isEmpty()) {
113            Console
114                .printerrln("Error calculating entropy. Initial state of markov chain not found.");
115            return Double.NaN;
116        }
117        if (endIndexList.isEmpty()) {
118            Console.printerrln("Error calculating entropy. End state of markov chain not found.");
119            return Double.NaN;
120        }
121        for (Integer i : endIndexList) {
122            for (Integer j : startIndexList) {
123                transmissionMatrix.set(i, j, 1);
124            }
125        }
126
127        // Calculate stationary distribution by raising the power of the
128        // transmission matrix.
129        // The rank of the matrix should fall to 1 and each two should be the
130        // vector of the stationory distribution.
131        int iter = 0;
132        int rank = transmissionMatrix.rank();
133        Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
134        while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
135            stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
136            rank = stationaryMatrix.rank();
137            iter++;
138        }
139
140        if (rank != 1) {
141            Console.traceln(Level.FINE, "rank: " + rank);
142            Console.printerrln("Unable to calculate stationary distribution.");
143            return Double.NaN;
144        }
145
146        double entropy = 0.0;
147        for (int i = 0; i < numStates; i++) {
148            for (int j = 0; j < numStates; j++) {
149                if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
150                    double tmp = stationaryMatrix.get(0, i);
151                    tmp *= transmissionMatrix.get(i, j);
152                    tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
153                    entropy -= tmp;
154                }
155            }
156        }
157        return entropy;
158    }
159
160    /**
161     * <p>
162     * The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
163     * states as nodes and directed edges weighted with transition probabilities.
164     * </p>
165     *
166     * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
167     */
168    @Override
169    public String getDotRepresentation() {
170        StringBuilder stringBuilder = new StringBuilder();
171        stringBuilder.append("digraph model {" + StringTools.ENDLINE);
172
173        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
174        for (Event symbol : knownSymbols) {
175            final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
176            stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
177                "\"];" + StringTools.ENDLINE);
178            List<Event> context = new ArrayList<Event>();
179            context.add(symbol);
180            Collection<Event> followers = trie.getFollowingSymbols(context);
181            for (Event follower : followers) {
182                stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
183                    knownSymbols.indexOf(follower) + " ");
184                stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
185                    StringTools.ENDLINE);
186            }
187        }
188        stringBuilder.append('}' + StringTools.ENDLINE);
189        return stringBuilder.toString();
190    }
191
192    /**
193     * <p>
194     * Returns a {@link Graph} representation of the model with the states as nodes and directed
195     * edges weighted with transition probabilities.
196     * </p>
197     *
198     * @return {@link Graph} of the model
199     */
200    public Graph<String, MarkovEdge> getGraph() {
201        Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
202
203        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
204
205        for (Event symbol : knownSymbols) {
206            String from = symbol.getId();
207            List<Event> context = new ArrayList<Event>();
208            context.add(symbol);
209
210            Collection<Event> followers = trie.getFollowingSymbols(context);
211
212            for (Event follower : followers) {
213                String to = follower.getId();
214                MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
215                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
216            }
217        }
218        return graph;
219    }
220
221    /**
222     * Inner class used for the {@link Graph} representation of the model.
223     *
224     * @author Steffen Herbold
225     * @version 1.0
226     */
227    static public class MarkovEdge {
228        /**
229         * <p>
230         * Weight of the edge, i.e., its transition probability.
231         * </p>
232         */
233        double weight;
234
235        /**
236         * <p>
237         * Constructor. Creates a new MarkovEdge.
238         * </p>
239         *
240         * @param weight
241         *            weight of the edge, i.e., its transition probability
242         */
243        MarkovEdge(double weight) {
244            this.weight = weight;
245        }
246
247        /**
248         * <p>
249         * The weight of the edge as {@link String}.
250         * </p>
251         */
252        public String toString() {
253            return "" + weight;
254        }
255    }
256
257}
Note: See TracBrowser for help on using the repository browser.