source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/FirstOrderMarkovModel.java @ 922

Last change on this file since 922 was 922, checked in by sherbold, 12 years ago
  • renaming of packages from de.ugoe.cs.quest to de.ugoe.cs.autoquest
  • Property svn:mime-type set to text/plain
File size: 8.8 KB
Line 
1package de.ugoe.cs.autoquest.usageprofiles;
2
3import java.util.ArrayList;
4import java.util.Collection;
5import java.util.LinkedList;
6import java.util.List;
7import java.util.Random;
8import java.util.logging.Level;
9
10import de.ugoe.cs.autoquest.eventcore.Event;
11import de.ugoe.cs.util.StringTools;
12import de.ugoe.cs.util.console.Console;
13import edu.uci.ics.jung.graph.Graph;
14import edu.uci.ics.jung.graph.SparseMultigraph;
15import edu.uci.ics.jung.graph.util.EdgeType;
16
17import Jama.Matrix;
18
19/**
20 * <p>
21 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
22 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
23 * calculations are possible with first-order models, e.g., the calculation of the entropy (
24 * {@link #calcEntropy()}).
25 * </p>
26 *
27 * @author Steffen Herbold
28 * @version 1.0
29 */
30public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
31
32    /**
33     * <p>
34     * Id for object serialization.
35     * </p>
36     */
37    private static final long serialVersionUID = 1L;
38
39    /**
40     * <p>
41     * Maximum number of iterations when calculating the stationary distribution as the limit of
42     * multiplying the transmission matrix with itself.
43     * </p>
44     */
45    final static int MAX_STATDIST_ITERATIONS = 1000;
46
47    /**
48     * <p>
49     * Constructor. Creates a new FirstOrderMarkovModel.
50     * </p>
51     *
52     * @param r
53     *            random number generator used by probabilistic methods of the class
54     */
55    public FirstOrderMarkovModel(Random r) {
56        super(1, r);
57    }
58
59    /**
60     * <p>
61     * Generates the transmission matrix of the Markov model.
62     * </p>
63     *
64     * @return transmission matrix
65     */
66    private Matrix getTransmissionMatrix() {
67        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
68        int numStates = knownSymbols.size();
69        Matrix transmissionMatrix = new Matrix(numStates, numStates);
70
71        for (int i = 0; i < numStates; i++) {
72            Event currentSymbol = knownSymbols.get(i);
73            List<Event> context = new ArrayList<Event>();
74            context.add(currentSymbol);
75            for (int j = 0; j < numStates; j++) {
76                Event follower = knownSymbols.get(j);
77                double prob = getProbability(context, follower);
78                transmissionMatrix.set(i, j, prob);
79            }
80        }
81        return transmissionMatrix;
82    }
83
84    /**
85     * <p>
86     * Calculates the entropy of the model. To make it possible that the model is stationary, a
87     * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
88     * </p>
89     *
90     * @return entropy of the model or NaN if it could not be calculated
91     */
92    public double calcEntropy() {
93        Matrix transmissionMatrix = getTransmissionMatrix();
94        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
95        int numStates = knownSymbols.size();
96
97        List<Integer> startIndexList = new LinkedList<Integer>();
98        List<Integer> endIndexList = new LinkedList<Integer>();
99        for (int i = 0; i < knownSymbols.size(); i++) {
100            String id = knownSymbols.get(i).getId();
101            if (id.equals(Event.STARTEVENT.getId()) ||
102                id.contains(Event.STARTEVENT.getId() + "-=-"))
103            {
104                startIndexList.add(i);
105            }
106            if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) {
107                endIndexList.add(i);
108            }
109        }
110
111        if (startIndexList.isEmpty()) {
112            Console
113                .printerrln("Error calculating entropy. Initial state of markov chain not found.");
114            return Double.NaN;
115        }
116        if (endIndexList.isEmpty()) {
117            Console.printerrln("Error calculating entropy. End state of markov chain not found.");
118            return Double.NaN;
119        }
120        for (Integer i : endIndexList) {
121            for (Integer j : startIndexList) {
122                transmissionMatrix.set(i, j, 1);
123            }
124        }
125
126        // Calculate stationary distribution by raising the power of the
127        // transmission matrix.
128        // The rank of the matrix should fall to 1 and each two should be the
129        // vector of the stationory distribution.
130        int iter = 0;
131        int rank = transmissionMatrix.rank();
132        Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
133        while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
134            stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
135            rank = stationaryMatrix.rank();
136            iter++;
137        }
138
139        if (rank != 1) {
140            Console.traceln(Level.FINE, "rank: " + rank);
141            Console.printerrln("Unable to calculate stationary distribution.");
142            return Double.NaN;
143        }
144
145        double entropy = 0.0;
146        for (int i = 0; i < numStates; i++) {
147            for (int j = 0; j < numStates; j++) {
148                if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
149                    double tmp = stationaryMatrix.get(0, i);
150                    tmp *= transmissionMatrix.get(i, j);
151                    tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
152                    entropy -= tmp;
153                }
154            }
155        }
156        return entropy;
157    }
158
159    /**
160     * <p>
161     * The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
162     * states as nodes and directed edges weighted with transition probabilities.
163     * </p>
164     *
165     * @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
166     */
167    @Override
168    public String getDotRepresentation() {
169        StringBuilder stringBuilder = new StringBuilder();
170        stringBuilder.append("digraph model {" + StringTools.ENDLINE);
171
172        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
173        for (Event symbol : knownSymbols) {
174            final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
175            stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
176                "\"];" + StringTools.ENDLINE);
177            List<Event> context = new ArrayList<Event>();
178            context.add(symbol);
179            Collection<Event> followers = trie.getFollowingSymbols(context);
180            for (Event follower : followers) {
181                stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
182                    knownSymbols.indexOf(follower) + " ");
183                stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
184                    StringTools.ENDLINE);
185            }
186        }
187        stringBuilder.append('}' + StringTools.ENDLINE);
188        return stringBuilder.toString();
189    }
190
191    /**
192     * <p>
193     * Returns a {@link Graph} representation of the model with the states as nodes and directed
194     * edges weighted with transition probabilities.
195     * </p>
196     *
197     * @return {@link Graph} of the model
198     */
199    public Graph<String, MarkovEdge> getGraph() {
200        Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
201
202        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
203
204        for (Event symbol : knownSymbols) {
205            String from = symbol.getId();
206            List<Event> context = new ArrayList<Event>();
207            context.add(symbol);
208
209            Collection<Event> followers = trie.getFollowingSymbols(context);
210
211            for (Event follower : followers) {
212                String to = follower.getId();
213                MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
214                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
215            }
216        }
217        return graph;
218    }
219
220    /**
221     * Inner class used for the {@link Graph} representation of the model.
222     *
223     * @author Steffen Herbold
224     * @version 1.0
225     */
226    static public class MarkovEdge {
227        /**
228         * <p>
229         * Weight of the edge, i.e., its transition probability.
230         * </p>
231         */
232        double weight;
233
234        /**
235         * <p>
236         * Constructor. Creates a new MarkovEdge.
237         * </p>
238         *
239         * @param weight
240         *            weight of the edge, i.e., its transition probability
241         */
242        MarkovEdge(double weight) {
243            this.weight = weight;
244        }
245
246        /**
247         * <p>
248         * The weight of the edge as {@link String}.
249         * </p>
250         */
251        public String toString() {
252            return "" + weight;
253        }
254    }
255
256}
Note: See TracBrowser for help on using the repository browser.