source: trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/FirstOrderMarkovModel.java @ 927

Last change on this file since 927 was 927, checked in by sherbold, 12 years ago
  • added copyright under the Apache License, Version 2.0
  • Property svn:mime-type set to text/plain
File size: 9.5 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.usageprofiles;
16
17import java.util.ArrayList;
18import java.util.Collection;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.Random;
22import java.util.logging.Level;
23
24import de.ugoe.cs.autoquest.eventcore.Event;
25import de.ugoe.cs.util.StringTools;
26import de.ugoe.cs.util.console.Console;
27import edu.uci.ics.jung.graph.Graph;
28import edu.uci.ics.jung.graph.SparseMultigraph;
29import edu.uci.ics.jung.graph.util.EdgeType;
30
31import Jama.Matrix;
32
33/**
34 * <p>
35 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
36 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
37 * calculations are possible with first-order models, e.g., the calculation of the entropy (
38 * {@link #calcEntropy()}).
39 * </p>
40 *
41 * @author Steffen Herbold
42 * @version 1.0
43 */
44public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
45
46    /**
47     * <p>
48     * Id for object serialization.
49     * </p>
50     */
51    private static final long serialVersionUID = 1L;
52
53    /**
54     * <p>
55     * Maximum number of iterations when calculating the stationary distribution as the limit of
56     * multiplying the transmission matrix with itself.
57     * </p>
58     */
59    final static int MAX_STATDIST_ITERATIONS = 1000;
60
61    /**
62     * <p>
63     * Constructor. Creates a new FirstOrderMarkovModel.
64     * </p>
65     *
66     * @param r
67     *            random number generator used by probabilistic methods of the class
68     */
69    public FirstOrderMarkovModel(Random r) {
70        super(1, r);
71    }
72
73    /**
74     * <p>
75     * Generates the transmission matrix of the Markov model.
76     * </p>
77     *
78     * @return transmission matrix
79     */
80    private Matrix getTransmissionMatrix() {
81        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
82        int numStates = knownSymbols.size();
83        Matrix transmissionMatrix = new Matrix(numStates, numStates);
84
85        for (int i = 0; i < numStates; i++) {
86            Event currentSymbol = knownSymbols.get(i);
87            List<Event> context = new ArrayList<Event>();
88            context.add(currentSymbol);
89            for (int j = 0; j < numStates; j++) {
90                Event follower = knownSymbols.get(j);
91                double prob = getProbability(context, follower);
92                transmissionMatrix.set(i, j, prob);
93            }
94        }
95        return transmissionMatrix;
96    }
97
98    /**
99     * <p>
100     * Calculates the entropy of the model. To make it possible that the model is stationary, a
101     * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
102     * </p>
103     *
104     * @return entropy of the model or NaN if it could not be calculated
105     */
106    public double calcEntropy() {
107        Matrix transmissionMatrix = getTransmissionMatrix();
108        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
109        int numStates = knownSymbols.size();
110
111        List<Integer> startIndexList = new LinkedList<Integer>();
112        List<Integer> endIndexList = new LinkedList<Integer>();
113        for (int i = 0; i < knownSymbols.size(); i++) {
114            String id = knownSymbols.get(i).getId();
115            if (id.equals(Event.STARTEVENT.getId()) ||
116                id.contains(Event.STARTEVENT.getId() + "-=-"))
117            {
118                startIndexList.add(i);
119            }
120            if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) {
121                endIndexList.add(i);
122            }
123        }
124
125        if (startIndexList.isEmpty()) {
126            Console
127                .printerrln("Error calculating entropy. Initial state of markov chain not found.");
128            return Double.NaN;
129        }
130        if (endIndexList.isEmpty()) {
131            Console.printerrln("Error calculating entropy. End state of markov chain not found.");
132            return Double.NaN;
133        }
134        for (Integer i : endIndexList) {
135            for (Integer j : startIndexList) {
136                transmissionMatrix.set(i, j, 1);
137            }
138        }
139
140        // Calculate stationary distribution by raising the power of the
141        // transmission matrix.
142        // The rank of the matrix should fall to 1 and each two should be the
143        // vector of the stationory distribution.
144        int iter = 0;
145        int rank = transmissionMatrix.rank();
146        Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
147        while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
148            stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
149            rank = stationaryMatrix.rank();
150            iter++;
151        }
152
153        if (rank != 1) {
154            Console.traceln(Level.FINE, "rank: " + rank);
155            Console.printerrln("Unable to calculate stationary distribution.");
156            return Double.NaN;
157        }
158
159        double entropy = 0.0;
160        for (int i = 0; i < numStates; i++) {
161            for (int j = 0; j < numStates; j++) {
162                if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
163                    double tmp = stationaryMatrix.get(0, i);
164                    tmp *= transmissionMatrix.get(i, j);
165                    tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
166                    entropy -= tmp;
167                }
168            }
169        }
170        return entropy;
171    }
172
173    /**
174     * <p>
175     * The dot represenation of {@link FirstOrderMarkovModel}s is its graph representation with the
176     * states as nodes and directed edges weighted with transition probabilities.
177     * </p>
178     *
179     * @see de.ugoe.cs.autoquest.usageprofiles.IDotCompatible#getDotRepresentation()
180     */
181    @Override
182    public String getDotRepresentation() {
183        StringBuilder stringBuilder = new StringBuilder();
184        stringBuilder.append("digraph model {" + StringTools.ENDLINE);
185
186        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
187        for (Event symbol : knownSymbols) {
188            final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
189            stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
190                "\"];" + StringTools.ENDLINE);
191            List<Event> context = new ArrayList<Event>();
192            context.add(symbol);
193            Collection<Event> followers = trie.getFollowingSymbols(context);
194            for (Event follower : followers) {
195                stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
196                    knownSymbols.indexOf(follower) + " ");
197                stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
198                    StringTools.ENDLINE);
199            }
200        }
201        stringBuilder.append('}' + StringTools.ENDLINE);
202        return stringBuilder.toString();
203    }
204
205    /**
206     * <p>
207     * Returns a {@link Graph} representation of the model with the states as nodes and directed
208     * edges weighted with transition probabilities.
209     * </p>
210     *
211     * @return {@link Graph} of the model
212     */
213    public Graph<String, MarkovEdge> getGraph() {
214        Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
215
216        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
217
218        for (Event symbol : knownSymbols) {
219            String from = symbol.getId();
220            List<Event> context = new ArrayList<Event>();
221            context.add(symbol);
222
223            Collection<Event> followers = trie.getFollowingSymbols(context);
224
225            for (Event follower : followers) {
226                String to = follower.getId();
227                MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
228                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
229            }
230        }
231        return graph;
232    }
233
234    /**
235     * Inner class used for the {@link Graph} representation of the model.
236     *
237     * @author Steffen Herbold
238     * @version 1.0
239     */
240    static public class MarkovEdge {
241        /**
242         * <p>
243         * Weight of the edge, i.e., its transition probability.
244         * </p>
245         */
246        double weight;
247
248        /**
249         * <p>
250         * Constructor. Creates a new MarkovEdge.
251         * </p>
252         *
253         * @param weight
254         *            weight of the edge, i.e., its transition probability
255         */
256        MarkovEdge(double weight) {
257            this.weight = weight;
258        }
259
260        /**
261         * <p>
262         * The weight of the edge as {@link String}.
263         * </p>
264         */
265        public String toString() {
266            return "" + weight;
267        }
268    }
269
270}
Note: See TracBrowser for help on using the repository browser.