Context Navigation

← Previous Change
Next Change →

FirstOrderMarkovModel.java

Timestamp:

08/17/12 09:05:19 (12 years ago)

Author:

sherbold

Message:

adapted to quest coding style

File:

: 1 edited

trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java

-                      r553
+                      r559
 package de.ugoe.cs.quest.usageprofiles;
 …
 /**
  * <p>
+ * Implements first-order Markov models. The implementation is based on
+ * {@link HighOrderMarkovModel} and restricts the Markov order to 1. In
+ * comparison to {@link HighOrderMarkovModel}, more calculations are possible
+ * with first-order models, e.g., the calculation of the entropy (
+ * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
+ * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
+ * calculations are possible with first-order models, e.g., the calculation of the entropy (
  * {@link #calcEntropy()}).
  * </p>
 …
  * @version 1.0
  */
+public class FirstOrderMarkovModel extends HighOrderMarkovModel implements
+                IDotCompatible {
+        /**
+         * <p>
+         * Id for object serialization.
+         * </p>
+         */
+        private static final long serialVersionUID = 1L;
+        /**
+         * <p>
+         * Maximum number of iterations when calculating the stationary distribution
+         * as the limit of multiplying the transmission matrix with itself.
+         * </p>
+         * <p>
+         * {@link #calcEntropy()} squares the matrix once per iteration and returns
+         * NaN if the rank of the matrix has not fallen to 1 after this many
+         * iterations.
+         * </p>
+         */
+        final static int MAX_STATDIST_ITERATIONS = 1000;
+        /**
+         * <p>
+         * Constructor. Creates a new FirstOrderMarkovModel. The Markov order
+         * passed to the {@link HighOrderMarkovModel} constructor is fixed to 1.
+         * </p>
+         *
+         * @param r
+         *            random number generator used by probabilistic methods of the
+         *            class
+         */
+        public FirstOrderMarkovModel(Random r) {
+                super(1, r);
+        }
+        /**
+         * <p>
+         * Generates the transmission matrix of the Markov model.
+         * </p>
+         * <p>
+         * Rows and columns follow the order of a list copy of the known symbols of
+         * the trie. The entry in row i and column j is the probability returned by
+         * <code>getProbability</code> for symbol j as follower of the
+         * single-element context consisting of symbol i.
+         * </p>
+         *
+         * @return transmission matrix
+         */
+        private Matrix getTransmissionMatrix() {
+                List<Event> knownSymbols = new ArrayList<Event>(
+                                trie.getKnownSymbols());
+                int numStates = knownSymbols.size();
+                Matrix transmissionMatrix = new Matrix(numStates, numStates);
+                for (int i = 0; i < numStates; i++) {
+                        Event currentSymbol = knownSymbols.get(i);
+                        List<Event> context = new ArrayList<Event>();
+                        context.add(currentSymbol);
+                        for (int j = 0; j < numStates; j++) {
+                                Event follower = knownSymbols.get(j);
+                                double prob = getProbability(context, follower);
+                                transmissionMatrix.set(i, j, prob);
+                        }
+                }
+                return transmissionMatrix;
+        }
+        /**
+         * <p>
+         * Calculates the entropy of the model. To make it possible that the model
+         * is stationary, a transition from {@link Event#ENDEVENT} to
+         * {@link Event#STARTEVENT} is added.
+         * </p>
+         * <p>
+         * A state counts as start (end) state if its id equals the id of
+         * {@link Event#STARTEVENT} ({@link Event#ENDEVENT}) or contains that id
+         * followed (preceded) by the separator <code>-=-</code>. The stationary
+         * distribution is approximated by repeatedly squaring the modified
+         * transmission matrix until its rank is 1, at most
+         * {@link #MAX_STATDIST_ITERATIONS} times; the first row of the result is
+         * then used as the stationary probability of each state. The returned
+         * value is the sum over all states i and j of
+         * <code>-pi(i) * p(i,j) * log2(p(i,j))</code>, where entries equal to 0 or
+         * 1 are skipped.
+         * </p>
+         * <p>
+         * Whenever NaN is returned, an error message is reported via
+         * <code>Console.printerrln</code>.
+         * </p>
+         *
+         * @return entropy of the model or NaN if it could not be calculated, i.e.,
+         *         if no start state or no end state is known, or if the rank of
+         *         the matrix did not fall to 1
+         */
+        public double calcEntropy() {
+                Matrix transmissionMatrix = getTransmissionMatrix();
+                List<Event> knownSymbols = new ArrayList<Event>(
+                                trie.getKnownSymbols());
+                int numStates = knownSymbols.size();
+                List<Integer> startIndexList = new LinkedList<Integer>();
+                List<Integer> endIndexList = new LinkedList<Integer>();
+                for( int i=0 ; i<knownSymbols.size() ; i++ ) {
+                        String id = knownSymbols.get(i).getId();
+                        if( id.equals(Event.STARTEVENT.getId()) || id.contains(Event.STARTEVENT.getId()+"-=-") ) {
+                                startIndexList.add(i);
+                        }
+                        if( id.equals(Event.ENDEVENT.getId()) || id.contains("-=-"+Event.ENDEVENT.getId()) ) {
+                                endIndexList.add(i);
+                        }
+                }
+                if (startIndexList.isEmpty()) {
+                        Console.printerrln("Error calculating entropy. Initial state of markov chain not found.");
+                        return Double.NaN;
+                }
+                if (endIndexList.isEmpty()) {
+                        Console.printerrln("Error calculating entropy. End state of markov chain not found.");
+                        return Double.NaN;
+                }
+                for( Integer i : endIndexList ) {
+                        for(Integer j : startIndexList ) {
+                                transmissionMatrix.set(i, j, 1);
+                        }
+                }
+                // Calculate stationary distribution by raising the power of the
+                // transmission matrix.
+                // The rank of the matrix should fall to 1 and each row should be the
+                // vector of the stationary distribution.
+                int iter = 0;
+                int rank = transmissionMatrix.rank();
+                Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
+                while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
+                        stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
+                        rank = stationaryMatrix.rank();
+                        iter++;
+                }
+                if (rank != 1) {
+                        Console.traceln("rank: " + rank);
+                        Console.printerrln("Unable to calculate stationary distribution.");
+                        return Double.NaN;
+                }
+                double entropy = 0.0;
+                for (int i = 0; i < numStates; i++) {
+                        for (int j = 0; j < numStates; j++) {
+                                if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j)!=1) {
+                                        double tmp = stationaryMatrix.get(0, i);
+                                        tmp *= transmissionMatrix.get(i, j);
+                                        tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
+                                        entropy -= tmp;
+                                }
+                        }
+                }
+                return entropy;
+        }
+        /**
+         * <p>
+         * The dot representation of {@link FirstOrderMarkovModel}s is its graph
+         * representation with the states as nodes and directed edges weighted with
+         * transition probabilities.
+         * </p>
+         * <p>
+         * Nodes are named by the index of the symbol in a list copy of the known
+         * symbols and labeled with the symbol id, in which double quotes are
+         * escaped and line breaks are removed. Each edge is labeled with the
+         * probability of the follower given the symbol.
+         * </p>
+         *
+         * @return the model as a <code>digraph</code> named "model" in dot syntax
+         * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
+         */
+        @Override
+        public String getDotRepresentation() {
+                StringBuilder stringBuilder = new StringBuilder();
+                stringBuilder.append("digraph model {" + StringTools.ENDLINE);
+                List<Event> knownSymbols = new ArrayList<Event>(
+                                trie.getKnownSymbols());
+                for (Event symbol : knownSymbols) {
+                        final String thisSaneId = symbol.getId().replace("\"", "\\\"")
+                                        .replaceAll("[\r\n]", "");
+                        stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\""
+                                        + thisSaneId + "\"];" + StringTools.ENDLINE);
+                        List<Event> context = new ArrayList<Event>();
+                        context.add(symbol);
+                        Collection<Event> followers = trie.getFollowingSymbols(context);
+                        for (Event follower : followers) {
+                                stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> "
+                                                + knownSymbols.indexOf(follower) + " ");
+                                stringBuilder.append("[label=\""
+                                                + getProbability(context, follower) + "\"];"
+                                                + StringTools.ENDLINE);
+                        }
+                }
+                stringBuilder.append('}' + StringTools.ENDLINE);
+                return stringBuilder.toString();
+        }
+        /**
+         * <p>
+         * Returns a {@link Graph} representation of the model with the states as
+         * nodes and directed edges weighted with transition probabilities.
+         * </p>
+         * <p>
+         * Vertices are identified by the ids of the symbols. For every known
+         * symbol, one directed {@link MarkovEdge} is added to each follower the
+         * trie reports for it; the edge weight is the probability of the follower
+         * given the symbol.
+         * </p>
+         *
+         * @return {@link Graph} of the model
+         */
+        public Graph<String, MarkovEdge> getGraph() {
+                Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
+                List<Event> knownSymbols = new ArrayList<Event>(
+                                trie.getKnownSymbols());
+                for (Event symbol : knownSymbols) {
+                        String from = symbol.getId();
+                        List<Event> context = new ArrayList<Event>();
+                        context.add(symbol);
+                        Collection<Event> followers = trie.getFollowingSymbols(context);
+                        for (Event follower : followers) {
+                                String to = follower.getId();
+                                MarkovEdge prob = new MarkovEdge(getProbability(context,
+                                                follower));
+                                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
+                        }
+                }
+                return graph;
+        }
+        /**
+         * <p>
+         * Static nested class used as edge type for the {@link Graph}
+         * representation of the model. An edge only carries its weight.
+         * </p>
+         *
+         * @author Steffen Herbold
+         * @version 1.0
+         */
+        static public class MarkovEdge {
+                /**
+                 * <p>
+                 * Weight of the edge, i.e., its transition probability.
+                 * </p>
+                 */
+                double weight;
+                /**
+                 * <p>
+                 * Constructor. Creates a new MarkovEdge.
+                 * </p>
+                 *
+                 * @param weight
+                 *            weight of the edge, i.e., its transition probability
+                 */
+                MarkovEdge(double weight) {
+                        this.weight = weight;
+                }
+                /**
+                 * <p>
+                 * The weight of the edge as {@link String}.
+                 * </p>
+                 *
+                 * @return the weight converted to a {@link String} by concatenation
+                 *         with the empty string
+                 */
+                public String toString() {
+                        return "" + weight;
+                }
+        }
+public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {
+    /**
+     * <p>
+     * Id for object serialization.
+     * </p>
+     */
+    private static final long serialVersionUID = 1L;
+    /**
+     * <p>
+     * Maximum number of iterations when calculating the stationary distribution as the limit of
+     * multiplying the transmission matrix with itself.
+     * </p>
+     * <p>
+     * {@link #calcEntropy()} squares the matrix once per iteration and returns NaN if the rank
+     * of the matrix has not fallen to 1 after this many iterations.
+     * </p>
+     */
+    final static int MAX_STATDIST_ITERATIONS = 1000;
+    /**
+     * <p>
+     * Constructor. Creates a new FirstOrderMarkovModel. The Markov order passed to the
+     * {@link HighOrderMarkovModel} constructor is fixed to 1.
+     * </p>
+     *
+     * @param r
+     *            random number generator used by probabilistic methods of the class
+     */
+    public FirstOrderMarkovModel(Random r) {
+        super(1, r);
+    }
+    /**
+     * <p>
+     * Generates the transmission matrix of the Markov model.
+     * </p>
+     * <p>
+     * Rows and columns follow the order of a list copy of the known symbols of the trie. The
+     * entry in row i and column j is the probability returned by <code>getProbability</code>
+     * for symbol j as follower of the single-element context consisting of symbol i.
+     * </p>
+     *
+     * @return transmission matrix
+     */
+    private Matrix getTransmissionMatrix() {
+        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
+        int numStates = knownSymbols.size();
+        Matrix transmissionMatrix = new Matrix(numStates, numStates);
+        for (int i = 0; i < numStates; i++) {
+            Event currentSymbol = knownSymbols.get(i);
+            List<Event> context = new ArrayList<Event>();
+            context.add(currentSymbol);
+            for (int j = 0; j < numStates; j++) {
+                Event follower = knownSymbols.get(j);
+                double prob = getProbability(context, follower);
+                transmissionMatrix.set(i, j, prob);
+            }
+        }
+        return transmissionMatrix;
+    }
+    /**
+     * <p>
+     * Calculates the entropy of the model. To make it possible that the model is stationary, a
+     * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
+     * </p>
+     * <p>
+     * A state counts as start (end) state if its id equals the id of {@link Event#STARTEVENT}
+     * ({@link Event#ENDEVENT}) or contains that id followed (preceded) by the separator
+     * <code>-=-</code>. The stationary distribution is approximated by repeatedly squaring the
+     * modified transmission matrix until its rank is 1, at most
+     * {@link #MAX_STATDIST_ITERATIONS} times; the first row of the result is then used as the
+     * stationary probability of each state. The returned value is the sum over all states i
+     * and j of <code>-pi(i) * p(i,j) * log2(p(i,j))</code>, where entries equal to 0 or 1 are
+     * skipped.
+     * </p>
+     * <p>
+     * Whenever NaN is returned, an error message is reported via
+     * <code>Console.printerrln</code>.
+     * </p>
+     *
+     * @return entropy of the model or NaN if it could not be calculated, i.e., if no start
+     *         state or no end state is known, or if the rank of the matrix did not fall to 1
+     */
+    public double calcEntropy() {
+        Matrix transmissionMatrix = getTransmissionMatrix();
+        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
+        int numStates = knownSymbols.size();
+        List<Integer> startIndexList = new LinkedList<Integer>();
+        List<Integer> endIndexList = new LinkedList<Integer>();
+        for (int i = 0; i < knownSymbols.size(); i++) {
+            String id = knownSymbols.get(i).getId();
+            if (id.equals(Event.STARTEVENT.getId()) ||
+                id.contains(Event.STARTEVENT.getId() + "-=-"))
+            {
+                startIndexList.add(i);
+            }
+            if (id.equals(Event.ENDEVENT.getId()) || id.contains("-=-" + Event.ENDEVENT.getId())) {
+                endIndexList.add(i);
+            }
+        }
+        if (startIndexList.isEmpty()) {
+            Console
+                .printerrln("Error calculating entropy. Initial state of markov chain not found.");
+            return Double.NaN;
+        }
+        if (endIndexList.isEmpty()) {
+            Console.printerrln("Error calculating entropy. End state of markov chain not found.");
+            return Double.NaN;
+        }
+        for (Integer i : endIndexList) {
+            for (Integer j : startIndexList) {
+                transmissionMatrix.set(i, j, 1);
+            }
+        }
+        // Calculate stationary distribution by raising the power of the
+        // transmission matrix.
+        // The rank of the matrix should fall to 1 and each row should be the
+        // vector of the stationary distribution.
+        int iter = 0;
+        int rank = transmissionMatrix.rank();
+        Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
+        while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
+            stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
+            rank = stationaryMatrix.rank();
+            iter++;
+        }
+        if (rank != 1) {
+            Console.traceln("rank: " + rank);
+            Console.printerrln("Unable to calculate stationary distribution.");
+            return Double.NaN;
+        }
+        double entropy = 0.0;
+        for (int i = 0; i < numStates; i++) {
+            for (int j = 0; j < numStates; j++) {
+                if (transmissionMatrix.get(i, j) != 0 && transmissionMatrix.get(i, j) != 1) {
+                    double tmp = stationaryMatrix.get(0, i);
+                    tmp *= transmissionMatrix.get(i, j);
+                    tmp *= Math.log(transmissionMatrix.get(i, j)) / Math.log(2);
+                    entropy -= tmp;
+                }
+            }
+        }
+        return entropy;
+    }
+    /**
+     * <p>
+     * The dot representation of {@link FirstOrderMarkovModel}s is its graph representation with
+     * the states as nodes and directed edges weighted with transition probabilities.
+     * </p>
+     * <p>
+     * Nodes are named by the index of the symbol in a list copy of the known symbols and labeled
+     * with the symbol id, in which double quotes are escaped and line breaks are removed. Each
+     * edge is labeled with the probability of the follower given the symbol.
+     * </p>
+     *
+     * @return the model as a <code>digraph</code> named "model" in dot syntax
+     * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
+     */
+    @Override
+    public String getDotRepresentation() {
+        StringBuilder stringBuilder = new StringBuilder();
+        stringBuilder.append("digraph model {" + StringTools.ENDLINE);
+        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
+        for (Event symbol : knownSymbols) {
+            final String thisSaneId = symbol.getId().replace("\"", "\\\"").replaceAll("[\r\n]", "");
+            stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " [label=\"" + thisSaneId +
+                "\"];" + StringTools.ENDLINE);
+            List<Event> context = new ArrayList<Event>();
+            context.add(symbol);
+            Collection<Event> followers = trie.getFollowingSymbols(context);
+            for (Event follower : followers) {
+                stringBuilder.append(" " + knownSymbols.indexOf(symbol) + " -> " +
+                    knownSymbols.indexOf(follower) + " ");
+                stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
+                    StringTools.ENDLINE);
+            }
+        }
+        stringBuilder.append('}' + StringTools.ENDLINE);
+        return stringBuilder.toString();
+    }
+    /**
+     * <p>
+     * Returns a {@link Graph} representation of the model with the states as nodes and directed
+     * edges weighted with transition probabilities.
+     * </p>
+     * <p>
+     * Vertices are identified by the ids of the symbols. For every known symbol, one directed
+     * {@link MarkovEdge} is added to each follower the trie reports for it; the edge weight is
+     * the probability of the follower given the symbol.
+     * </p>
+     *
+     * @return {@link Graph} of the model
+     */
+    public Graph<String, MarkovEdge> getGraph() {
+        Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();
+        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
+        for (Event symbol : knownSymbols) {
+            String from = symbol.getId();
+            List<Event> context = new ArrayList<Event>();
+            context.add(symbol);
+            Collection<Event> followers = trie.getFollowingSymbols(context);
+            for (Event follower : followers) {
+                String to = follower.getId();
+                MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
+                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
+            }
+        }
+        return graph;
+    }
+    /**
+     * <p>
+     * Static nested class used as edge type for the {@link Graph} representation of the model.
+     * An edge only carries its weight.
+     * </p>
+     *
+     * @author Steffen Herbold
+     * @version 1.0
+     */
+    static public class MarkovEdge {
+        /**
+         * <p>
+         * Weight of the edge, i.e., its transition probability.
+         * </p>
+         */
+        double weight;
+        /**
+         * <p>
+         * Constructor. Creates a new MarkovEdge.
+         * </p>
+         *
+         * @param weight
+         *            weight of the edge, i.e., its transition probability
+         */
+        MarkovEdge(double weight) {
+            this.weight = weight;
+        }
+        /**
+         * <p>
+         * The weight of the edge as {@link String}.
+         * </p>
+         *
+         * @return the weight converted to a {@link String} by concatenation with the empty
+         *         string
+         */
+        public String toString() {
+            return "" + weight;
+        }
+    }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 559 for trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java

Legend:

trunk/quest-core-usageprofiles/src/main/java/de/ugoe/cs/quest/usageprofiles/FirstOrderMarkovModel.java

Download in other formats: