
package de.ugoe.cs.quest.usageprofiles;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;

import de.ugoe.cs.quest.eventcore.Event;
import de.ugoe.cs.util.StringTools;
import de.ugoe.cs.util.console.Console;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.SparseMultigraph;
import edu.uci.ics.jung.graph.util.EdgeType;

import Jama.Matrix;

/**
 * <p>
 * Implements first-order Markov models. The implementation is based on {@link HighOrderMarkovModel}
 * and restricts the Markov order to 1. In comparison to {@link HighOrderMarkovModel}, more
 * calculations are possible with first-order models, e.g., the calculation of the entropy (
 * {@link #calcEntropy()}).
 * </p>
 * 
 * @author Steffen Herbold
 * @version 1.0
 */
public class FirstOrderMarkovModel extends HighOrderMarkovModel implements IDotCompatible {

    /**
     * <p>
     * Id for object serialization.
     * </p>
     */
    private static final long serialVersionUID = 1L;

    /**
     * <p>
     * Maximum number of iterations when calculating the stationary distribution as the limit of
     * repeatedly multiplying the transition matrix with itself.
     * </p>
     */
    static final int MAX_STATDIST_ITERATIONS = 1000;

    /**
     * <p>
     * Constructor. Creates a new FirstOrderMarkovModel.
     * </p>
     * 
     * @param r
     *            random number generator used by probabilistic methods of the class
     */
    public FirstOrderMarkovModel(Random r) {
        super(1, r);
    }

    /**
     * <p>
     * Generates the transition ("transmission") matrix of the Markov model. Entry {@code (i, j)}
     * holds the probability of observing {@code knownSymbols.get(j)} directly after
     * {@code knownSymbols.get(i)}.
     * </p>
     * 
     * @param knownSymbols
     *            symbols of the model; the position of a symbol in this list is its row and
     *            column index in the returned matrix
     * @return transition matrix
     */
    private Matrix getTransmissionMatrix(List<Event> knownSymbols) {
        int numStates = knownSymbols.size();
        Matrix transmissionMatrix = new Matrix(numStates, numStates);

        for (int i = 0; i < numStates; i++) {
            // The context of a first-order model is exactly one symbol.
            List<Event> context = new ArrayList<Event>();
            context.add(knownSymbols.get(i));
            for (int j = 0; j < numStates; j++) {
                transmissionMatrix.set(i, j, getProbability(context, knownSymbols.get(j)));
            }
        }
        return transmissionMatrix;
    }

    /**
     * <p>
     * Calculates the entropy of the model. To make it possible that the model is stationary, a
     * transition from {@link Event#ENDEVENT} to {@link Event#STARTEVENT} is added.
     * </p>
     * <p>
     * The stationary distribution is approximated by repeatedly squaring the transition matrix
     * (at most {@link #MAX_STATDIST_ITERATIONS} times) until its rank drops to 1.
     * </p>
     * 
     * @return entropy of the model (in bits) or NaN if it could not be calculated
     */
    public double calcEntropy() {
        // Build the symbol list exactly once so that the indices used below are guaranteed to be
        // the same indices that were used to fill the matrix.
        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
        int numStates = knownSymbols.size();
        Matrix transmissionMatrix = getTransmissionMatrix(knownSymbols);

        String startId = Event.STARTEVENT.getId();
        String endId = Event.ENDEVENT.getId();

        List<Integer> startIndexList = new ArrayList<Integer>();
        List<Integer> endIndexList = new ArrayList<Integer>();
        for (int i = 0; i < numStates; i++) {
            String id = knownSymbols.get(i).getId();
            // Besides the plain start/end ids, ids that embed them via the "-=-" separator are
            // treated as start/end states as well.
            if (id.equals(startId) || id.contains(startId + "-=-")) {
                startIndexList.add(i);
            }
            if (id.equals(endId) || id.contains("-=-" + endId)) {
                endIndexList.add(i);
            }
        }

        if (startIndexList.isEmpty()) {
            Console
                .printerrln("Error calculating entropy. Initial state of markov chain not found.");
            return Double.NaN;
        }
        if (endIndexList.isEmpty()) {
            Console.printerrln("Error calculating entropy. End state of markov chain not found.");
            return Double.NaN;
        }

        // Close the chain: every end state transitions to every start state.
        // NOTE(review): if more than one start state is found, each end-state row receives
        // several entries of 1 and therefore sums to more than 1 - confirm that this is intended.
        for (Integer i : endIndexList) {
            for (Integer j : startIndexList) {
                transmissionMatrix.set(i, j, 1);
            }
        }

        // Calculate the stationary distribution by raising the power of the transition matrix.
        // The rank of the matrix should fall to 1 and each row should then be the vector of the
        // stationary distribution.
        int iter = 0;
        int rank = transmissionMatrix.rank();
        Matrix stationaryMatrix = (Matrix) transmissionMatrix.clone();
        while (iter < MAX_STATDIST_ITERATIONS && rank > 1) {
            stationaryMatrix = stationaryMatrix.times(stationaryMatrix);
            rank = stationaryMatrix.rank();
            iter++;
        }

        if (rank != 1) {
            Console.traceln(Level.FINE, "rank: " + rank);
            Console.printerrln("Unable to calculate stationary distribution.");
            return Double.NaN;
        }

        double entropy = 0.0;
        for (int i = 0; i < numStates; i++) {
            double stationaryProb = stationaryMatrix.get(0, i);
            for (int j = 0; j < numStates; j++) {
                double p = transmissionMatrix.get(i, j);
                // p == 0 would yield log(0); p == 1 contributes nothing because log(1) == 0.
                if (p != 0 && p != 1) {
                    entropy -= stationaryProb * p * (Math.log(p) / Math.log(2));
                }
            }
        }
        return entropy;
    }

    /**
     * <p>
     * The dot representation of {@link FirstOrderMarkovModel}s is its graph representation with
     * the states as nodes and directed edges weighted with transition probabilities.
     * </p>
     * <p>
     * Nodes are named by the position of their symbol in the list of known symbols; the symbol id
     * is used as node label. Backslashes and double quotes in the id are escaped and line breaks
     * are removed so that the label is always a well-formed quoted dot string.
     * </p>
     * 
     * @return the model as a dot {@code digraph}
     * @see de.ugoe.cs.quest.usageprofiles.IDotCompatible#getDotRepresentation()
     */
    @Override
    public String getDotRepresentation() {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("digraph model {" + StringTools.ENDLINE);

        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());
        for (Event symbol : knownSymbols) {
            // Look the node index up once per symbol instead of once per outgoing edge.
            final int symbolIndex = knownSymbols.indexOf(symbol);
            // Escape backslashes first; otherwise an id ending in '\' would escape the closing
            // quote of the label and produce malformed dot output.
            final String thisSaneId = symbol.getId().replace("\\", "\\\\").replace("\"", "\\\"")
                .replaceAll("[\r\n]", "");
            stringBuilder.append(" " + symbolIndex + " [label=\"" + thisSaneId + "\"];" +
                StringTools.ENDLINE);

            List<Event> context = new ArrayList<Event>();
            context.add(symbol);
            Collection<Event> followers = trie.getFollowingSymbols(context);
            for (Event follower : followers) {
                stringBuilder.append(" " + symbolIndex + " -> " +
                    knownSymbols.indexOf(follower) + " ");
                stringBuilder.append("[label=\"" + getProbability(context, follower) + "\"];" +
                    StringTools.ENDLINE);
            }
        }
        stringBuilder.append('}' + StringTools.ENDLINE);
        return stringBuilder.toString();
    }

    /**
     * <p>
     * Returns a {@link Graph} representation of the model with the states as nodes and directed
     * edges weighted with transition probabilities. The ids of the symbols are used as vertices.
     * </p>
     * 
     * @return {@link Graph} of the model
     */
    public Graph<String, MarkovEdge> getGraph() {
        Graph<String, MarkovEdge> graph = new SparseMultigraph<String, MarkovEdge>();

        List<Event> knownSymbols = new ArrayList<Event>(trie.getKnownSymbols());

        for (Event symbol : knownSymbols) {
            String from = symbol.getId();
            List<Event> context = new ArrayList<Event>();
            context.add(symbol);

            Collection<Event> followers = trie.getFollowingSymbols(context);

            for (Event follower : followers) {
                String to = follower.getId();
                MarkovEdge prob = new MarkovEdge(getProbability(context, follower));
                graph.addEdge(prob, from, to, EdgeType.DIRECTED);
            }
        }
        return graph;
    }

    /**
     * Inner class used for the {@link Graph} representation of the model.
     * 
     * @author Steffen Herbold
     * @version 1.0
     */
    static public class MarkovEdge {
        /**
         * <p>
         * Weight of the edge, i.e., its transition probability.
         * </p>
         */
        double weight;

        /**
         * <p>
         * Constructor. Creates a new MarkovEdge.
         * </p>
         * 
         * @param weight
         *            weight of the edge, i.e., its transition probability
         */
        MarkovEdge(double weight) {
            this.weight = weight;
        }

        /**
         * <p>
         * The weight of the edge as {@link String}.
         * </p>
         * 
         * @return string representation of the weight
         */
        @Override
        public String toString() {
            return "" + weight;
        }
    }

}
