[13] | 1 | package de.ugoe.cs.eventbench.models;
|
---|
[1] | 2 |
|
---|
[102] | 3 | import java.util.Collection;
|
---|
[5] | 4 | import java.util.LinkedList;
|
---|
[1] | 5 | import java.util.List;
|
---|
| 6 | import java.util.Random;
|
---|
| 7 |
|
---|
| 8 | import de.ugoe.cs.eventbench.data.Event;
|
---|
| 9 |
|
---|
[101] | 10 | /**
|
---|
| 11 | * <p>
|
---|
| 12 | * Implements Prediction by Partial Match (PPM) based on the following formula
|
---|
| 13 | * (LaTeX-style notation):<br>
|
---|
[115] | 14 | * P_{PPM}(X_n|X_{n-1},...,X_{n-k}) = \sum_{i=k}^min escape^{k-i} P_{MM^i}(X_n
|
---|
| 15 | * |X_{n-1},...,X_{n-i})(1-escape)+escape^(k-min)P(X_n|X_{n-i},... ,X_{n-min})<br>
|
---|
[101] | 16 | * P_{MM^i} denotes the probability in an i-th order Markov model.
|
---|
| 17 | * </p>
|
---|
| 18 | *
|
---|
| 19 | * @author Steffen Herbold
|
---|
| 20 | *
|
---|
| 21 | */
|
---|
[12] | 22 | public class PredictionByPartialMatch extends TrieBasedModel {
|
---|
[101] | 23 |
|
---|
[86] | 24 | /**
|
---|
[101] | 25 | * <p>
|
---|
[86] | 26 | * Id for object serialization.
|
---|
[101] | 27 | * </p>
|
---|
[86] | 28 | */
|
---|
| 29 | private static final long serialVersionUID = 1L;
|
---|
[101] | 30 |
|
---|
| 31 | /**
|
---|
| 32 | * <p>
|
---|
[109] | 33 | * Minimum order of the Markov model.
|
---|
| 34 | * </p>
|
---|
| 35 | */
|
---|
| 36 | private int minOrder;
|
---|
| 37 |
|
---|
| 38 | /**
|
---|
| 39 | * <p>
|
---|
[101] | 40 | * Probability to use a lower-order Markov model
|
---|
| 41 | * </p>
|
---|
| 42 | */
|
---|
[12] | 43 | double probEscape;
|
---|
[101] | 44 |
|
---|
| 45 | /**
|
---|
| 46 | * <p>
|
---|
| 47 | * Constructor. Creates a new PredictionByPartialMatch model with a given
|
---|
| 48 | * Markov order and a default escape probability of 0.1.
|
---|
| 49 | * </p>
|
---|
| 50 | *
|
---|
| 51 | * @param markovOrder
|
---|
| 52 | * Markov order of the model
|
---|
| 53 | * @param r
|
---|
| 54 | * random number generator used by probabilistic methods of the
|
---|
| 55 | * class
|
---|
| 56 | */
|
---|
[16] | 57 | public PredictionByPartialMatch(int markovOrder, Random r) {
|
---|
| 58 | this(markovOrder, r, 0.1);
|
---|
[7] | 59 | }
|
---|
[101] | 60 |
|
---|
| 61 | /**
|
---|
| 62 | * <p>
|
---|
| 63 | * Creates a new PredictionByPartialMatch model with a given Markov order
|
---|
| 64 | * and escape probability.
|
---|
| 65 | * </p>
|
---|
| 66 | *
|
---|
| 67 | * @param markovOrder
|
---|
| 68 | * Markov order of the model
|
---|
| 69 | * @param r
|
---|
| 70 | * random number generator used by probabilistic methods of the
|
---|
| 71 | * class
|
---|
| 72 | * @param probEscape
|
---|
| 73 | * escape probability used by the model
|
---|
| 74 | */
|
---|
[16] | 75 | public PredictionByPartialMatch(int markovOrder, Random r, double probEscape) {
|
---|
[109] | 76 | this(markovOrder, 0, r, probEscape);
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | /**
|
---|
| 80 | * <p>
|
---|
| 81 | * Creates a new PredictionByPartialMatch model with a given Markov order
|
---|
| 82 | * and escape probability.
|
---|
| 83 | * </p>
|
---|
| 84 | *
|
---|
| 85 | * @param markovOrder
|
---|
| 86 | * Markov order of the model
|
---|
| 87 | * @param minOrder
|
---|
| 88 | * minimum order of the model; if this order is reached, there is
|
---|
| 89 | * no escape
|
---|
| 90 | * @param r
|
---|
| 91 | * random number generator used by probabilistic methods of the
|
---|
| 92 | * class
|
---|
| 93 | * @param probEscape
|
---|
| 94 | * escape probability used by the model
|
---|
| 95 | */
|
---|
| 96 | public PredictionByPartialMatch(int markovOrder, int minOrder, Random r,
|
---|
| 97 | double probEscape) {
|
---|
[16] | 98 | super(markovOrder, r);
|
---|
[7] | 99 | this.probEscape = probEscape;
|
---|
[109] | 100 | this.minOrder = minOrder;
|
---|
[7] | 101 | }
|
---|
[101] | 102 |
|
---|
| 103 | /**
|
---|
| 104 | * <p>
|
---|
| 105 | * Sets the escape probability of the model.
|
---|
| 106 | * </p>
|
---|
| 107 | *
|
---|
| 108 | * @param probEscape
|
---|
| 109 | * new escape probability
|
---|
| 110 | */
|
---|
[7] | 111 | public void setProbEscape(double probEscape) {
|
---|
| 112 | this.probEscape = probEscape;
|
---|
| 113 | }
|
---|
[101] | 114 |
|
---|
| 115 | /**
|
---|
| 116 | * <p>
|
---|
| 117 | * Returns the escape probability of the model.
|
---|
| 118 | * </p>
|
---|
| 119 | *
|
---|
| 120 | * @return escape probability of the model
|
---|
| 121 | */
|
---|
[7] | 122 | public double getProbEscape() {
|
---|
| 123 | return probEscape;
|
---|
| 124 | }
|
---|
[101] | 125 |
|
---|
| 126 | /**
|
---|
| 127 | * <p>
|
---|
| 128 | * Calculates the probability of the next event based on the formula:<br>
|
---|
[109] | 129 | * P_{PPM}(X_n|X_{n-1},...,X_{n-k}) = \sum_{i=k}^min escape^{k-i}
|
---|
| 130 | * P_{MM^i}(X_n
|
---|
| 131 | * |X_{n-1},...,X_{n-i})(1-escape)+escape^(k-min)P(X_n|X_{n-i},...
|
---|
| 132 | * ,X_{n-min})<br>
|
---|
[101] | 133 | * P_{MM^i} denotes the probability in an i-th order Markov model.
|
---|
| 134 | * </p>
|
---|
| 135 | */
|
---|
[12] | 136 | @Override
|
---|
[101] | 137 | public double getProbability(List<? extends Event<?>> context,
|
---|
| 138 | Event<?> symbol) {
|
---|
[6] | 139 | double result = 0.0d;
|
---|
| 140 | double resultCurrentContex = 0.0d;
|
---|
| 141 | double resultShorterContex = 0.0d;
|
---|
[101] | 142 |
|
---|
[80] | 143 | List<Event<?>> contextCopy;
|
---|
[101] | 144 | if (context.size() >= trieOrder) {
|
---|
| 145 | contextCopy = new LinkedList<Event<?>>(context.subList(
|
---|
| 146 | context.size() - trieOrder + 1, context.size()));
|
---|
[80] | 147 | } else {
|
---|
| 148 | contextCopy = new LinkedList<Event<?>>(context);
|
---|
| 149 | }
|
---|
[6] | 150 |
|
---|
[102] | 151 | Collection<Event<?>> followers = trie.getFollowingSymbols(contextCopy); // \Sigma'
|
---|
[6] | 152 | int sumCountFollowers = 0; // N(s\sigma')
|
---|
[101] | 153 | for (Event<?> follower : followers) {
|
---|
[6] | 154 | sumCountFollowers += trie.getCount(contextCopy, follower);
|
---|
| 155 | }
|
---|
[101] | 156 |
|
---|
[6] | 157 | int countSymbol = trie.getCount(contextCopy, symbol); // N(s\sigma)
|
---|
[109] | 158 | if (sumCountFollowers == 0) {
|
---|
| 159 | resultCurrentContex = 0.0;
|
---|
[6] | 160 | } else {
|
---|
[109] | 161 | resultCurrentContex = (double) countSymbol / sumCountFollowers;
|
---|
| 162 | }
|
---|
[115] | 163 | if (contextCopy.size() >= minOrder) {
|
---|
[109] | 164 | resultCurrentContex *= (1 - probEscape);
|
---|
[101] | 165 | contextCopy.remove(0);
|
---|
[109] | 166 | if (contextCopy.size() >= minOrder) {
|
---|
| 167 | double probSuffix = getProbability(contextCopy, symbol);
|
---|
| 168 |
|
---|
| 169 | if (followers.size() == 0) {
|
---|
| 170 | resultShorterContex = probSuffix;
|
---|
| 171 | } else {
|
---|
| 172 | resultShorterContex = probEscape * probSuffix;
|
---|
| 173 | }
|
---|
[6] | 174 | }
|
---|
| 175 | }
|
---|
[101] | 176 | result = resultCurrentContex + resultShorterContex;
|
---|
| 177 |
|
---|
[6] | 178 | return result;
|
---|
| 179 | }
|
---|
[1] | 180 | }
|
---|