[81] | 1 | package de.ugoe.cs.eventbench.coverage;
|
---|
| 2 |
|
---|
[102] | 3 | import java.util.Collection;
|
---|
[81] | 4 | import java.util.LinkedHashMap;
|
---|
| 5 | import java.util.LinkedHashSet;
|
---|
| 6 | import java.util.LinkedList;
|
---|
| 7 | import java.util.List;
|
---|
| 8 | import java.util.Map;
|
---|
| 9 | import java.util.Set;
|
---|
| 10 |
|
---|
| 11 | import de.ugoe.cs.eventbench.data.Event;
|
---|
| 12 | import de.ugoe.cs.eventbench.models.IStochasticProcess;
|
---|
| 13 |
|
---|
[106] | 14 | /**
|
---|
| 15 | * <p>
|
---|
| 16 | * This class calculates various types of sequence coverage.
|
---|
| 17 | * </p>
|
---|
| 18 | *
|
---|
| 19 | * @author Steffen Herbold
|
---|
| 20 | * @version 1.0
|
---|
| 21 | */
|
---|
[81] | 22 | public class CoverageCalculator {
|
---|
[106] | 23 |
|
---|
| 24 | /**
|
---|
| 25 | * <p>
|
---|
| 26 | * Stochastic process that is the foundation for probabilistic coverages and
|
---|
| 27 | * coverages with reference to all possible sequences.
|
---|
| 28 | * </p>
|
---|
| 29 | */
|
---|
[81] | 30 | private final IStochasticProcess process;
|
---|
[106] | 31 |
|
---|
| 32 | /**
|
---|
| 33 | * <p>
|
---|
| 34 | * Sequences for which the coverage is calculated.
|
---|
| 35 | * </p>
|
---|
| 36 | */
|
---|
[102] | 37 | private final Collection<List<? extends Event<?>>> sequences;
|
---|
[106] | 38 |
|
---|
| 39 | /**
|
---|
| 40 | * <p>
|
---|
| 41 | * Length of the subsequences in relation to which the covarage is
|
---|
| 42 | * calculated.
|
---|
| 43 | * </p>
|
---|
| 44 | */
|
---|
[81] | 45 | private final int length;
|
---|
[106] | 46 |
|
---|
| 47 | /**
|
---|
| 48 | * <p>
|
---|
| 49 | * All subsequences of {@link #length} of {@link #sequences}.
|
---|
| 50 | * </p>
|
---|
| 51 | */
|
---|
[102] | 52 | private Collection<List<? extends Event<?>>> containedSubSeqs = null;
|
---|
[106] | 53 |
|
---|
| 54 | /**
|
---|
| 55 | * <p>
|
---|
| 56 | * All subsequences of {@link #length} that can be generated by
|
---|
| 57 | * {@link #process}.
|
---|
| 58 | * </p>
|
---|
| 59 | */
|
---|
[102] | 60 | private Collection<List<? extends Event<?>>> allPossibleSubSeqs = null;
|
---|
[106] | 61 |
|
---|
| 62 | /**
|
---|
| 63 | * <p>
|
---|
| 64 | * The probabilities of al subsequences of {@link #length} according to
|
---|
| 65 | * {@link #process}.
|
---|
| 66 | * </p>
|
---|
| 67 | */
|
---|
[93] | 68 | private Map<List<? extends Event<?>>, Double> subSeqWeights = null;
|
---|
[106] | 69 |
|
---|
| 70 | /**
|
---|
| 71 | * <p>
|
---|
| 72 | * Constructor. Creates a new CoverageCalculator for a given stochastic
|
---|
| 73 | * process and generated sequences.
|
---|
| 74 | * </p>
|
---|
| 75 | *
|
---|
| 76 | * @param process
|
---|
| 77 | * stochastic process used for coverage calculations; if it is
|
---|
| 78 | * zero, not all calculations are possible
|
---|
| 79 | * @param sequences
|
---|
| 80 | * sequences for which the coverage is calculated; must not be
|
---|
| 81 | * null.
|
---|
| 82 | * @param length
|
---|
| 83 | * length of the subsequences for which the coverage is analyzed;
|
---|
| 84 | * must be >0
|
---|
| 85 | */
|
---|
| 86 | public CoverageCalculator(IStochasticProcess process,
|
---|
| 87 | Collection<List<? extends Event<?>>> sequences, int length) {
|
---|
| 88 | // TODO check parameters
|
---|
[81] | 89 | this.process = process;
|
---|
| 90 | this.sequences = sequences;
|
---|
| 91 | this.length = length;
|
---|
| 92 | }
|
---|
| 93 |
|
---|
[106] | 94 | /**
|
---|
| 95 | * <p>
|
---|
| 96 | * Calculates the percentage of subsequences of length k that exist occur,
|
---|
| 97 | * including those that cannot be generated by {@link #process}.
|
---|
| 98 | * </p>
|
---|
| 99 | *
|
---|
| 100 | * @return coverage percentage
|
---|
| 101 | */
|
---|
[81] | 102 | public double getCoverageAllNoWeight() {
|
---|
[106] | 103 | if (containedSubSeqs == null) {
|
---|
[81] | 104 | containedSubSeqs = containedSubSequences(sequences, length);
|
---|
| 105 | }
|
---|
[106] | 106 | return ((double) containedSubSeqs.size())
|
---|
| 107 | / numSequences(process, length);
|
---|
[81] | 108 | }
|
---|
[106] | 109 |
|
---|
| 110 | /**
|
---|
| 111 | * <p>
|
---|
| 112 | * Calculates the percentage of subsequences of length k that occur and can
|
---|
| 113 | * generated by {@link #process}.
|
---|
| 114 | * </p>
|
---|
| 115 | *
|
---|
| 116 | * @return coverage percentage
|
---|
| 117 | */
|
---|
[81] | 118 | public double getCoveragePossibleNoWeight() {
|
---|
[106] | 119 | if (containedSubSeqs == null) {
|
---|
[81] | 120 | containedSubSeqs = containedSubSequences(sequences, length);
|
---|
| 121 | }
|
---|
[106] | 122 | if (allPossibleSubSeqs == null) {
|
---|
[93] | 123 | allPossibleSubSeqs = process.generateSequences(length);
|
---|
[81] | 124 | }
|
---|
[106] | 125 | return ((double) containedSubSeqs.size()) / allPossibleSubSeqs.size();
|
---|
[81] | 126 | }
|
---|
[106] | 127 |
|
---|
| 128 | /**
|
---|
| 129 | * <p>
|
---|
| 130 | * Calculates the weight of the subsequences that occur with relation to
|
---|
| 131 | * {@link #process}, i.e., the mass of the subsequence probability covered
|
---|
| 132 | * by the subsequences.
|
---|
| 133 | * </p>
|
---|
| 134 | *
|
---|
| 135 | * @return coverage weight
|
---|
| 136 | */
|
---|
[81] | 137 | public double getCoveragePossibleWeight() {
|
---|
[106] | 138 | if (containedSubSeqs == null) {
|
---|
[81] | 139 | containedSubSeqs = containedSubSequences(sequences, length);
|
---|
| 140 | }
|
---|
[106] | 141 | if (allPossibleSubSeqs == null) {
|
---|
[93] | 142 | allPossibleSubSeqs = process.generateSequences(length);
|
---|
[81] | 143 | }
|
---|
[106] | 144 | if (subSeqWeights == null) {
|
---|
[81] | 145 | subSeqWeights = generateWeights(process, allPossibleSubSeqs);
|
---|
| 146 | }
|
---|
| 147 | double weight = 0.0;
|
---|
[106] | 148 | for (List<? extends Event<?>> subSeq : containedSubSeqs) {
|
---|
[81] | 149 | weight += subSeqWeights.get(subSeq);
|
---|
| 150 | }
|
---|
| 151 | return weight;
|
---|
| 152 | }
|
---|
[106] | 153 |
|
---|
| 154 | /**
|
---|
| 155 | * <p>
|
---|
| 156 | * Calculates the weights for all sequences passed to this function as
|
---|
| 157 | * defined by the stochastic process and stores them in a {@link Map}.
|
---|
| 158 | * </p>
|
---|
| 159 | *
|
---|
| 160 | * @param process
|
---|
| 161 | * process used for weight calculation
|
---|
| 162 | * @param sequences
|
---|
| 163 | * sequences for which the weights are calculated
|
---|
| 164 | * @return {@link Map} of weights
|
---|
| 165 | */
|
---|
| 166 | private static Map<List<? extends Event<?>>, Double> generateWeights(
|
---|
| 167 | IStochasticProcess process,
|
---|
| 168 | Collection<List<? extends Event<?>>> sequences) {
|
---|
[93] | 169 | Map<List<? extends Event<?>>, Double> subSeqWeights = new LinkedHashMap<List<? extends Event<?>>, Double>();
|
---|
[81] | 170 | double sum = 0.0;
|
---|
[106] | 171 | for (List<? extends Event<?>> sequence : sequences) {
|
---|
[81] | 172 | double prob = 1.0;
|
---|
| 173 | List<Event<?>> context = new LinkedList<Event<?>>();
|
---|
[106] | 174 | for (Event<?> event : sequence) {
|
---|
[81] | 175 | prob *= process.getProbability(context, event);
|
---|
| 176 | context.add(event);
|
---|
| 177 | }
|
---|
| 178 | subSeqWeights.put(sequence, prob);
|
---|
| 179 | sum += prob;
|
---|
| 180 | }
|
---|
[106] | 181 | if (sum < 1.0) {
|
---|
| 182 | for (Map.Entry<List<? extends Event<?>>, Double> entry : subSeqWeights
|
---|
| 183 | .entrySet()) {
|
---|
| 184 | entry.setValue(entry.getValue() / sum);
|
---|
[81] | 185 | }
|
---|
| 186 | }
|
---|
| 187 | return subSeqWeights;
|
---|
| 188 | }
|
---|
[106] | 189 |
|
---|
| 190 | /**
|
---|
| 191 | * <p>
|
---|
| 192 | * Calculates the number of all existing sequences of a given length,
|
---|
| 193 | * regardless whether they are possible or not.
|
---|
| 194 | * </p>
|
---|
| 195 | *
|
---|
| 196 | * @param process
|
---|
| 197 | * stochastic process whose symbols are the basis for this
|
---|
| 198 | * calculation
|
---|
| 199 | * @param length
|
---|
| 200 | * lenght of the sequences
|
---|
| 201 | * @return numStates^length
|
---|
| 202 | */
|
---|
| 203 | private static long numSequences(IStochasticProcess process, int length) {
|
---|
[81] | 204 | return (long) Math.pow(process.getNumStates(), length);
|
---|
| 205 | }
|
---|
[106] | 206 |
|
---|
| 207 | /**
|
---|
| 208 | * <p>
|
---|
| 209 | * Creates a {@link Set} of all subsequences of a given length that are
|
---|
| 210 | * contained in a sequence collection.
|
---|
| 211 | * </p>
|
---|
| 212 | *
|
---|
| 213 | * @param sequences
|
---|
| 214 | * sequences from which the subsequences are extracted
|
---|
| 215 | * @param length
|
---|
| 216 | * length of the subsequences
|
---|
| 217 | * @return {@link Set} of all subsequences
|
---|
| 218 | */
|
---|
| 219 | private static Set<List<? extends Event<?>>> containedSubSequences(
|
---|
| 220 | Collection<List<? extends Event<?>>> sequences, int length) {
|
---|
[93] | 221 | Set<List<? extends Event<?>>> containedSubSeqs = new LinkedHashSet<List<? extends Event<?>>>();
|
---|
[81] | 222 | List<Event<?>> subSeq = new LinkedList<Event<?>>();
|
---|
| 223 | boolean minLengthReached = false;
|
---|
[106] | 224 | for (List<? extends Event<?>> sequence : sequences) {
|
---|
| 225 | for (Event<?> event : sequence) {
|
---|
[81] | 226 | subSeq.add(event);
|
---|
[106] | 227 | if (!minLengthReached) {
|
---|
| 228 | if (subSeq.size() == length) {
|
---|
| 229 | minLengthReached = true;
|
---|
[81] | 230 | }
|
---|
| 231 | } else {
|
---|
| 232 | subSeq.remove(0);
|
---|
| 233 | }
|
---|
[106] | 234 | if (minLengthReached) {
|
---|
| 235 | if (!containedSubSeqs.contains(subSeq)) {
|
---|
[81] | 236 | containedSubSeqs.add(new LinkedList<Event<?>>(subSeq));
|
---|
| 237 | }
|
---|
| 238 | }
|
---|
| 239 | }
|
---|
| 240 | }
|
---|
| 241 | return containedSubSeqs;
|
---|
| 242 | }
|
---|
[106] | 243 |
|
---|
[81] | 244 | }
|
---|