package de.ugoe.cs.eventbench.coverage; import java.util.Collection; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import de.ugoe.cs.eventbench.data.Event; import de.ugoe.cs.eventbench.models.IStochasticProcess; /** *

* This class calculates various types of sequence coverage. *

* * @author Steffen Herbold * @version 1.0 */ public class CoverageCalculator { /** *

* Stochastic process that is the foundation for probabilistic coverages and * coverages with reference to all possible sequences. *

*/ private final IStochasticProcess process; /** *

* Sequences for which the coverage is calculated. *

*/ private final Collection>> sequences; /** *

* Length of the subsequences in relation to which the covarage is * calculated. *

*/ private final int length; /** *

* All subsequences of {@link #length} of {@link #sequences}. *

*/ private Collection>> containedSubSeqs = null; /** *

* All subsequences of {@link #length} that can be generated by * {@link #process}. *

*/ private Collection>> allPossibleSubSeqs = null; /** *

* The probabilities of al subsequences of {@link #length} according to * {@link #process}. *

*/ private Map>, Double> subSeqWeights = null; /** *

* Constructor. Creates a new CoverageCalculator for a given stochastic * process and generated sequences. *

* * @param process * stochastic process used for coverage calculations; if it is * zero, not all calculations are possible * @param sequences * sequences for which the coverage is calculated; must not be * null. * @param length * length of the subsequences for which the coverage is analyzed; * must be >0 */ public CoverageCalculator(IStochasticProcess process, Collection>> sequences, int length) { // TODO check parameters this.process = process; this.sequences = sequences; this.length = length; } /** *

* Calculates the percentage of subsequences of length k that exist occur, * including those that cannot be generated by {@link #process}. *

* * @return coverage percentage */ public double getCoverageAllNoWeight() { if (containedSubSeqs == null) { containedSubSeqs = containedSubSequences(sequences, length); } return ((double) containedSubSeqs.size()) / numSequences(process, length); } /** *

* Calculates the percentage of subsequences of length k that occur and can * generated by {@link #process}. *

* * @return coverage percentage */ public double getCoveragePossibleNoWeight() { if (containedSubSeqs == null) { containedSubSeqs = containedSubSequences(sequences, length); } if (allPossibleSubSeqs == null) { allPossibleSubSeqs = process.generateSequences(length); } return ((double) containedSubSeqs.size()) / allPossibleSubSeqs.size(); } /** *

* Calculates the weight of the subsequences that occur with relation to * {@link #process}, i.e., the mass of the subsequence probability covered * by the subsequences. *

* * @return coverage weight */ public double getCoveragePossibleWeight() { if (containedSubSeqs == null) { containedSubSeqs = containedSubSequences(sequences, length); } if (allPossibleSubSeqs == null) { allPossibleSubSeqs = process.generateSequences(length); } if (subSeqWeights == null) { subSeqWeights = generateWeights(process, allPossibleSubSeqs); } double weight = 0.0; for (List> subSeq : containedSubSeqs) { weight += subSeqWeights.get(subSeq); } return weight; } /** *

* Calculates the weights for all sequences passed to this function as * defined by the stochastic process and stores them in a {@link Map}. *

* * @param process * process used for weight calculation * @param sequences * sequences for which the weights are calculated * @return {@link Map} of weights */ private static Map>, Double> generateWeights( IStochasticProcess process, Collection>> sequences) { Map>, Double> subSeqWeights = new LinkedHashMap>, Double>(); double sum = 0.0; for (List> sequence : sequences) { double prob = 1.0; List> context = new LinkedList>(); for (Event event : sequence) { prob *= process.getProbability(context, event); context.add(event); } subSeqWeights.put(sequence, prob); sum += prob; } if (sum < 1.0) { for (Map.Entry>, Double> entry : subSeqWeights .entrySet()) { entry.setValue(entry.getValue() / sum); } } return subSeqWeights; } /** *

* Calculates the number of all existing sequences of a given length, * regardless whether they are possible or not. *

* * @param process * stochastic process whose symbols are the basis for this * calculation * @param length * lenght of the sequences * @return numStates^length */ private static long numSequences(IStochasticProcess process, int length) { return (long) Math.pow(process.getNumStates(), length); } /** *

* Creates a {@link Set} of all subsequences of a given length that are * contained in a sequence collection. *

* * @param sequences * sequences from which the subsequences are extracted * @param length * length of the subsequences * @return {@link Set} of all subsequences */ private static Set>> containedSubSequences( Collection>> sequences, int length) { Set>> containedSubSeqs = new LinkedHashSet>>(); List> subSeq = new LinkedList>(); boolean minLengthReached = false; for (List> sequence : sequences) { for (Event event : sequence) { subSeq.add(event); if (!minLengthReached) { if (subSeq.size() == length) { minLengthReached = true; } } else { subSeq.remove(0); } if (minLengthReached) { if (!containedSubSeqs.contains(subSeq)) { containedSubSeqs.add(new LinkedList>(subSeq)); } } } } return containedSubSeqs; } }