package de.ugoe.cs.quest.coverage; import java.security.InvalidParameterException; import java.util.Collection; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import de.ugoe.cs.quest.eventcore.Event; import de.ugoe.cs.quest.usageprofiles.IStochasticProcess; /** *

* This class calculates various types of sequence coverage in relation to a collection of observed * sequences. *

* * @author Steffen Herbold * @version 1.0 */ public class CoverageCalculatorObserved { /** *

* Sequences for which the coverage is calculated. *

*/ private final Collection> sequences; /** *

* Observed sequences that are baseline for the coverage calculation. *

*/ private final Collection> observedSequences; /** *

* Length of the subsequences in relation to which the covarage is calculated. *

*/ private final int length; /** *

* All subsequences of {@link #length} of {@link #sequences}. *

*/ private Collection> subSeqsGenerated = null; /** *

* All subsequences of {@link #length} of {@link #observedSequences}. *

*/ private Collection> subSeqsObserved = null; /** *

* Constructor. Creates a new CoverageCalculatorObserved for given collections of observed * sequences and generated sequences. *

* * @param observedSequences * observed sequences in relation to which the coverage is calculated; must not be * null * @param sequences * sequences for which the coverage is calculated; must not be null * @param length * length of the subsequences for which the coverage is analyzed; must be >0 * @throws InvalidParameterException * thrown if observedSequences or sequences is null or length less than or equal to * 0 */ public CoverageCalculatorObserved(Collection> observedSequences, Collection> sequences, int length) { if (observedSequences == null) { throw new InvalidParameterException("observed sequences must not be null"); } if (sequences == null) { throw new InvalidParameterException("sequences must not be null"); } if (length <= 0) { throw new InvalidParameterException("length must be >0; actual value: " + length); } this.observedSequences = observedSequences; this.sequences = sequences; this.length = length; } /** *

* Calculates the percentage of subsequences of length k that occur, with reference to those * that were observed. *

* * @return coverage percentage */ public double getCoverageObserved() { createSubSeqs(); Collection> subSeqsObservedCopy = new LinkedHashSet>(subSeqsObserved); subSeqsObservedCopy.retainAll(subSeqsGenerated); return ((double) subSeqsObservedCopy.size()) / subSeqsObserved.size(); } /** *

* Calculates the weight of subsequences of length k that occur, with reference to those that * were observed. *

* * @param process * stochastic process in reference to which the weight is calculated * @return coverage percentage */ public double getCoverageObservedWeigth(IStochasticProcess process) { createSubSeqs(); Map, Double> weightMap = SequenceTools.generateWeights(process, subSeqsObserved); Collection> subSeqsObservedCopy = new LinkedHashSet>(subSeqsObserved); subSeqsObservedCopy.retainAll(subSeqsGenerated); double weight = 0.0d; for (List subSeq : subSeqsObservedCopy) { weight += weightMap.get(subSeq); } return weight; } /** *

* Calculates the percentage of generated subsequences of length k that occur and have not been * observed, with reference to all generated subsequences. *

* * @return coverage percentage */ public double getNewPercentage() { createSubSeqs(); Collection> subSeqsGeneratedCopy = new LinkedHashSet>(subSeqsGenerated); subSeqsGeneratedCopy.removeAll(subSeqsObserved); return ((double) subSeqsGeneratedCopy.size()) / subSeqsGenerated.size(); } /** *

* Calculates the percentage of generated subsequences of length k that occur and have not been * observed, with references to all possible new subsequences. *

* * @param process * stochastic process which is used to determine which subsequences are possible * @return coverage percentage * @throws InvalidParameterException * thrown if process is null */ public double getCoveragePossibleNew(IStochasticProcess process) { if (process == null) { throw new InvalidParameterException("process must not be null"); } createSubSeqs(); Collection> subSeqsGeneratedCopy = new LinkedHashSet>(subSeqsGenerated); Collection> subSeqsPossible = process.generateSequences(length); subSeqsGeneratedCopy.removeAll(subSeqsObserved); subSeqsPossible.removeAll(subSeqsObserved); int possibleSize = subSeqsPossible.size(); subSeqsPossible.retainAll(subSeqsGeneratedCopy); return ((double) subSeqsPossible.size()) / possibleSize; } /** *

* Calculates the weight of generated subsequences of length k that occur and have not been * observed, with references to all possible new subsequences. *

* * @param process * stochastic process which is used to determine the weights and which subsequences * are possible * @return coverage percentage * @throws InvalidParameterException * thrown if process is null */ public double getCoveragePossibleNewWeight(IStochasticProcess process) { if (process == null) { throw new InvalidParameterException("process must not be null"); } createSubSeqs(); Collection> subSeqsGeneratedCopy = new LinkedHashSet>(subSeqsGenerated); Collection> subSeqsPossible = process.generateSequences(length); subSeqsGeneratedCopy.removeAll(subSeqsObserved); subSeqsPossible.removeAll(subSeqsObserved); Map, Double> weightMap = SequenceTools.generateWeights(process, subSeqsPossible); double weight = 0.0d; for (List subSeq : subSeqsGeneratedCopy) { Double currentWeight = weightMap.get(subSeq); if (currentWeight != null) { weight += currentWeight; } } return weight; } /** *

* Returns the number of covered subsequences of length k. *

* * @return number of covered subsequences */ public int getNumObserved() { createSubSeqs(); return subSeqsObserved.size(); } /** *

* Returns the number of covered subsequences of length k. *

* * @return number of covered subsequences */ public int getNumCovered() { createSubSeqs(); return subSeqsGenerated.size(); } public int getNumNew() { createSubSeqs(); Collection> subSeqsGeneratedCopy = new LinkedHashSet>(subSeqsGenerated); subSeqsGeneratedCopy.removeAll(subSeqsObserved); return subSeqsGeneratedCopy.size(); } /** *

* Helper function that calcuates the subsequences of length k that have been observed and * generated. *

*/ private void createSubSeqs() { if (subSeqsObserved == null) { subSeqsObserved = SequenceTools.containedSubSequences(observedSequences, length); } if (subSeqsGenerated == null) { subSeqsGenerated = SequenceTools.containedSubSequences(sequences, length); } } }