source: trunk/quest-core-coverage/src/main/java/de/ugoe/cs/quest/coverage/CoverageCalculatorObserved.java @ 766

Last change on this file since 766 was 766, checked in by sherbold, 12 years ago
  • Property svn:mime-type set to text/plain
File size: 8.6 KB
Line 
1package de.ugoe.cs.quest.coverage;
2
3import java.util.Collection;
4import java.util.LinkedHashSet;
5import java.util.List;
6import java.util.Map;
7
8import de.ugoe.cs.quest.eventcore.Event;
9import de.ugoe.cs.quest.usageprofiles.IStochasticProcess;
10
11/**
12 * <p>
13 * This class calculates various types of sequence coverage in relation to a collection of observed
14 * sequences.
15 * </p>
16 *
17 * @author Steffen Herbold
18 * @version 1.0
19 */
20public class CoverageCalculatorObserved {
21
22    /**
23     * <p>
24     * Sequences for which the coverage is calculated.
25     * </p>
26     */
27    private final Collection<List<Event>> sequences;
28
29    /**
30     * <p>
31     * Observed sequences that are baseline for the coverage calculation.
32     * </p>
33     */
34    private final Collection<List<Event>> observedSequences;
35
36    /**
37     * <p>
38     * Length of the subsequences in relation to which the covarage is calculated.
39     * </p>
40     */
41    private final int length;
42
43    /**
44     * <p>
45     * All subsequences of {@link #length} of {@link #sequences}.
46     * </p>
47     */
48    private Collection<List<Event>> subSeqsGenerated = null;
49
50    /**
51     * <p>
52     * All subsequences of {@link #length} of {@link #observedSequences}.
53     * </p>
54     */
55    private Collection<List<Event>> subSeqsObserved = null;
56
57    /**
58     * <p>
59     * Constructor. Creates a new CoverageCalculatorObserved for given collections of observed
60     * sequences and generated sequences.
61     * </p>
62     *
63     * @param observedSequences
64     *            observed sequences in relation to which the coverage is calculated; must not be
65     *            null
66     * @param sequences
67     *            sequences for which the coverage is calculated; must not be null
68     * @param length
69     *            length of the subsequences for which the coverage is analyzed; must be >0
70     * @throws IllegalArgumentException
71     *             thrown if observedSequences or sequences is null or length less than or equal to
72     *             0
73     */
74    public CoverageCalculatorObserved(Collection<List<Event>> observedSequences,
75                                      Collection<List<Event>> sequences,
76                                      int length)
77    {
78        if (observedSequences == null) {
79            throw new IllegalArgumentException("observed sequences must not be null");
80        }
81        if (sequences == null) {
82            throw new IllegalArgumentException("sequences must not be null");
83        }
84        if (length <= 0) {
85            throw new IllegalArgumentException("length must be >0; actual value: " + length);
86        }
87        this.observedSequences = observedSequences;
88        this.sequences = sequences;
89        this.length = length;
90    }
91
92    /**
93     * <p>
94     * Calculates the percentage of subsequences of length k that occur, with reference to those
95     * that were observed.
96     * </p>
97     *
98     * @return coverage percentage
99     */
100    public double getCoverageObserved() {
101        createSubSeqs();
102        Collection<List<Event>> subSeqsObservedCopy =
103            new LinkedHashSet<List<Event>>(subSeqsObserved);
104        subSeqsObservedCopy.retainAll(subSeqsGenerated);
105        return ((double) subSeqsObservedCopy.size()) / subSeqsObserved.size();
106    }
107
108    /**
109     * <p>
110     * Calculates the weight of subsequences of length k that occur, with reference to those that
111     * were observed.
112     * </p>
113     *
114     * @param process
115     *            stochastic process in reference to which the weight is calculated
116     * @return coverage percentage
117     */
118
119    public double getCoverageObservedWeigth(IStochasticProcess process) {
120        createSubSeqs();
121        Map<List<Event>, Double> weightMap =
122            SequenceTools.generateWeights(process, subSeqsObserved);
123
124        Collection<List<Event>> subSeqsObservedCopy =
125            new LinkedHashSet<List<Event>>(subSeqsObserved);
126        subSeqsObservedCopy.retainAll(subSeqsGenerated);
127        double weight = 0.0d;
128        for (List<Event> subSeq : subSeqsObservedCopy) {
129            weight += weightMap.get(subSeq);
130        }
131        return weight;
132    }
133
134    /**
135     * <p>
136     * Calculates the percentage of generated subsequences of length k that occur and have not been
137     * observed, with reference to all generated subsequences.
138     * </p>
139     *
140     * @return coverage percentage
141     */
142    public double getNewPercentage() {
143        createSubSeqs();
144        Collection<List<Event>> subSeqsGeneratedCopy =
145            new LinkedHashSet<List<Event>>(subSeqsGenerated);
146        subSeqsGeneratedCopy.removeAll(subSeqsObserved);
147        return ((double) subSeqsGeneratedCopy.size()) / subSeqsGenerated.size();
148    }
149
150    /**
151     * <p>
152     * Calculates the percentage of generated subsequences of length k that occur and have not been
153     * observed, with references to all possible new subsequences.
154     * </p>
155     *
156     * @param process
157     *            stochastic process which is used to determine which subsequences are possible
158     * @return coverage percentage
159     * @throws IllegalArgumentException
160     *             thrown if process is null
161     */
162    public double getCoveragePossibleNew(IStochasticProcess process) {
163        if (process == null) {
164            throw new IllegalArgumentException("process must not be null");
165        }
166        createSubSeqs();
167        Collection<List<Event>> subSeqsGeneratedCopy =
168            new LinkedHashSet<List<Event>>(subSeqsGenerated);
169        Collection<List<Event>> subSeqsPossible = process.generateSequences(length);
170        subSeqsGeneratedCopy.removeAll(subSeqsObserved);
171        subSeqsPossible.removeAll(subSeqsObserved);
172        int possibleSize = subSeqsPossible.size();
173        subSeqsPossible.retainAll(subSeqsGeneratedCopy);
174        return ((double) subSeqsPossible.size()) / possibleSize;
175    }
176
177    /**
178     * <p>
179     * Calculates the weight of generated subsequences of length k that occur and have not been
180     * observed, with references to all possible new subsequences.
181     * </p>
182     *
183     * @param process
184     *            stochastic process which is used to determine the weights and which subsequences
185     *            are possible
186     * @return coverage percentage
187     * @throws IllegalArgumentException
188     *             thrown if process is null
189     */
190    public double getCoveragePossibleNewWeight(IStochasticProcess process) {
191        if (process == null) {
192            throw new IllegalArgumentException("process must not be null");
193        }
194        createSubSeqs();
195        Collection<List<Event>> subSeqsGeneratedCopy =
196            new LinkedHashSet<List<Event>>(subSeqsGenerated);
197        Collection<List<Event>> subSeqsPossible = process.generateSequences(length);
198        subSeqsGeneratedCopy.removeAll(subSeqsObserved);
199        subSeqsPossible.removeAll(subSeqsObserved);
200        Map<List<Event>, Double> weightMap =
201            SequenceTools.generateWeights(process, subSeqsPossible);
202        double weight = 0.0d;
203        for (List<Event> subSeq : subSeqsGeneratedCopy) {
204            Double currentWeight = weightMap.get(subSeq);
205            if (currentWeight != null) {
206                weight += currentWeight;
207            }
208        }
209        return weight;
210    }
211
212    /**
213     * <p>
214     * Returns the number of covered subsequences of length k.
215     * </p>
216     *
217     * @return number of covered subsequences
218     */
219    public int getNumObserved() {
220        createSubSeqs();
221        return subSeqsObserved.size();
222    }
223
224    /**
225     * <p>
226     * Returns the number of covered subsequences of length k.
227     * </p>
228     *
229     * @return number of covered subsequences
230     */
231    public int getNumCovered() {
232        createSubSeqs();
233        return subSeqsGenerated.size();
234    }
235
236    public int getNumNew() {
237        createSubSeqs();
238        Collection<List<Event>> subSeqsGeneratedCopy =
239            new LinkedHashSet<List<Event>>(subSeqsGenerated);
240        subSeqsGeneratedCopy.removeAll(subSeqsObserved);
241        return subSeqsGeneratedCopy.size();
242    }
243
244    /**
245     * <p>
246     * Helper function that calcuates the subsequences of length k that have been observed and
247     * generated.
248     * </p>
249     */
250    private void createSubSeqs() {
251        if (subSeqsObserved == null) {
252            subSeqsObserved = SequenceTools.containedSubSequences(observedSequences, length);
253        }
254        if (subSeqsGenerated == null) {
255            subSeqsGenerated = SequenceTools.containedSubSequences(sequences, length);
256        }
257    }
258}
Note: See TracBrowser for help on using the repository browser.