source: trunk/autoquest-ui-core/src/main/java/de/ugoe/cs/autoquest/commands/sequences/CMDsequencesSubset.java @ 1934

Last change on this file since 1934 was 1934, checked in by pharms, 9 years ago
  • adapted to allow giving percentages, too
File size: 8.6 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.commands.sequences;
16
17import java.util.Collection;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.ListIterator;
21
22import de.ugoe.cs.autoquest.CommandHelpers;
23import de.ugoe.cs.autoquest.SequenceInstanceOf;
24import de.ugoe.cs.autoquest.eventcore.Event;
25import de.ugoe.cs.util.console.Command;
26import de.ugoe.cs.util.console.Console;
27import de.ugoe.cs.util.console.GlobalDataContainer;
28
29/**
30 * <p>
31 * Command to select a subset of a set of sequences covering at least a given percentage of all
32 * events covered by the given sequence set. The resulting set covers a little more events than
33 * intended because the input sequences are randomly selected and they are not cut. Hence, the sum
34 * of their covered events may not fully match the intended sum.
35 * </p>
36 *
37 * @author Patrick Harms
38 * @version 1.0
39 */
40public class CMDsequencesSubset implements Command {
41
42    /*
43     * (non-Javadoc)
44     *
45     * @see de.ugoe.cs.util.console.Command#run(java.util.List)
46     */
47    @SuppressWarnings("unchecked")
48    @Override
49    public void run(List<Object> parameters) {
50        int subsetCount;
51        double subsetSize;
52        boolean isPercent = false;
53        String sequencesName;
54        String newSequencesName;
55        try {
56            subsetCount = Integer.parseInt((String) parameters.get(0));
57            isPercent = ((String) parameters.get(1)).endsWith("%");
58           
59            if (!isPercent) {
60                subsetSize = Integer.parseInt((String) parameters.get(1));
61            }
62            else {
63                String percent = (String) parameters.get(1);
64                percent = percent.substring(0, percent.length() - 1);
65                subsetSize = Double.parseDouble(percent);
66            }
67            sequencesName = (String) parameters.get(2);
68            if (parameters.size() > 3) {
69                newSequencesName = (String) parameters.get(3);
70            }
71            else {
72                newSequencesName = sequencesName;
73            }
74        }
75        catch (Exception e) {
76            throw new IllegalArgumentException("must provide two integers, the first defining " +
77                                               "the number of subsets, the second defining the " +
78                                               "number of events in each subset");
79        }
80
81        Collection<List<Event>> sequences = null;
82        Object dataObject = GlobalDataContainer.getInstance().getData(sequencesName);
83        if (dataObject == null) {
84            CommandHelpers.objectNotFoundMessage(sequencesName);
85            return;
86        }
87       
88        if (!SequenceInstanceOf.isCollectionOfSequences(dataObject)) {
89            CommandHelpers.objectNotType(sequencesName, "Collection<List<Event<?>>>");
90            return;
91        }
92       
93        sequences = (Collection<List<Event>>) dataObject;
94       
95        int allEvents = 0;
96       
97        // create a sorted list of sessions
98        List<List<Event>> fixedOrderSequences = new LinkedList<>();
99        for (List<Event> sequence : sequences) {
100            if (sequence.size() > 0) {
101                ListIterator<List<Event>> iterator = fixedOrderSequences.listIterator();
102                boolean added = false;
103                while (iterator.hasNext()) {
104                    if (iterator.next().size() < sequences.size()) {
105                        iterator.previous();
106                        iterator.add(sequence);
107                        added = true;
108                        break;
109                    }
110                }
111               
112                if (!added) {
113                    fixedOrderSequences.add(sequence);
114                }
115               
116                allEvents += sequence.size();
117            }
118        }
119       
120        int effSubsetSize;
121        if (isPercent) {
122            effSubsetSize = (int) Math.floor(subsetSize * allEvents / 100);
123        }
124        else {
125            effSubsetSize = (int) subsetSize;
126        }
127       
128        Console.println("creating " + subsetCount + " sequence sets covering on average " +
129                        effSubsetSize + " events by subdividing " + sequences.size() +
130                        " sequences covering " + allEvents + " events into subsets");
131       
132        boolean mutuallyExclusiveSubsets = (effSubsetSize * subsetCount) <= allEvents;
133       
134        if (!mutuallyExclusiveSubsets) {
135            Console.println("subsets will not be independent, i.e., one and the same session may " +
136                            "be included in several subsets");
137        }
138
139        List<List<List<Event>>> subsets = new LinkedList<>();
140        double randomProbability = ((double) effSubsetSize) / allEvents;
141        ListIterator<List<Event>> sequencesIterator = fixedOrderSequences.listIterator();
142        int minSubsetSize = Integer.MAX_VALUE;
143        int maxSubsetSize = 0;
144        int allCoveredEvents = 0;
145       
146        for (int i = 0; i < subsetCount; i++) {
147            List<List<Event>> subset = new LinkedList<>();
148            int remainingEventsToAdd = effSubsetSize;
149            while (remainingEventsToAdd > 0) {
150                if (!sequencesIterator.hasNext()) {
151                    if (fixedOrderSequences.size() > 0) {
152                        sequencesIterator = fixedOrderSequences.listIterator();
153                    }
154                    else {
155                        break;
156                    }
157                }
158           
159                List<Event> candidate = sequencesIterator.next();
160                if ((candidate.size() > 0) && (randomProbability > Math.random())) {
161                    // with the candidate, we may exceed the intended size too much. Hence we
162                    // continue with a shorter candidate, if any
163                    if (remainingEventsToAdd - candidate.size() < 0) {
164                        List<Event> shorterCandidate = null;
165                       
166                        while (sequencesIterator.hasNext()) {
167                            shorterCandidate = sequencesIterator.next();
168                           
169                            if (remainingEventsToAdd - shorterCandidate.size() >= 0) {
170                                break;
171                            }
172                        }
173                       
174                        // always take the shortest
175                        if (shorterCandidate != null) {
176                            candidate = shorterCandidate;
177                        }
178                    }
179                   
180                    subset.add(candidate);
181                    allCoveredEvents += candidate.size();
182                    remainingEventsToAdd -= candidate.size();
183                   
184                    if (mutuallyExclusiveSubsets) {
185                        sequencesIterator.remove();
186                    }
187                }
188            }
189           
190            minSubsetSize = Math.min(minSubsetSize, subset.size());
191            maxSubsetSize = Math.max(maxSubsetSize, subset.size());
192            subsets.add(subset);
193        }
194       
195        Console.println("created " + subsets.size() + " subsets of " + minSubsetSize + " to " +
196                        maxSubsetSize + " sequences covering " + allCoveredEvents + " events (" +
197                        (allCoveredEvents / subsets.size()) + " on average)");
198       
199        int index = 1;
200        for (List<List<Event>> subset : subsets) {
201            String subsetName = newSequencesName + "_" + index++;
202           
203            if (GlobalDataContainer.getInstance().addData(subsetName, subset)) {
204                CommandHelpers.dataOverwritten(subsetName);
205            }
206        }
207    }
208
209    /*
210     * (non-Javadoc)
211     *
212     * @see de.ugoe.cs.util.console.Command#help()
213     */
214    @Override
215    public String help() {
216        return "sequencesSubset <subsetCount> <subsetSize> <inputSequencesName> {<outputSequencesName>}";
217    }
218
219}
Note: See TracBrowser for help on using the repository browser.