source: trunk/autoquest-ui-core/src/main/java/de/ugoe/cs/autoquest/commands/sequences/CMDsequencesSubset.java @ 1885

Last change on this file since 1885 was 1885, checked in by pharms, 9 years ago
  • added command for creates subsets of sequences with an intended size
File size: 8.0 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.commands.sequences;
16
17import java.util.Collection;
18import java.util.LinkedList;
19import java.util.List;
20import java.util.ListIterator;
21
22import de.ugoe.cs.autoquest.CommandHelpers;
23import de.ugoe.cs.autoquest.SequenceInstanceOf;
24import de.ugoe.cs.autoquest.eventcore.Event;
25import de.ugoe.cs.util.console.Command;
26import de.ugoe.cs.util.console.Console;
27import de.ugoe.cs.util.console.GlobalDataContainer;
28
29/**
30 * <p>
31 * Command to select a subset of a set of sequences covering at least a given percentage of all
32 * events covered by the given sequence set. The resulting set covers a little more events than
33 * intended because the input sequences are randomly selected and they are not cut. Hence, the sum
34 * of their covered events may not fully match the intended sum.
35 * </p>
36 *
37 * @author Patrick Harms
38 * @version 1.0
39 */
40public class CMDsequencesSubset implements Command {
41
42    /*
43     * (non-Javadoc)
44     *
45     * @see de.ugoe.cs.util.console.Command#run(java.util.List)
46     */
47    @SuppressWarnings("unchecked")
48    @Override
49    public void run(List<Object> parameters) {
50        int subsetCount;
51        int subsetSize;
52        String sequencesName;
53        String newSequencesName;
54        try {
55            subsetCount = Integer.parseInt((String) parameters.get(0));
56            subsetSize = Integer.parseInt((String) parameters.get(1));
57            sequencesName = (String) parameters.get(2);
58            if (parameters.size() > 3) {
59                newSequencesName = (String) parameters.get(3);
60            }
61            else {
62                newSequencesName = sequencesName;
63            }
64        }
65        catch (Exception e) {
66            throw new IllegalArgumentException("must provide two integers, the first defining " +
67                                               "the number of subsets, the second defining the " +
68                                               "number of events in each subset");
69        }
70
71        Collection<List<Event>> sequences = null;
72        Object dataObject = GlobalDataContainer.getInstance().getData(sequencesName);
73        if (dataObject == null) {
74            CommandHelpers.objectNotFoundMessage(sequencesName);
75            return;
76        }
77       
78        if (!SequenceInstanceOf.isCollectionOfSequences(dataObject)) {
79            CommandHelpers.objectNotType(sequencesName, "Collection<List<Event<?>>>");
80            return;
81        }
82       
83        sequences = (Collection<List<Event>>) dataObject;
84       
85        int allEvents = 0;
86       
87        // create a sorted list of sessions
88        List<List<Event>> fixedOrderSequences = new LinkedList<>();
89        for (List<Event> sequence : sequences) {
90            if (sequence.size() > 0) {
91                ListIterator<List<Event>> iterator = fixedOrderSequences.listIterator();
92                boolean added = false;
93                while (iterator.hasNext()) {
94                    if (iterator.next().size() < sequences.size()) {
95                        iterator.previous();
96                        iterator.add(sequence);
97                        added = true;
98                        break;
99                    }
100                }
101               
102                if (!added) {
103                    fixedOrderSequences.add(sequence);
104                }
105               
106                allEvents += sequence.size();
107            }
108        }
109       
110        Console.println("creating " + subsetCount + " sequence sets covering on average " +
111                        subsetSize + " events by subdividing " + sequences.size() +
112                        " sequences covering " + allEvents + " events into subsets");
113       
114        boolean mutuallyExclusiveSubsets = (subsetSize * subsetCount) < allEvents;
115       
116        if (!mutuallyExclusiveSubsets) {
117            Console.println("subsets will not be independent, i.e., one and the same session may " +
118                            "be included in several subsets");
119        }
120
121        List<List<List<Event>>> subsets = new LinkedList<>();
122        double randomProbability = ((double) subsetSize) / allEvents;
123        ListIterator<List<Event>> sequencesIterator = fixedOrderSequences.listIterator();
124        int minSubsetSize = Integer.MAX_VALUE;
125        int maxSubsetSize = 0;
126        int allCoveredEvents = 0;
127       
128        for (int i = 0; i < subsetCount; i++) {
129            List<List<Event>> subset = new LinkedList<>();
130            int remainingEventsToAdd = subsetSize;
131            while (remainingEventsToAdd > 0) {
132                if (!sequencesIterator.hasNext()) {
133                    if (fixedOrderSequences.size() > 0) {
134                        sequencesIterator = fixedOrderSequences.listIterator();
135                    }
136                    else {
137                        break;
138                    }
139                }
140           
141                List<Event> candidate = sequencesIterator.next();
142                if ((candidate.size() > 0) && (randomProbability > Math.random())) {
143                    // with the candidate, we may exceed the intended size too much. Hence we
144                    // continue with a shorter candidate, if any
145                    if (remainingEventsToAdd - candidate.size() < 0) {
146                        List<Event> shorterCandidate = null;
147                       
148                        while (sequencesIterator.hasNext()) {
149                            shorterCandidate = sequencesIterator.next();
150                           
151                            if (remainingEventsToAdd - shorterCandidate.size() >= 0) {
152                                break;
153                            }
154                        }
155                       
156                        // always take the shortest
157                        if (shorterCandidate != null) {
158                            candidate = shorterCandidate;
159                        }
160                    }
161                   
162                    subset.add(candidate);
163                    allCoveredEvents += candidate.size();
164                    remainingEventsToAdd -= candidate.size();
165                   
166                    if (mutuallyExclusiveSubsets) {
167                        sequencesIterator.remove();
168                    }
169                }
170            }
171           
172            minSubsetSize = Math.min(minSubsetSize, subset.size());
173            maxSubsetSize = Math.max(maxSubsetSize, subset.size());
174            subsets.add(subset);
175        }
176       
177        Console.println("created " + subsets.size() + " subsets of " + minSubsetSize + " to " +
178                        maxSubsetSize + " sequences covering " + allCoveredEvents + " events (" +
179                        (allCoveredEvents / subsets.size()) + " on average)");
180       
181        int index = 1;
182        for (List<List<Event>> subset : subsets) {
183            String subsetName = newSequencesName + "_" + index++;
184           
185            if (GlobalDataContainer.getInstance().addData(subsetName, subset)) {
186                CommandHelpers.dataOverwritten(subsetName);
187            }
188        }
189    }
190
191    /*
192     * (non-Javadoc)
193     *
194     * @see de.ugoe.cs.util.console.Command#help()
195     */
196    @Override
197    public String help() {
198        return "sequencesSubset <subsetCount> <subsetSize> <inputSequencesName> {<outputSequencesName>}";
199    }
200
201}
Note: See TracBrowser for help on using the repository browser.