Index: trunk/autoquest-ui-core/src/main/java/de/ugoe/cs/autoquest/commands/sequences/CMDsequencesSubset.java
===================================================================
--- trunk/autoquest-ui-core/src/main/java/de/ugoe/cs/autoquest/commands/sequences/CMDsequencesSubset.java	(revision 1885)
+++ trunk/autoquest-ui-core/src/main/java/de/ugoe/cs/autoquest/commands/sequences/CMDsequencesSubset.java	(revision 1885)
@@ -0,0 +1,201 @@
+//   Copyright 2012 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.autoquest.commands.sequences;
+
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+import de.ugoe.cs.autoquest.CommandHelpers;
+import de.ugoe.cs.autoquest.SequenceInstanceOf;
+import de.ugoe.cs.autoquest.eventcore.Event;
+import de.ugoe.cs.util.console.Command;
+import de.ugoe.cs.util.console.Console;
+import de.ugoe.cs.util.console.GlobalDataContainer;
+
+/**
+ * <p>
+ * Command to select a subset of a set of sequences covering at least a given percentage of all
+ * events covered by the given sequence set. The resulting set covers a little more events than
+ * intended because the input sequences are randomly selected and they are not cut. Hence, the sum
+ * of their covered events may not fully match the intended sum.
+ * </p>
+ * 
+ * @author Patrick Harms
+ * @version 1.0
+ */
+public class CMDsequencesSubset implements Command {
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.util.console.Command#run(java.util.List)
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    public void run(List<Object> parameters) {
+        int subsetCount;
+        int subsetSize;
+        String sequencesName;
+        String newSequencesName;
+        try {
+            subsetCount = Integer.parseInt((String) parameters.get(0));
+            subsetSize = Integer.parseInt((String) parameters.get(1));
+            sequencesName = (String) parameters.get(2);
+            if (parameters.size() > 3) {
+                newSequencesName = (String) parameters.get(3);
+            }
+            else {
+                newSequencesName = sequencesName;
+            }
+        }
+        catch (Exception e) {
+            throw new IllegalArgumentException("must provide two integers, the first defining " +
+                                               "the number of subsets, the second defining the " +
+                                               "number of events in each subset");
+        }
+
+        Collection<List<Event>> sequences = null;
+        Object dataObject = GlobalDataContainer.getInstance().getData(sequencesName);
+        if (dataObject == null) {
+            CommandHelpers.objectNotFoundMessage(sequencesName);
+            return;
+        }
+        
+        if (!SequenceInstanceOf.isCollectionOfSequences(dataObject)) {
+            CommandHelpers.objectNotType(sequencesName, "Collection<List<Event<?>>>");
+            return;
+        }
+        
+        sequences = (Collection<List<Event>>) dataObject;
+        
+        int allEvents = 0;
+        
+        // create a sorted list of sessions
+        List<List<Event>> fixedOrderSequences = new LinkedList<>();
+        for (List<Event> sequence : sequences) {
+            if (sequence.size() > 0) {
+                ListIterator<List<Event>> iterator = fixedOrderSequences.listIterator();
+                boolean added = false;
+                while (iterator.hasNext()) {
+                    if (iterator.next().size() < sequences.size()) {
+                        iterator.previous();
+                        iterator.add(sequence);
+                        added = true;
+                        break;
+                    }
+                }
+                
+                if (!added) {
+                    fixedOrderSequences.add(sequence);
+                }
+                
+                allEvents += sequence.size();
+            }
+        }
+        
+        Console.println("creating " + subsetCount + " sequence sets covering on average " +
+                        subsetSize + " events by subdividing " + sequences.size() +
+                        " sequences covering " + allEvents + " events into subsets");
+        
+        boolean mutuallyExclusiveSubsets = (subsetSize * subsetCount) < allEvents;
+        
+        if (!mutuallyExclusiveSubsets) {
+            Console.println("subsets will not be independent, i.e., one and the same session may " +
+                            "be included in several subsets");
+        }
+
+        List<List<List<Event>>> subsets = new LinkedList<>();
+        double randomProbability = ((double) subsetSize) / allEvents;
+        ListIterator<List<Event>> sequencesIterator = fixedOrderSequences.listIterator();
+        int minSubsetSize = Integer.MAX_VALUE;
+        int maxSubsetSize = 0;
+        int allCoveredEvents = 0;
+        
+        for (int i = 0; i < subsetCount; i++) {
+            List<List<Event>> subset = new LinkedList<>();
+            int remainingEventsToAdd = subsetSize;
+            while (remainingEventsToAdd > 0) {
+                if (!sequencesIterator.hasNext()) {
+                    if (fixedOrderSequences.size() > 0) { 
+                        sequencesIterator = fixedOrderSequences.listIterator();
+                    }
+                    else {
+                        break;
+                    }
+                }
+            
+                List<Event> candidate = sequencesIterator.next();
+                if ((candidate.size() > 0) && (randomProbability > Math.random())) {
+                    // with the candidate, we may exceed the intended size too much. Hence we
+                    // continue with a shorter candidate, if any
+                    if (remainingEventsToAdd - candidate.size() < 0) {
+                        List<Event> shorterCandidate = null;
+                        
+                        while (sequencesIterator.hasNext()) {
+                            shorterCandidate = sequencesIterator.next();
+                            
+                            if (remainingEventsToAdd - shorterCandidate.size() >= 0) {
+                                break;
+                            }
+                        }
+                        
+                        // always take the shortest
+                        if (shorterCandidate != null) {
+                            candidate = shorterCandidate;
+                        }
+                    }
+                    
+                    subset.add(candidate);
+                    allCoveredEvents += candidate.size();
+                    remainingEventsToAdd -= candidate.size();
+                    
+                    if (mutuallyExclusiveSubsets) {
+                        sequencesIterator.remove();
+                    }
+                }
+            }
+            
+            minSubsetSize = Math.min(minSubsetSize, subset.size());
+            maxSubsetSize = Math.max(maxSubsetSize, subset.size());
+            subsets.add(subset);
+        }
+        
+        Console.println("created " + subsets.size() + " subsets of " + minSubsetSize + " to " +
+                        maxSubsetSize + " sequences covering " + allCoveredEvents + " events (" +
+                        (allCoveredEvents / subsets.size()) + " on average)");
+        
+        int index = 1;
+        for (List<List<Event>> subset : subsets) {
+            String subsetName = newSequencesName + "_" + index++;
+            
+            if (GlobalDataContainer.getInstance().addData(subsetName, subset)) {
+                CommandHelpers.dataOverwritten(subsetName);
+            }
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.util.console.Command#help()
+     */
+    @Override
+    public String help() {
+        return "sequencesSubset <subsetCount> <subsetSize> <inputSequencesName> {<outputSequencesName>}";
+    }
+
+}
Index: trunk/autoquest-ui-core/src/main/resources/manuals/sequencesSubset
===================================================================
--- trunk/autoquest-ui-core/src/main/resources/manuals/sequencesSubset	(revision 1885)
+++ trunk/autoquest-ui-core/src/main/resources/manuals/sequencesSubset	(revision 1885)
@@ -0,0 +1,11 @@
+Creates random subsets of a list of sequences that cover roughly a specified amount of events. The result is only an approximation as the sum of events of the selected sequences in a set may not hard fit the intended size. If possible, the subsets are independent. If not, the command will drop a message.
+
+$USAGE$
+<subsetCount> the number of subsets to be created
+<subsetSize> the number of events contained in each subset
+<inputSequencesName> the name of the collection of sequences of which the subsets are to be created
+<outputSequencesName> optional; the name of the collection of sequences into which the resulting subset shall be stored; if left out, the result is stored in the collection identified by <sequencesName>_index
+
+Example(s):
+splitSequences 10 3000 someSequences (creates 10 subsets of sequences with 3000 events each)
+splitSequences 5 4500 someSequences someNewSequences  (creates 5 subsets of sequences with 4500 events each which will be named someNewSequences_1 to someNewSequences_5)
