// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.commands.sequences; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import de.ugoe.cs.autoquest.CommandHelpers; import de.ugoe.cs.autoquest.SequenceInstanceOf; import de.ugoe.cs.autoquest.eventcore.Event; import de.ugoe.cs.util.console.Command; import de.ugoe.cs.util.console.Console; import de.ugoe.cs.util.console.GlobalDataContainer; /** *

* Command to select a subset of a set of sequences covering at least a given percentage of all * events covered by the given sequence set. The resulting set covers a little more events than * intended because the input sequences are randomly selected and they are not cut. Hence, the sum * of their covered events may not fully match the intended sum. *

* * @author Patrick Harms * @version 1.0 */ public class CMDsequencesSubset implements Command { /* * (non-Javadoc) * * @see de.ugoe.cs.util.console.Command#run(java.util.List) */ @SuppressWarnings("unchecked") @Override public void run(List parameters) { int subsetCount; int subsetSize; String sequencesName; String newSequencesName; try { subsetCount = Integer.parseInt((String) parameters.get(0)); subsetSize = Integer.parseInt((String) parameters.get(1)); sequencesName = (String) parameters.get(2); if (parameters.size() > 3) { newSequencesName = (String) parameters.get(3); } else { newSequencesName = sequencesName; } } catch (Exception e) { throw new IllegalArgumentException("must provide two integers, the first defining " + "the number of subsets, the second defining the " + "number of events in each subset"); } Collection> sequences = null; Object dataObject = GlobalDataContainer.getInstance().getData(sequencesName); if (dataObject == null) { CommandHelpers.objectNotFoundMessage(sequencesName); return; } if (!SequenceInstanceOf.isCollectionOfSequences(dataObject)) { CommandHelpers.objectNotType(sequencesName, "Collection>>"); return; } sequences = (Collection>) dataObject; int allEvents = 0; // create a sorted list of sessions List> fixedOrderSequences = new LinkedList<>(); for (List sequence : sequences) { if (sequence.size() > 0) { ListIterator> iterator = fixedOrderSequences.listIterator(); boolean added = false; while (iterator.hasNext()) { if (iterator.next().size() < sequences.size()) { iterator.previous(); iterator.add(sequence); added = true; break; } } if (!added) { fixedOrderSequences.add(sequence); } allEvents += sequence.size(); } } Console.println("creating " + subsetCount + " sequence sets covering on average " + subsetSize + " events by subdividing " + sequences.size() + " sequences covering " + allEvents + " events into subsets"); boolean mutuallyExclusiveSubsets = (subsetSize * subsetCount) < allEvents; if (!mutuallyExclusiveSubsets) { Console.println("subsets will not be independent, i.e., one and the same session may " + "be included in several subsets"); } List>> subsets = new LinkedList<>(); double randomProbability = ((double) subsetSize) / allEvents; ListIterator> sequencesIterator = fixedOrderSequences.listIterator(); int minSubsetSize = Integer.MAX_VALUE; int maxSubsetSize = 0; int allCoveredEvents = 0; for (int i = 0; i < subsetCount; i++) { List> subset = new LinkedList<>(); int remainingEventsToAdd = subsetSize; while (remainingEventsToAdd > 0) { if (!sequencesIterator.hasNext()) { if (fixedOrderSequences.size() > 0) { sequencesIterator = fixedOrderSequences.listIterator(); } else { break; } } List candidate = sequencesIterator.next(); if ((candidate.size() > 0) && (randomProbability > Math.random())) { // with the candidate, we may exceed the intended size too much. Hence we // continue with a shorter candidate, if any if (remainingEventsToAdd - candidate.size() < 0) { List shorterCandidate = null; while (sequencesIterator.hasNext()) { shorterCandidate = sequencesIterator.next(); if (remainingEventsToAdd - shorterCandidate.size() >= 0) { break; } } // always take the shortest if (shorterCandidate != null) { candidate = shorterCandidate; } } subset.add(candidate); allCoveredEvents += candidate.size(); remainingEventsToAdd -= candidate.size(); if (mutuallyExclusiveSubsets) { sequencesIterator.remove(); } } } minSubsetSize = Math.min(minSubsetSize, subset.size()); maxSubsetSize = Math.max(maxSubsetSize, subset.size()); subsets.add(subset); } Console.println("created " + subsets.size() + " subsets of " + minSubsetSize + " to " + maxSubsetSize + " sequences covering " + allCoveredEvents + " events (" + (allCoveredEvents / subsets.size()) + " on average)"); int index = 1; for (List> subset : subsets) { String subsetName = newSequencesName + "_" + index++; if (GlobalDataContainer.getInstance().addData(subsetName, subset)) { CommandHelpers.dataOverwritten(subsetName); } } } /* * (non-Javadoc) * * @see de.ugoe.cs.util.console.Command#help() */ @Override public String help() { return "sequencesSubset {}"; } }