package de.ugoe.cs.eventbench.models;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import de.ugoe.cs.eventbench.data.Event;
import de.ugoe.cs.eventbench.models.Trie.Edge;
import de.ugoe.cs.eventbench.models.Trie.TrieVertex;
import edu.uci.ics.jung.graph.Tree;
/**
*
* Implements a skeleton for stochastic processes that can calculate
* probabilities based on a trie. The skeleton provides all functionalities of
* {@link IStochasticProcess} except
* {@link IStochasticProcess#getProbability(List, Event)}.
*
*
* @author Steffen Herbold
* @version 1.0
*/
public abstract class TrieBasedModel implements IStochasticProcess {
/**
*
* Id for object serialization.
*
*/
private static final long serialVersionUID = 1L;
/**
*
* The order of the trie, i.e., the maximum length of subsequences stored in
* the trie.
*
*/
protected int trieOrder;
/**
*
* Trie on which the probability calculations are based.
*
*/
protected Trie> trie = null;
/**
*
* Random number generator used by probabilistic sequence generation
* methods.
*
*/
protected final Random r;
/**
*
* Constructor. Creates a new TrieBasedModel that can be used for stochastic
* processes with a Markov order less than or equal to {@code markovOrder}.
*
*
* @param markovOrder
* Markov order of the model
* @param r
* random number generator used by probabilistic methods of the
* class
*/
public TrieBasedModel(int markovOrder, Random r) {
super();
this.trieOrder = markovOrder + 1;
this.r = r;
}
/**
*
* Trains the model by generating a trie from which probabilities are
* calculated. The trie is newly generated based solely on the passed
* sequences. If an existing model should only be updated, use
* {@link #update(Collection)} instead.
*
*
* @param sequences
* training data
*/
public void train(Collection>> sequences) {
trie = null;
update(sequences);
}
/**
*
* Trains the model by updating the trie from which the probabilities are
* calculated. This function updates an existing trie. In case no trie
* exists yet, a new trie is generated and the function behaves like
* {@link #train(Collection)}.
*
*
* @param sequences
* training data
*/
public void update(Collection>> sequences) {
if (sequences == null) {
return;
}
if (trie == null) {
trie = new Trie>();
}
for (List extends Event>> sequence : sequences) {
List> currentSequence = new LinkedList>(sequence); // defensive
// copy
currentSequence.add(0, Event.STARTEVENT);
currentSequence.add(Event.ENDEVENT);
trie.train(currentSequence, trieOrder);
}
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#randomSequence()
*/
@Override
public List extends Event>> randomSequence() {
List> sequence = new LinkedList>();
if( trie!=null ) {
IncompleteMemory> context = new IncompleteMemory>(
trieOrder - 1);
context.add(Event.STARTEVENT);
Event> currentState = Event.STARTEVENT;
boolean endFound = false;
while (!endFound) {
double randVal = r.nextDouble();
double probSum = 0.0;
List> currentContext = context.getLast(trieOrder);
for (Event> symbol : trie.getKnownSymbols()) {
probSum += getProbability(currentContext, symbol);
if (probSum >= randVal) {
endFound = (symbol == Event.ENDEVENT);
if (!(symbol == Event.STARTEVENT || symbol == Event.ENDEVENT)) {
// only add the symbol the sequence if it is not START
// or END
context.add(symbol);
currentState = symbol;
sequence.add(currentState);
}
break;
}
}
}
}
return sequence;
}
/**
*
* Returns a Dot representation of the internal trie.
*
*
* @return dot representation of the internal trie
*/
public String getTrieDotRepresentation() {
if (trie == null) {
return "";
} else {
return trie.getDotRepresentation();
}
}
/**
*
* Returns a {@link Tree} of the internal trie that can be used for
* visualization.
*
*
* @return {@link Tree} depicting the internal trie
*/
public Tree getTrieGraph() {
if (trie == null) {
return null;
} else {
return trie.getGraph();
}
}
/**
*
* The string representation of the model is {@link Trie#toString()} of
* {@link #trie}.
*
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
if (trie == null) {
return "";
} else {
return trie.toString();
}
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#getNumStates()
*/
@Override
public int getNumSymbols() {
if (trie == null) {
return 0;
} else {
return trie.getNumSymbols();
}
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#getStateStrings()
*/
@Override
public String[] getSymbolStrings() {
if (trie == null) {
return new String[0];
}
String[] stateStrings = new String[getNumSymbols()];
int i = 0;
for (Event> symbol : trie.getKnownSymbols()) {
stateStrings[i] = symbol.toString();
i++;
}
return stateStrings;
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#getEvents()
*/
@Override
public Collection extends Event>> getEvents() {
if (trie == null) {
return new HashSet>();
} else {
return trie.getKnownSymbols();
}
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.eventbench.models.IStochasticProcess#generateSequences(int)
*/
@Override
public Collection>> generateSequences(int length) {
return generateSequences(length, false);
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.eventbench.models.IStochasticProcess#generateSequences(int,
* boolean)
*/
@Override
public Set>> generateSequences(int length,
boolean fromStart) {
Set>> sequenceSet = new LinkedHashSet>>();
if (length < 1) {
throw new InvalidParameterException(
"Length of generated subsequences must be at least 1.");
}
if (length == 1) {
if (fromStart) {
List> subSeq = new LinkedList>();
subSeq.add(Event.STARTEVENT);
sequenceSet.add(subSeq);
} else {
for (Event> event : getEvents()) {
List> subSeq = new LinkedList>();
subSeq.add(event);
sequenceSet.add(subSeq);
}
}
return sequenceSet;
}
Collection extends Event>> events = getEvents();
Collection>> seqsShorter = generateSequences(
length - 1, fromStart);
for (Event> event : events) {
for (List extends Event>> seqShorter : seqsShorter) {
Event> lastEvent = event;
if (getProbability(seqShorter, lastEvent) > 0.0) {
List> subSeq = new ArrayList>(seqShorter);
subSeq.add(lastEvent);
sequenceSet.add(subSeq);
}
}
}
return sequenceSet;
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.eventbench.models.IStochasticProcess#generateValidSequences
* (int)
*/
@Override
public Collection>> generateValidSequences(
int length) {
// check for min-length implicitly done by generateSequences
Collection>> allSequences = generateSequences(
length, true);
Collection>> validSequences = new LinkedHashSet>>();
for (List extends Event>> sequence : allSequences) {
if (sequence.size() == length
&& Event.ENDEVENT.equals(sequence.get(sequence.size() - 1))) {
validSequences.add(sequence);
}
}
return validSequences;
}
/*
* (non-Javadoc)
*
* @see
* de.ugoe.cs.eventbench.models.IStochasticProcess#getProbability(java.util
* .List)
*/
@Override
public double getProbability(List extends Event>> sequence) {
double prob = 1.0;
if (sequence != null) {
List> context = new LinkedList>();
for (Event> event : sequence) {
prob *= getProbability(context, event);
context.add(event);
}
}
return prob;
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#getNumFOMStates()
*/
@Override
public int getNumFOMStates() {
if (trie == null) {
return 0;
} else {
return trie.getNumLeafAncestors();
}
}
/*
* (non-Javadoc)
*
* @see de.ugoe.cs.eventbench.models.IStochasticProcess#getNumTransitions()
*/
@Override
public int getNumTransitions() {
if (trie == null) {
return 0;
} else {
return trie.getNumLeafs();
}
}
}