Ignore:
Timestamp:
02/07/13 17:57:07 (11 years ago)
Author:
pharms
Message:
  • extended Trie implementation to be able to use different compare strategies
  • implemented a method to determine subsequences of a minimal length that occur most often
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/Trie.java

    r927 r1060  
    3434 * </p> 
    3535 *  
    36  * @author Steffen Herbold 
     36 * @author Steffen Herbold, Patrick Harms 
    3737 *  
    3838 * @param <T> 
     
    6666    /** 
    6767     * <p> 
    68      * Contructor. Creates a new Trie. 
     68     * Comparator to be used for comparing the symbols with each other 
     69     * </p> 
     70     */ 
     71    private SymbolComparator<T> comparator; 
     72 
     73    /** 
     74     * <p> 
     75     * Contructor. Creates a new Trie with a {@link DefaultSymbolComparator}. 
    6976     * </p> 
    7077     */ 
    7178    public Trie() { 
    72         rootNode = new TrieNode<T>(); 
     79        this(new DefaultSymbolComparator<T>()); 
     80    } 
     81 
     82    /** 
     83     * <p> 
     84     * Contructor. Creates a new Trie with that uses a specific {@link SymbolComparator}. 
     85     * </p> 
     86     */ 
     87    public Trie(SymbolComparator<T> comparator) { 
     88        this.comparator = comparator; 
     89        rootNode = new TrieNode<T>(comparator); 
    7390        knownSymbols = new LinkedHashSet<T>(); 
    7491    } 
     
    7693    /** 
    7794     * <p> 
    78      * Copy-Constructor. Creates a new Trie as the copy of other. The other trie must noch be null. 
     95     * Copy-Constructor. Creates a new Trie as the copy of other. The other trie must not be null. 
    7996     * </p> 
    8097     *  
     
    88105        rootNode = new TrieNode<T>(other.rootNode); 
    89106        knownSymbols = new LinkedHashSet<T>(other.knownSymbols); 
     107        comparator = other.comparator; 
    90108    } 
    91109 
     
    120138        for (T currentEvent : sequence) { 
    121139            latestActions.add(currentEvent); 
    122             knownSymbols.add(currentEvent); 
     140            addToKnownSymbols(currentEvent); 
    123141            i++; 
    124142            if (i >= maxOrder) { 
     
    143161    protected void add(List<T> subsequence) { 
    144162        if (subsequence != null && !subsequence.isEmpty()) { 
    145             knownSymbols.addAll(subsequence); 
     163            addToKnownSymbols(subsequence); 
    146164            subsequence = new LinkedList<T>(subsequence); // defensive copy! 
    147165            T firstSymbol = subsequence.get(0); 
     
    306324    /** 
    307325     * <p> 
     326     * returns a list of symbol sequences which have a minimal length and that occurred most often 
     327     * with the same number of occurrences. The resulting list is empty, if there is no symbol 
     328     * sequence with the minimal length. 
     329     * </p> 
     330     * 
     331     * @param minimalLength the minimal length of the returned sequences 
     332     *  
     333     * @return as described 
     334     */ 
     335    public Collection<List<T>> getSequencesWithMostOccurrences(int minimalLength) { 
     336        LinkedList<TrieNode<T>> context = new LinkedList<TrieNode<T>>(); 
     337        Collection<List<TrieNode<T>>> paths = new LinkedList<List<TrieNode<T>>>(); 
     338         
     339        context.push(rootNode); 
     340         
     341        // traverse the trie and determine all sequences, which have the maximum number of 
     342        // occurrences and a minimal length. 
     343         
     344        // minimalLength + 1 because we denote the depth including the root node 
     345        determineLongPathsWithMostOccurrences(minimalLength + 1, paths, context); 
     346         
     347        Collection<List<T>> resultingPaths = new LinkedList<List<T>>(); 
     348        List<T> resultingPath; 
     349         
     350        if (paths.size() > 0) { 
     351             
     352            for (List<TrieNode<T>> path : paths) { 
     353                resultingPath = new LinkedList<T>(); 
     354                 
     355                for (TrieNode<T> node : path) { 
     356                    if (node.getSymbol() != null) { 
     357                        resultingPath.add(node.getSymbol()); 
     358                    } 
     359                } 
     360                 
     361                resultingPaths.add(resultingPath); 
     362            } 
     363        } 
     364         
     365        return resultingPaths; 
     366    } 
     367 
     368    /** 
     369     * <p> 
     370     * Traverses the trie to collect all sequences with a maximum number of occurrences and with 
     371     * a minimal length. The length is encoded in the provided recursion depth. 
     372     * </p> 
     373     * 
     374     * @param minimalDepth the minimal recursion depth to be done 
     375     * @param paths        the paths through the trie that all occurred with the same amount and 
     376     *                     that have the so far found maximum of occurrences (is updated each 
     377     *                     time a further path with the same number of occurrences is found; is 
     378     *                     replaced if a path with more occurrences is found) 
     379     * @param context      the path through the trie, that is analyzed by the recursive call 
     380     */ 
     381    private void determineLongPathsWithMostOccurrences(int                           minimalDepth, 
     382                                                       Collection<List<TrieNode<T>>> paths, 
     383                                                       LinkedList<TrieNode<T>>       context) 
     384    { 
     385        int maxCount = 0; 
     386 
     387        // only if we already reached the depth to be achieved, we check if the paths have the 
     388        // maximum number of occurrences 
     389        if (context.size() >= minimalDepth) { 
     390             
     391            // try to determine the maximum number of occurrences so far, if any 
     392            if (paths.size() > 0) { 
     393                List<TrieNode<T>> path = paths.iterator().next(); 
     394                maxCount = path.get(path.size() - 1).getCount(); 
     395            } 
     396 
     397            // if the current path has a higher number of occurrences than all so far, clear 
     398            // the paths collected so far and set the new number of occurrences as new maximum 
     399            if (context.getLast().getCount() > maxCount) { 
     400                paths.clear(); 
     401                maxCount = context.getLast().getCount(); 
     402            } 
     403             
     404            // if the path matches the current maximal number of occurrences, add it to the list 
     405            // of collected paths with these number of occurrences 
     406            if (context.getLast().getCount() == maxCount) { 
     407                paths.add(new LinkedList<TrieNode<T>>(context)); 
     408            } 
     409        } 
     410         
     411        // perform the trie traversal 
     412        for (TrieNode<T> child : context.getLast().getChildren()) { 
     413            if (child.getCount() >= maxCount) { 
     414                context.add(child); 
     415                determineLongPathsWithMostOccurrences(minimalDepth, paths, context); 
     416                context.removeLast(); 
     417            } 
     418        } 
     419    } 
     420     
     421    /** 
     422     * <p> 
     423     * adds a new symbol to the collection of known symbols if this symbol is not already 
     424     * contained. The symbols are compared using the comparator. 
     425     * </p> 
     426     * 
     427     * @param symbol the symbol to be added to the known symbols 
     428     */ 
     429    private void addToKnownSymbols(T symbol) { 
     430        for (T knownSymbol : knownSymbols) { 
     431            if (comparator.equals(knownSymbol, symbol)) { 
     432                return; 
     433            } 
     434        } 
     435         
     436        knownSymbols.add(symbol); 
     437    } 
     438 
     439    /** 
     440     * <p> 
     441     * adds a list of new symbols to the collection of known symbols. Uses the 
     442     * {@link #addToKnownSymbols(Object)} method for each element of the provided list. 
     443     * </p> 
     444     * 
     445     * @param symbols the list of symbols to be added to the known symbols 
     446     */ 
     447    private void addToKnownSymbols(List<T> symbols) { 
     448        for (T symbol : symbols) { 
     449            addToKnownSymbols(symbol); 
     450        } 
     451    } 
     452 
     453    /** 
     454     * <p> 
    308455     * Helper class for graph visualization of a trie. 
    309456     * </p> 
     
    441588        knownSymbols = new HashSet<T>(); 
    442589        for (TrieNode<T> node : rootNode.getChildren()) { 
    443             knownSymbols.add(node.getSymbol()); 
     590            addToKnownSymbols(node.getSymbol()); 
    444591        } 
    445592    } 
     
    478625        return hash; 
    479626    } 
     627 
    480628} 
Note: See TracChangeset for help on using the changeset viewer.