Ignore:
Timestamp:
02/21/13 18:41:04 (12 years ago)
Author:
pharms
Message:
  • allowed to search for all sub sequences with a dedicated number of occurrence
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/Trie.java

    r1060 r1110  
    324324    /** 
    325325     * <p> 
    326      * returns a list of symbol sequences which have a minimal length and that occurred most often 
    327      * with the same number of occurrences. The resulting list is empty, if there is no symbol 
    328      * sequence with the minimal length. 
     326     * returns a list of symbol sequences which have a minimal length and that occurred as often 
     327     * as defined by the given occurrence count. If the given occurrence count is smaller 1 then 
     328     * those sequences are returned, that occur most often. The resulting list is empty, if there 
     329     * is no symbol sequence with the minimal length or the provided number of occurrences. 
    329330     * </p> 
    330331     * 
    331      * @param minimalLength the minimal length of the returned sequences 
     332     * @param minimalLength   the minimal length of the returned sequences 
     333     * @param occurrenceCount the number of occurrences of the returned sequences 
    332334     *  
    333335     * @return as described 
    334336     */ 
    335     public Collection<List<T>> getSequencesWithMostOccurrences(int minimalLength) { 
     337    public Collection<List<T>> getSequencesWithOccurrenceCount(int minimalLength, 
     338                                                               int occurrenceCount) 
     339    { 
    336340        LinkedList<TrieNode<T>> context = new LinkedList<TrieNode<T>>(); 
    337341        Collection<List<TrieNode<T>>> paths = new LinkedList<List<TrieNode<T>>>(); 
     
    339343        context.push(rootNode); 
    340344         
    341         // traverse the trie and determine all sequences, which have the maximum number of 
     345        // traverse the trie and determine all sequences, which have the provided number of 
    342346        // occurrences and a minimal length. 
    343347         
    344348        // minimalLength + 1 because we denote the depth including the root node 
    345         determineLongPathsWithMostOccurrences(minimalLength + 1, paths, context); 
     349        determineLongPathsWithMostOccurrences(minimalLength + 1, occurrenceCount, paths, context); 
    346350         
    347351        Collection<List<T>> resultingPaths = new LinkedList<List<T>>(); 
     
    368372    /** 
    369373     * <p> 
    370      * Traverses the trie to collect all sequences with a maximum number of occurrences and with 
    371      * a minimal length. The length is encoded in the provided recursion depth. 
     374     * Traverses the trie to collect all sequences with a defined number of occurrences and with 
     375     * a minimal length. If the given occurrence count is smaller 1 then those sequences are 
     376     * searched that occur most often. The length of the sequences is encoded in the provided 
     377     * recursion depth. 
    372378     * </p> 
    373379     * 
    374      * @param minimalDepth the minimal recursion depth to be done 
    375      * @param paths        the paths through the trie that all occurred with the same amount and 
    376      *                     that have the so far found maximum of occurrences (is updated each 
    377      *                     time a further path with the same number of occurrences is found; is 
    378      *                     replaced if a path with more occurrences is found) 
    379      * @param context      the path through the trie, that is analyzed by the recursive call 
     380     * @param minimalDepth    the minimal recursion depth to be done 
     381     * @param occurrenceCount the number of occurrences of the returned sequences 
     382     * @param paths           the paths through the trie that all occurred with the same amount 
     383     *                        (if occurrence count is smaller 1, the paths which occurred most 
     384     *                        often) and that have the so far found matching number of occurrences 
     385     *                        (is updated each time a further path with the same number of 
     386     *                        occurrences is found; if occurrence count is smaller 1 
     387     *                        it is replaced if a path with more occurrences is found) 
     388     * @param context         the path through the trie, that is analyzed by the recursive call 
    380389     */ 
    381390    private void determineLongPathsWithMostOccurrences(int                           minimalDepth, 
     391                                                       int                           occurrenceCount, 
    382392                                                       Collection<List<TrieNode<T>>> paths, 
    383393                                                       LinkedList<TrieNode<T>>       context) 
    384394    { 
    385         int maxCount = 0; 
     395        int envisagedCount = occurrenceCount; 
    386396 
    387397        // only if we already reached the depth to be achieved, we check if the paths have the 
    388         // maximum number of occurrences 
     398        // required number of occurrences 
    389399        if (context.size() >= minimalDepth) { 
    390400             
    391             // try to determine the maximum number of occurrences so far, if any 
    392             if (paths.size() > 0) { 
    393                 List<TrieNode<T>> path = paths.iterator().next(); 
    394                 maxCount = path.get(path.size() - 1).getCount(); 
    395             } 
    396  
    397             // if the current path has a higher number of occurrences than all so far, clear 
    398             // the paths collected so far and set the new number of occurrences as new maximum 
    399             if (context.getLast().getCount() > maxCount) { 
    400                 paths.clear(); 
    401                 maxCount = context.getLast().getCount(); 
     401            if (envisagedCount < 1) { 
     402                // try to determine the maximum number of occurrences so far, if any 
     403                if (paths.size() > 0) { 
     404                    List<TrieNode<T>> path = paths.iterator().next(); 
     405                    envisagedCount = path.get(path.size() - 1).getCount(); 
     406                } 
     407 
     408                // if the current path has a higher number of occurrences than all so far, clear 
     409                // the paths collected so far and set the new number of occurrences as new maximum 
     410                if (context.getLast().getCount() > envisagedCount) { 
     411                    paths.clear(); 
     412                    envisagedCount = context.getLast().getCount(); 
     413                } 
    402414            } 
    403415             
    404416            // if the path matches the current maximal number of occurrences, add it to the list 
    405417            // of collected paths with these number of occurrences 
    406             if (context.getLast().getCount() == maxCount) { 
     418            if (context.getLast().getCount() == envisagedCount) { 
    407419                paths.add(new LinkedList<TrieNode<T>>(context)); 
    408420            } 
     
    411423        // perform the trie traversal 
    412424        for (TrieNode<T> child : context.getLast().getChildren()) { 
    413             if (child.getCount() >= maxCount) { 
     425            if (child.getCount() >= envisagedCount) { 
    414426                context.add(child); 
    415                 determineLongPathsWithMostOccurrences(minimalDepth, paths, context); 
     427                determineLongPathsWithMostOccurrences 
     428                    (minimalDepth, occurrenceCount, paths, context); 
    416429                context.removeLast(); 
    417430            } 
Note: See TracChangeset for help on using the changeset viewer.