Ignore:
Timestamp:
07/12/13 16:45:53 (11 years ago)
Author:
pharms
Message:
  • improved performance of the trie for large alphabets by using the symbol map. This is an improved list of symbols that allows a more efficient lookup for symbols using buckets of symbol as an initial search order
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/Trie.java

    r1189 r1251  
    1717import java.io.Serializable; 
    1818import java.util.Collection; 
    19 import java.util.HashSet; 
    2019import java.util.LinkedHashSet; 
    2120import java.util.LinkedList; 
     
    5554     * </p> 
    5655     */ 
    57     private Collection<T> knownSymbols; 
     56    private SymbolMap<T, T> knownSymbols; 
    5857 
    5958    /** 
     
    8887        this.comparator = comparator; 
    8988        rootNode = new TrieNode<T>(comparator); 
    90         knownSymbols = new LinkedHashSet<T>(); 
     89        knownSymbols = new SymbolMap<T, T>(this.comparator); 
    9190    } 
    9291 
     
    104103        } 
    105104        rootNode = new TrieNode<T>(other.rootNode); 
    106         knownSymbols = new LinkedHashSet<T>(other.knownSymbols); 
     105        knownSymbols = new SymbolMap<T, T>(other.knownSymbols); 
    107106        comparator = other.comparator; 
    108107    } 
     
    116115     */ 
    117116    public Collection<T> getKnownSymbols() { 
    118         return new LinkedHashSet<T>(knownSymbols); 
     117        return new LinkedHashSet<T>(knownSymbols.getSymbols()); 
    119118    } 
    120119 
     
    153152    /** 
    154153     * <p> 
    155      * Adds a given subsequence to the trie and increases the counters accordingly. 
     154     * Adds a given subsequence to the trie and increases the counters accordingly. NOTE: This 
     155     * method does not add the symbols to the list of known symbols. This is only ensured using 
     156     * the method {@link #train(List, int)}. 
    156157     * </p> 
    157158     *  
     
    162163    protected void add(List<T> subsequence) { 
    163164        if (subsequence != null && !subsequence.isEmpty()) { 
    164             addToKnownSymbols(subsequence); 
    165165            subsequence = new LinkedList<T>(subsequence); // defensive copy! 
    166166            T firstSymbol = subsequence.get(0); 
     
    495495     */ 
    496496    private void addToKnownSymbols(T symbol) { 
    497         for (T knownSymbol : knownSymbols) { 
    498             if (comparator.equals(knownSymbol, symbol)) { 
    499                 return; 
    500             } 
    501         } 
    502          
    503         knownSymbols.add(symbol); 
    504     } 
    505  
    506     /** 
    507      * <p> 
    508      * adds a list of new symbols to the collection of known symbols. Uses the 
    509      * {@link #addToKnownSymbols(Object)} method for each element of the provided list. 
    510      * </p> 
    511      * 
    512      * @param symbols the list of symbols to be added to the known symbols 
    513      */ 
    514     private void addToKnownSymbols(List<T> symbols) { 
    515         for (T symbol : symbols) { 
    516             addToKnownSymbols(symbol); 
     497        if (!knownSymbols.containsSymbol(symbol)) { 
     498            knownSymbols.addSymbol(symbol, symbol); 
    517499        } 
    518500    } 
     
    653635     */ 
    654636    public void updateKnownSymbols() { 
    655         knownSymbols = new HashSet<T>(); 
     637        knownSymbols = new SymbolMap<T, T>(this.comparator); 
    656638        for (TrieNode<T> node : rootNode.getChildren()) { 
    657639            addToKnownSymbols(node.getSymbol()); 
Note: See TracChangeset for help on using the changeset viewer.