Ignore:
Timestamp:
07/12/13 16:45:53 (12 years ago)
Author:
pharms
Message:
  • improved performance of the trie for large alphabets by using the symbol map. This is an improved list of symbols that allows a more efficient lookup for symbols using buckets of symbol as an initial search order
Location:
trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/DefaultSymbolComparator.java

    r1196 r1251  
    3030 
    3131    /* (non-Javadoc) 
    32      * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.SymbolComparator#equals(java.lang.Object, java.lang.Object) 
     32     * @see de.ugoe.cs.autoquest.tasktrees.temporalrelation.SymbolComparator#equals(Object, Object) 
    3333     */ 
    3434    @Override 
     
    4242    } 
    4343 
     44    /* (non-Javadoc) 
     45     * @see de.ugoe.cs.autoquest.usageprofiles.SymbolComparator#getBucketSearchOrder(Object) 
     46     */ 
     47    @Override 
     48    public int[] getBucketSearchOrder(T symbol) { 
     49        if (symbol != null) { 
     50            return new int[] { symbol.hashCode() }; 
     51        } 
     52        else { 
     53            return null; 
     54        } 
     55    } 
     56 
    4457} 
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/SymbolComparator.java

    r1196 r1251  
    3939     */ 
    4040    public boolean equals(T symbol1, T symbol2); 
     41     
     42    /** 
     43     * <p> 
     44     * returns a search order for buckets. This method can be used for optimizing a search for 
     45     * equal symbols. The client of this class can store symbols in buckets of similar symbols. 
     46     * Those buckets get an id denoted by an integer. The most appropriate bucket for a symbol is 
     47     * the first element in the array returned by this method. The symbol should therefore be put 
     48     * into that bucket. 
     49     * </p> 
     50     * <p> 
     51     * In case a search for an equal symbol shall be performed at the client side, the client 
     52     * checks the available buckets in the order given as return value of this method. All other 
     53     * buckets are searched afterwards. In this scenario it is ensured, that the equal symbol is 
     54     * found as soon as possible as the search always starts in the most appropriate bucket. 
     55     * However, the other buckets are searched, as well, if no equal symbol is found. Therefore, 
     56     * in the worst case, all buckets are searched. In the optimal case, the equal symbol is found 
     57     * in the first bucket. 
     58     * </p> 
     59     * <p> 
     60     * The returned array should contain different integers in each field. This allows a most 
     61     * efficient search. If an integer is repeated, it is up to the clien, if this is ignored. 
     62     * </p> 
     63     * 
     64     * @param symbol the symbol for which the bucket order is to be returned 
     65     *  
     66     * @return a search order for buckets as described 
     67     *  
     68     * @see SymbolMap 
     69     */ 
     70    public int[] getBucketSearchOrder(T symbol); 
    4171} 
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/Trie.java

    r1189 r1251  
    1717import java.io.Serializable; 
    1818import java.util.Collection; 
    19 import java.util.HashSet; 
    2019import java.util.LinkedHashSet; 
    2120import java.util.LinkedList; 
     
    5554     * </p> 
    5655     */ 
    57     private Collection<T> knownSymbols; 
     56    private SymbolMap<T, T> knownSymbols; 
    5857 
    5958    /** 
     
    8887        this.comparator = comparator; 
    8988        rootNode = new TrieNode<T>(comparator); 
    90         knownSymbols = new LinkedHashSet<T>(); 
     89        knownSymbols = new SymbolMap<T, T>(this.comparator); 
    9190    } 
    9291 
     
    104103        } 
    105104        rootNode = new TrieNode<T>(other.rootNode); 
    106         knownSymbols = new LinkedHashSet<T>(other.knownSymbols); 
     105        knownSymbols = new SymbolMap<T, T>(other.knownSymbols); 
    107106        comparator = other.comparator; 
    108107    } 
     
    116115     */ 
    117116    public Collection<T> getKnownSymbols() { 
    118         return new LinkedHashSet<T>(knownSymbols); 
     117        return new LinkedHashSet<T>(knownSymbols.getSymbols()); 
    119118    } 
    120119 
     
    153152    /** 
    154153     * <p> 
    155      * Adds a given subsequence to the trie and increases the counters accordingly. 
     154     * Adds a given subsequence to the trie and increases the counters accordingly. NOTE: This 
     155     * method does not add the symbols to the list of known symbols. This is only ensured using 
     156     * the method {@link #train(List, int)}. 
    156157     * </p> 
    157158     *  
     
    162163    protected void add(List<T> subsequence) { 
    163164        if (subsequence != null && !subsequence.isEmpty()) { 
    164             addToKnownSymbols(subsequence); 
    165165            subsequence = new LinkedList<T>(subsequence); // defensive copy! 
    166166            T firstSymbol = subsequence.get(0); 
     
    495495     */ 
    496496    private void addToKnownSymbols(T symbol) { 
    497         for (T knownSymbol : knownSymbols) { 
    498             if (comparator.equals(knownSymbol, symbol)) { 
    499                 return; 
    500             } 
    501         } 
    502          
    503         knownSymbols.add(symbol); 
    504     } 
    505  
    506     /** 
    507      * <p> 
    508      * adds a list of new symbols to the collection of known symbols. Uses the 
    509      * {@link #addToKnownSymbols(Object)} method for each element of the provided list. 
    510      * </p> 
    511      * 
    512      * @param symbols the list of symbols to be added to the known symbols 
    513      */ 
    514     private void addToKnownSymbols(List<T> symbols) { 
    515         for (T symbol : symbols) { 
    516             addToKnownSymbols(symbol); 
     497        if (!knownSymbols.containsSymbol(symbol)) { 
     498            knownSymbols.addSymbol(symbol, symbol); 
    517499        } 
    518500    } 
     
    653635     */ 
    654636    public void updateKnownSymbols() { 
    655         knownSymbols = new HashSet<T>(); 
     637        knownSymbols = new SymbolMap<T, T>(this.comparator); 
    656638        for (TrieNode<T> node : rootNode.getChildren()) { 
    657639            addToKnownSymbols(node.getSymbol()); 
  • trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/TrieNode.java

    r1189 r1251  
    6767     * </p> 
    6868     */ 
    69     private Collection<TrieNode<T>> children; 
    70  
     69    private SymbolMap<T, TrieNode<T>> children; 
     70     
    7171    /** 
    7272     * <p> 
     
    115115        this.symbol = null; 
    116116        count = 0; 
    117         children = new LinkedList<TrieNode<T>>(); 
     117        children = new SymbolMap<T, TrieNode<T>>(this.comparator); 
    118118    } 
    119119 
     
    140140 
    141141        count = 0; 
    142         children = new LinkedList<TrieNode<T>>(); 
     142        children = new SymbolMap<T, TrieNode<T>>(this.comparator); 
    143143    } 
    144144 
     
    157157        count = other.count; 
    158158        comparator = other.comparator; 
    159         children = new LinkedList<TrieNode<T>>(); 
    160         for (TrieNode<T> child : other.children) { 
    161             children.add(new TrieNode<T>(child)); 
     159        children = new SymbolMap<T, TrieNode<T>>(this.comparator); 
     160         
     161        for (TrieNode<T> child : other.children.getValues()) { 
     162            children.addSymbol(child.getSymbol(), new TrieNode<T>(child)); 
    162163        } 
    163164    } 
     
    224225        if (node == null) { 
    225226            node = new TrieNode<T>(symbol, comparator); 
    226             children.add(node); 
     227            children.addSymbol(symbol, node); 
    227228        } 
    228229        return node; 
     
    240241     */ 
    241242    protected TrieNode<T> getChild(T symbol) { 
    242         for (TrieNode<T> child : children) { 
    243             if (comparator.equals(child.getSymbol(), symbol)) { 
    244                 return child; 
    245             } 
    246         } 
    247         return null; 
     243        return children.getValue(symbol); 
    248244    } 
    249245 
     
    256252     */ 
    257253    protected Collection<TrieNode<T>> getChildren() { 
    258         return children; 
     254        return children.getValues(); 
    259255    } 
    260256 
     
    296292    public Collection<T> getFollowingSymbols() { 
    297293        Collection<T> followingSymbols = new LinkedList<T>(); 
    298         for (TrieNode<T> child : children) { 
     294        for (TrieNode<T> child : children.getValues()) { 
    299295            followingSymbols.add(child.getSymbol()); 
    300296        } 
     
    346342            graph.addChild(new Edge(), parent, currentVertex); 
    347343        } 
    348         for (TrieNode<T> node : children) { 
     344        for (TrieNode<T> node : children.getValues()) { 
    349345            node.getGraph(currentVertex, graph); 
    350346        } 
     
    370366        stringBuilder.append(" " + hashCode() + " [label=\"" + thisSaneId + "\"];" + 
    371367            StringTools.ENDLINE); 
    372         for (TrieNode<T> childNode : children) { 
     368        for (TrieNode<T> childNode : children.getValues()) { 
    373369            stringBuilder.append(" " + hashCode() + " -> " + childNode.hashCode() + ";" + 
    374370                StringTools.ENDLINE); 
    375371        } 
    376         for (TrieNode<T> childNode : children) { 
     372        for (TrieNode<T> childNode : children.getValues()) { 
    377373            childNode.appendDotRepresentation(stringBuilder); 
    378374        } 
     
    412408    protected void getLeafAncestors(List<TrieNode<T>> ancestors) { 
    413409        boolean isAncestor = false; 
    414         for (TrieNode<T> child : children) { 
     410        for (TrieNode<T> child : children.getValues()) { 
    415411            child.getLeafAncestors(ancestors); 
    416412            isAncestor |= child.isLeaf(); 
     
    431427    protected int getNumLeafs() { 
    432428        int numLeafs = 0; 
    433         for (TrieNode<T> child : children) { 
     429        for (TrieNode<T> child : children.getValues()) { 
    434430            if (child.isLeaf()) { 
    435431                numLeafs++; 
Note: See TracChangeset for help on using the changeset viewer.