Ignore:
Timestamp:
02/07/14 17:11:39 (11 years ago)
Author:
pharms
Message:
  • added support for id replacements
Location:
trunk/autoquest-plugin-html/src/main
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/HTMLLogParser.java

    r1339 r1354  
    1515package de.ugoe.cs.autoquest.plugin.html; 
    1616 
     17import java.io.File; 
     18import java.io.FileInputStream; 
     19import java.io.FileNotFoundException; 
     20import java.io.IOException; 
    1721import java.util.Arrays; 
     22import java.util.HashMap; 
    1823import java.util.List; 
    1924import java.util.Map; 
     25import java.util.Properties; 
    2026import java.util.regex.Matcher; 
    2127import java.util.regex.Pattern; 
     
    6268     */ 
    6369    private Pattern htmlElementSpecPattern = 
    64         Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-]+)\\))?"); 
     70        Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-#]+)\\))?"); 
    6571     
    6672    /** 
     
    7076     */ 
    7177    private Map<String, List<String>> parseParams; 
     78 
     79    /** 
     80     * <p> 
     81     * a map containing replacement specifications for ids of GUI elements 
     82     * </p> 
     83     */ 
     84    private Map<String, String> idReplacements; 
    7285 
    7386    /** 
     
    8295         
    8396        for (String paramKey : parseParams.keySet()) { 
    84             if (!"clearId".equals(paramKey) && !"clearIndex".equals(paramKey)) { 
     97            if (!"clearId".equals(paramKey) && !"clearIndex".equals(paramKey) && 
     98                !"idReplacements".equals(paramKey)) 
     99            { 
    85100                throw new IllegalArgumentException("unknown parse parameter key " + paramKey); 
    86101            } 
     
    163178                } 
    164179                 
    165                 if (clearHTMLId(tagName, index, htmlId, parent)) { 
     180                String idReplacement = replaceHTMLId(tagName, index, htmlId, parent); 
     181                if (idReplacement != null) { 
     182                    htmlId = idReplacement; 
     183                } 
     184                else if (clearHTMLId(tagName, index, htmlId, parent)) { 
    166185                    htmlId = null; 
    167186                } 
     
    210229    private boolean clearIndex(String tagName, int index, String id, HTMLGUIElement parent) { 
    211230        return clearSomething("clearIndex", tagName, index, id, parent); 
     231    } 
     232 
     233    /** 
     234     * <p> 
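     235     * returns the id that replaces the id of the denoted GUI element, or null if no replacement specification matches; the specifications are loaded on first use from the properties files named by the "idReplacements" parse parameter 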
     236     * </p> 
     237     * 
     238     * @param tagName 
     239     * @param index 
     240     * @param htmlId 
     241     * @param parent 
     242     * @return 
     243     */ 
     244    private String replaceHTMLId(String tagName, int index, String htmlId, HTMLGUIElement parent) 
     245        throws SAXException 
     246    { 
     247        if ((idReplacements == null) && (parseParams.containsKey("idReplacements"))) { 
     248            idReplacements = new HashMap<String, String>(); 
     249            for (String fileName : parseParams.get("idReplacements")) { 
     250                Properties props = new Properties(); 
     251                try { 
     252                    props.load(new FileInputStream(new File(fileName))); 
     253                } 
     254                catch (FileNotFoundException e) { 
     255                    throw new SAXException("could not find file " + fileName, e); 
     256                } 
     257                catch (IOException e) { 
     258                    throw new SAXException("error reading file " + fileName, e); 
     259                } 
     260                 
     261                for (Map.Entry<Object, Object> entry : props.entrySet()) { 
     262                    idReplacements.put((String) entry.getKey(), (String) entry.getValue()); 
     263                } 
     264            } 
     265        } 
     266         
     267        if (idReplacements != null) { 
     268            for (Map.Entry<String, String> replacementSpec : idReplacements.entrySet()) { 
     269                String tagSpec = replacementSpec.getKey(); 
     270 
     271                if (tagSpec.startsWith("/")) { 
     272                    throw new IllegalArgumentException("can not handle absolute specifications"); 
     273                } 
     274                else if (tagSpec.endsWith("/")) { 
     275                    throw new IllegalArgumentException("specifications may not end with a /"); 
     276                } 
     277 
     278                String[] tagSpecs = tagSpec.split("/"); 
     279 
     280                if (tagMatchesTagSpec(tagName, index, htmlId, parent, tagSpecs)) { 
     281                    return replacementSpec.getValue(); 
     282                } 
     283            } 
     284        } 
     285         
     286        return null; 
    212287    } 
    213288 
     
    294369            if (idCondition != null) { 
    295370                if (!idCondition.equals(id)) { 
    296                     return false; 
     371                    // check if the id condition would match with ignoring specific characters 
     372                    if ((id != null) && (idCondition.indexOf('#') > -1)) { 
     373                        // first of all, the length must match 
     374                        if (idCondition.length() != id.length()) { 
     375                            return false; 
     376                        } 
     377                         
     378                        for (int i = 0; i < idCondition.length(); i++) { 
     379                            if ((idCondition.charAt(i) != '#') && 
     380                                (idCondition.charAt(i) != id.charAt(i))) 
     381                            { 
     382                                // if there is a character that is neither ignored nor matches 
     383                                // the condition at a specific position, return "no match" 
     384                                return false; 
     385                            } 
     386                        } 
     387                         
     388                    } 
     389                    else { 
     390                        // no condition ignoring specific characters 
     391                        return false; 
     392                    } 
    297393                } 
    298394            } 
  • trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDcondenseHTMLGUIModel.java

    r1349 r1354  
    927927 
    928928        /* (non-Javadoc) 
     929         * @see java.lang.Object#equals(java.lang.Object) 
     930         */ 
     931        @Override 
     932        public boolean equals(Object obj) { 
     933            if (obj == this) { 
     934                return true; 
     935            } 
     936            else if (obj instanceof SimilarGUIElement) { 
     937                return (similarGUIElement.equals(((SimilarGUIElement) obj).similarGUIElement)); 
     938            } 
     939            else { 
     940                return false; 
     941            } 
     942        } 
     943 
     944        /* (non-Javadoc) 
     945         * @see java.lang.Object#hashCode() 
     946         */ 
     947        @Override 
     948        public int hashCode() { 
     949            return similarGUIElement.hashCode(); 
     950        } 
     951 
     952        /* (non-Javadoc) 
    929953         * @see java.lang.Object#toString() 
    930954         */ 
     
    933957            return similarGUIElement + " (" + mainClusterParent + ")"; 
    934958        } 
     959 
    935960    } 
    936961         
  • trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDparseDirHTML.java

    r1339 r1354  
    6969                } 
    7070                else { 
    71                     Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/]*)"); 
     71                    Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/\\.]*)"); 
    7272                    Matcher matcher = parseParamPattern.matcher(param); 
    7373                     
     
    161161    @Override 
    162162    public String help() { 
    163         return "parseDirHTML <directory> {<sequencesName>} {<clearId>} {<clearIndex>}"; 
     163        return "parseDirHTML <directory> [<sequencesName>] " + 
     164            "{-idReplacements=path/to/replacementfile} {-clearId=path/to[0]/gui(htmlId=element)} " + 
     165            "{-clearIndex=path/to[0]/gui(htmlId=element)}"; 
    164166    } 
    165167 
  • trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDparseHTML.java

    r1339 r1354  
    6464                } 
    6565                else { 
    66                     Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/]*)"); 
     66                    Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/\\.]*)"); 
    6767                    Matcher matcher = parseParamPattern.matcher(param); 
    6868                     
     
    124124    @Override 
    125125    public String help() { 
    126         return "parseHTML <filename> {<sequencesName>}"; 
     126        return "parseHTML <filename> [<sequencesName>] " + 
     127            "{-idReplacements=path/to/replacementfile} {-clearId=path/to[0]/gui(htmlId=element)} " + 
     128            "{-clearIndex=path/to[0]/gui(htmlId=element)}"; 
    127129    } 
    128130 
  • trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/eventcore/HTMLEventTypeFactory.java

    r1276 r1354  
    163163        String xCoord = eventParameters.get(xParamName); 
    164164        if (xCoord == null) { 
    165             Console.printerrln("eventParameters do not contain " + xParamName + " coordinate."); 
     165            Console.traceln(Level.WARNING, 
     166                            "eventParameters do not contain " + xParamName + " coordinate."); 
    166167            xCoord = "0"; 
    167168        } 
     
    169170        String yCoord = eventParameters.get(yParamName); 
    170171        if (yCoord == null) { 
    171             Console.printerrln("eventParameters do not contain " + yParamName + " coordinate."); 
     172            Console.traceln(Level.WARNING, 
     173                            "eventParameters do not contain " + yParamName + " coordinate."); 
    172174            yCoord = "0"; 
    173175        } 
     
    179181        catch (NumberFormatException e) { 
    180182            throw new IllegalArgumentException("the coordinates provided by an " + eventName + 
    181                 " event are no numbers"); 
     183                                               " event are no numbers"); 
    182184        } 
    183185    } 
  • trunk/autoquest-plugin-html/src/main/resources/manuals/parseDirHTML

    r1339 r1354  
    11Treats all files in a directory structure as HTML log files and parses them into event sequences and a GUI model. Also sub directories are parsed. 
    22 
    3 The parsing process can be parameterized. This allows to ignore ids or indexes of GUI elements in the log files. If they are ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored indexes or ids, all rows are considered the same. 
     3The parsing process can be parameterized. This allows replacing or ignoring ids or indexes of GUI elements in the log files. If they are replaced or ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without replacing or ignoring indexes or ids of the rows, each row is treated separately. But with replaced or ignored indexes or ids, all rows are considered the same. 
    44 
    55To ignore the indexes, add -clearIndex=<path to GUI element> as parameter to the command call. To ignore ids, add -clearId=<path to GUI element> to the command call. The path to the GUI element is written using the HTML tag names and either their index or their id as identification. E.g., to denote all rows in a table where the table has the id "table_1" you can specify "table(htmlId=table_1)/tbody/tr". To denote e.g. all divs being the child of a div with an index 1, you specify "div[1]/div".   
    66 
     7To replace ids, a separate file with mappings must be created. The path to this file must be provided using the idReplacements parameter. The file follows a typical properties format. The key is the path denoting the GUI element of which the id shall be set. The value is the actual id. The key may contain the # character to denote a wildcard for a single character in html ids. This allows matching several GUI elements with similar ids at once and giving them the same id. An example entry of this file is: 
     8 
     9div(htmlId\=id_number_#)=div_number_X 
     10 
     11This line would give every div whose id matches "id_number_#", where # stands for any single character, the new id "div_number_X". Please note that any = sign within a key must be escaped with a backslash, because the file is read as a properties file. This is usually required if the path to the denoted GUI elements denotes elements by their id as shown in the example. 
     12 
     13 
     14 
    715$USAGE$ 
    8 <directory> path to the directory  
    9 [<sequenceNames>] array of sequences into which the parsed events shall be stored 
    10 [<clearId>] used to define GUI elements of which the ids shall be ignored 
    11 [<clearIndex>] used to define GUI elements of which the indexes shall be ignored 
     16 
     17<directory> 
     18    path to the directory  
     19[<sequenceNames>] 
     20    array of sequences into which the parsed events shall be stored 
     21{-idReplacements=path/to/replacementfile} 
     22    used to define id replacements as described in a separate file 
     23{-clearId=path/to[0]/gui(htmlId=element)} 
     24    used to define GUI elements of which the ids shall be ignored 
     25{-clearIndex=path/to[0]/gui(htmlId=element)} 
     26    used to define GUI elements of which the indexes shall be ignored 
    1227 
    1328Example(s): 
    1429parseDirHTML /path/to/directory 
    1530parseDirHTML /path/to/directory sequences -clearId=table(htmlId=overview)/tbody[0]/tr 
     31parseDirHTML /path/to/directory sequences -idReplacements=idReplacements.txt -clearId=body 
  • trunk/autoquest-plugin-html/src/main/resources/manuals/parseHTML

    r1339 r1354  
    11Parses an HTML log file into an event sequence and a GUI model. 
    22 
    3 The parsing process can be parameterized. This allows to ignore ids or indexes of GUI elements in the log files. If they are ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored indexes or ids, all rows are considered the same. 
     3The parsing process can be parameterized. This allows replacing or ignoring ids or indexes of GUI elements in the log files. If they are replaced or ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored or replaced indexes or ids, all rows are considered the same. 
    44 
    55To ignore the indexes, add -clearIndex=<path to GUI element> as parameter to the command call. To ignore ids, add -clearId=<path to GUI element> to the command call. The path to the GUI element is written using the HTML tag names and either their index or their id as identification. E.g., to denote all rows in a table where the table has the id "table_1" you can specify "table(htmlId=table_1)/tbody/tr". To denote e.g. all divs being the child of a div with an index 1, you specify "div[1]/div".   
    66 
     7To replace ids, a separate file with mappings must be created. The path to this file must be provided using the idReplacements parameter. The file follows a typical properties format. The key is the path denoting the GUI element of which the id shall be set. The value is the actual id. The key may contain the # character to denote a wildcard for a single character in html ids. This allows matching several GUI elements with similar ids at once and giving them the same id. An example entry of this file is: 
     8 
     9div(htmlId\=id_number_#)=div_number_X 
     10 
     11This line would give every div whose id matches "id_number_#", where # stands for any single character, the new id "div_number_X". Please note that any = sign within a key must be escaped with a backslash, because the file is read as a properties file. This is usually required if the path to the denoted GUI elements denotes elements by their id as shown in the example. 
     12 
     13 
     14 
    715$USAGE$ 
    8 <file> path to the file to be parsed  
    9 [<sequenceNames>] array of sequences into which the parsed events shall be stored 
    10 [<clearId>] used to define GUI elements of which the ids shall be ignored 
    11 [<clearIndex>] used to define GUI elements of which the indexes shall be ignored 
     16 
     17<file> 
     18    path to the file to be parsed  
     19[<sequenceNames>] 
     20    array of sequences into which the parsed events shall be stored 
     21{-idReplacements=path/to/replacementfile} 
     22    used to define id replacements as described in a separate file 
     23{-clearId=path/to[0]/gui(htmlId=element)} 
     24    used to define GUI elements of which the ids shall be ignored 
     25{-clearIndex=path/to[0]/gui(htmlId=element)} 
     26    used to define GUI elements of which the indexes shall be ignored 
    1227 
    1328Example(s): 
    1429parseHTML /path/to/file.log 
    1530parseHTML /path/to/file.log sequences -clearId=table(htmlId=overview)/tbody[0]/tr 
     31parseHTML /path/to/file.log sequences -idReplacements=idReplacements.txt -clearId=body 
Note: See TracChangeset for help on using the changeset viewer.