Changeset 1354
- Timestamp:
- 02/07/14 17:11:39 (11 years ago)
- Location:
- trunk/autoquest-plugin-html/src/main
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/HTMLLogParser.java
r1339 r1354 15 15 package de.ugoe.cs.autoquest.plugin.html; 16 16 17 import java.io.File; 18 import java.io.FileInputStream; 19 import java.io.FileNotFoundException; 20 import java.io.IOException; 17 21 import java.util.Arrays; 22 import java.util.HashMap; 18 23 import java.util.List; 19 24 import java.util.Map; 25 import java.util.Properties; 20 26 import java.util.regex.Matcher; 21 27 import java.util.regex.Pattern; … … 62 68 */ 63 69 private Pattern htmlElementSpecPattern = 64 Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w- ]+)\\))?");70 Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-#]+)\\))?"); 65 71 66 72 /** … … 70 76 */ 71 77 private Map<String, List<String>> parseParams; 78 79 /** 80 * <p> 81 * a map containing replacement specifications for ids of GUI elements 82 * </p> 83 */ 84 private Map<String, String> idReplacements; 72 85 73 86 /** … … 82 95 83 96 for (String paramKey : parseParams.keySet()) { 84 if (!"clearId".equals(paramKey) && !"clearIndex".equals(paramKey)) { 97 if (!"clearId".equals(paramKey) && !"clearIndex".equals(paramKey) && 98 !"idReplacements".equals(paramKey)) 99 { 85 100 throw new IllegalArgumentException("unknown parse parameter key " + paramKey); 86 101 } … … 163 178 } 164 179 165 if (clearHTMLId(tagName, index, htmlId, parent)) { 180 String idReplacement = replaceHTMLId(tagName, index, htmlId, parent); 181 if (idReplacement != null) { 182 htmlId = idReplacement; 183 } 184 else if (clearHTMLId(tagName, index, htmlId, parent)) { 166 185 htmlId = null; 167 186 } … … 210 229 private boolean clearIndex(String tagName, int index, String id, HTMLGUIElement parent) { 211 230 return clearSomething("clearIndex", tagName, index, id, parent); 231 } 232 233 /** 234 * <p> 235 * TODO: comment 236 * </p> 237 * 238 * @param tagName 239 * @param index 240 * @param htmlId 241 * @param parent 242 * @return 243 */ 244 private String replaceHTMLId(String tagName, int index, String htmlId, HTMLGUIElement parent) 245 throws SAXException 246 { 
247 if ((idReplacements == null) && (parseParams.containsKey("idReplacements"))) { 248 idReplacements = new HashMap<String, String>(); 249 for (String fileName : parseParams.get("idReplacements")) { 250 Properties props = new Properties(); 251 try { 252 props.load(new FileInputStream(new File(fileName))); 253 } 254 catch (FileNotFoundException e) { 255 throw new SAXException("could not find file " + fileName, e); 256 } 257 catch (IOException e) { 258 throw new SAXException("error reading file " + fileName, e); 259 } 260 261 for (Map.Entry<Object, Object> entry : props.entrySet()) { 262 idReplacements.put((String) entry.getKey(), (String) entry.getValue()); 263 } 264 } 265 } 266 267 if (idReplacements != null) { 268 for (Map.Entry<String, String> replacementSpec : idReplacements.entrySet()) { 269 String tagSpec = replacementSpec.getKey(); 270 271 if (tagSpec.startsWith("/")) { 272 throw new IllegalArgumentException("can not handle absolute specifications"); 273 } 274 else if (tagSpec.endsWith("/")) { 275 throw new IllegalArgumentException("specifications may not end with a /"); 276 } 277 278 String[] tagSpecs = tagSpec.split("/"); 279 280 if (tagMatchesTagSpec(tagName, index, htmlId, parent, tagSpecs)) { 281 return replacementSpec.getValue(); 282 } 283 } 284 } 285 286 return null; 212 287 } 213 288 … … 294 369 if (idCondition != null) { 295 370 if (!idCondition.equals(id)) { 296 return false; 371 // check if the id condition would match with ignoring specific characters 372 if ((id != null) && (idCondition.indexOf('#') > -1)) { 373 // first of all, the length must match 374 if (idCondition.length() != id.length()) { 375 return false; 376 } 377 378 for (int i = 0; i < idCondition.length(); i++) { 379 if ((idCondition.charAt(i) != '#') && 380 (idCondition.charAt(i) != id.charAt(i))) 381 { 382 // if there is a character that is neither ignored not matches 383 // the condition at a specific position, return "no match" 384 return false; 385 } 386 } 387 388 } 389 else { 
390 // no condition ignoring specific characters 391 return false; 392 } 297 393 } 298 394 } -
trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDcondenseHTMLGUIModel.java
r1349 r1354 927 927 928 928 /* (non-Javadoc) 929 * @see java.lang.Object#equals(java.lang.Object) 930 */ 931 @Override 932 public boolean equals(Object obj) { 933 if (obj == this) { 934 return true; 935 } 936 else if (obj instanceof SimilarGUIElement) { 937 return (similarGUIElement.equals(((SimilarGUIElement) obj).similarGUIElement)); 938 } 939 else { 940 return false; 941 } 942 } 943 944 /* (non-Javadoc) 945 * @see java.lang.Object#hashCode() 946 */ 947 @Override 948 public int hashCode() { 949 return similarGUIElement.hashCode(); 950 } 951 952 /* (non-Javadoc) 929 953 * @see java.lang.Object#toString() 930 954 */ … … 933 957 return similarGUIElement + " (" + mainClusterParent + ")"; 934 958 } 959 935 960 } 936 961 -
trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDparseDirHTML.java
r1339 r1354 69 69 } 70 70 else { 71 Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/ ]*)");71 Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/\\.]*)"); 72 72 Matcher matcher = parseParamPattern.matcher(param); 73 73 … … 161 161 @Override 162 162 public String help() { 163 return "parseDirHTML <directory> {<sequencesName>} {<clearId>} {<clearIndex>}"; 163 return "parseDirHTML <directory> [<sequencesName>] " + 164 "{-idReplacements=path/to/replacementfile} {-clearId=path/to[0]/gui(htmlId=element)} " + 165 "{-clearIndex=path/to[0]/gui(htmlId=element)}"; 164 166 } 165 167 -
trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/commands/CMDparseHTML.java
r1339 r1354 64 64 } 65 65 else { 66 Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/ ]*)");66 Pattern parseParamPattern = Pattern.compile("-(\\w*)=([\\w=\\[\\]\\(\\)/\\.]*)"); 67 67 Matcher matcher = parseParamPattern.matcher(param); 68 68 … … 124 124 @Override 125 125 public String help() { 126 return "parseHTML <filename> {<sequencesName>}"; 126 return "parseHTML <filename> [<sequencesName>] " + 127 "{-idReplacements=path/to/replacementfile} {-clearId=path/to[0]/gui(htmlId=element)} " + 128 "{-clearIndex=path/to[0]/gui(htmlId=element)}"; 127 129 } 128 130 -
trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/eventcore/HTMLEventTypeFactory.java
r1276 r1354 163 163 String xCoord = eventParameters.get(xParamName); 164 164 if (xCoord == null) { 165 Console.printerrln("eventParameters do not contain " + xParamName + " coordinate."); 165 Console.traceln(Level.WARNING, 166 "eventParameters do not contain " + xParamName + " coordinate."); 166 167 xCoord = "0"; 167 168 } … … 169 170 String yCoord = eventParameters.get(yParamName); 170 171 if (yCoord == null) { 171 Console.printerrln("eventParameters do not contain " + yParamName + " coordinate."); 172 Console.traceln(Level.WARNING, 173 "eventParameters do not contain " + yParamName + " coordinate."); 172 174 yCoord = "0"; 173 175 } … … 179 181 catch (NumberFormatException e) { 180 182 throw new IllegalArgumentException("the coordinates provided by an " + eventName + 181 " event are no numbers");183 " event are no numbers"); 182 184 } 183 185 } -
trunk/autoquest-plugin-html/src/main/resources/manuals/parseDirHTML
r1339 r1354 1 1 Treats all files in a directory structure as HTML log files and parses them into event sequences and a GUI model. Also sub directories are parsed. 2 2 3 The parsing process can be parameterized. This allows to ignore ids or indexes of GUI elements in the log files. If they are ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored indexes or ids, all rows are considered the same. 3 The parsing process can be parameterized. This allows to replace or ignore ids or indexes of GUI elements in the log files. If they are replaced or ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without replacing or ignoring indexes or ids of the rows, each row is treated separately. But with replaced or ignored indexes or ids, all rows are considered the same. 4 4 5 5 To ignore the indexes, add -clearIndex=<path to GUI element> as parameter to the command call. To ignore ids, add -clearId=<path to GUI element> to the command call. The path to the GUI element is written using the HTML tag names and either their index or their id as identification. E.g., to denote all rows in a table where the table has the id "table_1" you can specify "table(htmlId=table_1)/tbody/tr". To denote e.g. all divs being the child of a div with an index 1, you specify "div[1]/div". 6 6 7 To replace ids, a separate file with mappings must be created. The path to this file must be provided using the idReplacements parameter. The file follows a typical properties format. The key is the path denoting the GUI element of which the id shall be set. The value is the actual id. 
The key may contain the # character to denote a wildcard in html ids. This allows matching several GUI elements with similar ids at once and giving them the same id. An example entry of this file is: 8 9 div(htmlId\=id_number_#)=div_number_X 10 11 This line would assign the new id "div_number_X" to all divs whose id matches "id_number_#", where # stands for any single character. Please note that for specifying the keys, it is required to escape any = sign in the key specification. This is usually required if the path to the denoted GUI elements denotes elements by their id as shown in the example. 12 13 14 7 15 $USAGE$ 8 <directory> path to the directory 9 [<sequenceNames>] array of sequences into which the parsed events shall be stored 10 [<clearId>] used to define GUI elements of which the ids shall be ignored 11 [<clearIndex>] used to define GUI elements of which the indexes shall be ignored 16 17 <directory> 18 path to the directory 19 [<sequenceNames>] 20 array of sequences into which the parsed events shall be stored 21 {-idReplacements=path/to/replacementfile} 22 used to define id replacements as described in a separate file 23 {-clearId=path/to[0]/gui(htmlId=element)} 24 used to define GUI elements of which the ids shall be ignored 25 {-clearIndex=path/to[0]/gui(htmlId=element)} 26 used to define GUI elements of which the indexes shall be ignored 12 27 13 28 Example(s): 14 29 parseDirHTML /path/to/directory 15 30 parseDirHTML /path/to/directory sequences -clearId=table(htmlId=overview)/tbody[0]/tr 31 parseDirHTML /path/to/directory sequences -idReplacements=idReplacements.txt -clearId=body -
trunk/autoquest-plugin-html/src/main/resources/manuals/parseHTML
r1339 r1354 1 1 Parses an HTML log file into an event sequence and a GUI model. 2 2 3 The parsing process can be parameterized. This allows to ignore ids or indexes of GUI elements in the log files. If they are ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored indexes or ids, all rows are considered the same. 3 The parsing process can be parameterized. This allows to replace or ignore ids or indexes of GUI elements in the log files. If they are replaced or ignored, the GUI model is more harmonized and GUI elements are considered equal although they are not. This may be helpful, e.g., if you have a table where each row is semantically the same. Without ignoring indexes or ids of the rows, each row is treated separately. But with ignored or replaced indexes or ids, all rows are considered the same. 4 4 5 5 To ignore the indexes, add -clearIndex=<path to GUI element> as parameter to the command call. To ignore ids, add -clearId=<path to GUI element> to the command call. The path to the GUI element is written using the HTML tag names and either their index or their id as identification. E.g., to denote all rows in a table where the table has the id "table_1" you can specify "table(htmlId=table_1)/tbody/tr". To denote e.g. all divs being the child of a div with an index 1, you specify "div[1]/div". 6 6 7 To replace ids, a separate file with mappings must be created. The path to this file must be provided using the idReplacements parameter. The file follows a typical properties format. The key is the path denoting the GUI element of which the id shall be set. The value is the actual id. The key may contain the # character to denote a wildcard in html ids. 
This allows matching several GUI elements with similar ids at once and giving them the same id. An example entry of this file is: 8 9 div(htmlId\=id_number_#)=div_number_X 10 11 This line would assign the new id "div_number_X" to all divs whose id matches "id_number_#", where # stands for any single character. Please note that for specifying the keys, it is required to escape any = sign in the key specification. This is usually required if the path to the denoted GUI elements denotes elements by their id as shown in the example. 12 13 14 7 15 $USAGE$ 8 <file> path to the file to be parsed 9 [<sequenceNames>] array of sequences into which the parsed events shall be stored 10 [<clearId>] used to define GUI elements of which the ids shall be ignored 11 [<clearIndex>] used to define GUI elements of which the indexes shall be ignored 16 17 <file> 18 path to the file to be parsed 19 [<sequenceNames>] 20 array of sequences into which the parsed events shall be stored 21 {-idReplacements=path/to/replacementfile} 22 used to define id replacements as described in a separate file 23 {-clearId=path/to[0]/gui(htmlId=element)} 24 used to define GUI elements of which the ids shall be ignored 25 {-clearIndex=path/to[0]/gui(htmlId=element)} 26 used to define GUI elements of which the indexes shall be ignored 12 27 13 28 Example(s): 14 29 parseHTML /path/to/file.log 15 30 parseHTML /path/to/file.log sequences -clearId=table(htmlId=overview)/tbody[0]/tr 31 parseHTML /path/to/file.log sequences -idReplacements=idReplacements.txt -clearId=body
Note: See TracChangeset
for help on using the changeset viewer.