// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.plugin.html; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.xml.sax.SAXException; import de.ugoe.cs.autoquest.eventcore.Event; import de.ugoe.cs.autoquest.eventcore.IEventType; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModel; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModelException; import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElement; import de.ugoe.cs.autoquest.plugin.html.eventcore.HTMLEventTypeFactory; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLDocumentSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElement; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElementSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElement; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElementSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLServerSpec; /** *

* This class provides the functionality to parse XML log files generated by the HTMLMonitor of * AutoQUEST. The result of parsing a file is a collection of event sequences and a GUI model. *

*

* The parser can be configured with parsing parameters to ignore, e.g., ids or indexes of * parsed GUI elements. Details can be found in the manual pages of the respective parsing commands. *

* * @author Fabian Glaser, Patrick Harms * @version 1.0 * */ public class HTMLLogParser extends AbstractDefaultLogParser { /** *

* the pattern used for parsing HTML GUI element paths *

*/ private Pattern htmlElementPattern = Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-]+)\\))"); /** *

* the pattern used for parsing parsing parameters *

*/ private Pattern htmlElementSpecPattern = Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-#]+)\\))?"); /** *

* parameters to influence parsing *

*/ private Map> parseParams; /** *

* a map containing replacement specifications for ids of GUI elements *

*/ private Map idReplacements; /** *

* initializes the parser with the parsing parameters to be considered *

* * @param parseParams the parsing parameters to be considered */ public HTMLLogParser(Map> parseParams) { this.parseParams = parseParams; for (String paramKey : parseParams.keySet()) { if (!"clearId".equals(paramKey) && !"clearIndex".equals(paramKey) && !"idReplacements".equals(paramKey)) { throw new IllegalArgumentException("unknown parse parameter key " + paramKey); } } } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleGUIElement(String, Map) */ @Override protected boolean handleGUIElement(String id, Map parameters) throws SAXException { HTMLGUIElementSpec specification = null; String parentId = parameters.get("parent"); HTMLGUIElement parent = (HTMLGUIElement) super.getGUIElementTree().find(parentId); if (parameters.containsKey("host")) { // this is a server specification int port = 80; String portStr = parameters.get("port"); if (portStr != null) { port = Integer.parseInt(portStr); } specification = new HTMLServerSpec(parameters.get("host"), port); } else if (parameters.containsKey("path")) { // this is a document specification if (parent != null) { if (!(parent.getSpecification() instanceof HTMLServerSpec)) { throw new SAXException ("invalid log: parent GUI element of a document is not of type server"); } specification = new HTMLDocumentSpec ((HTMLServerSpec) parent.getSpecification(), parameters.get("path"), parameters.get("query"), parameters.get("title")); } else if (parentId == null) { throw new SAXException("invalid log: a document has no parent id"); } } else if (parameters.containsKey("tagname")) { String tagName = parameters.get("tagname"); if (!tagNameMustBeConsidered(tagName)) { return true; } if (parent != null) { if (!childrenMustBeConsidered(parent)) { return true; } IGUIElement document = parent; while ((document != null) && (!(document.getSpecification() instanceof HTMLDocumentSpec))) { document = document.getParent(); } if (document == null) { throw new SAXException ("invalid log: parent hierarchy of a page element does not contain a " + "document"); } int index = -1; String indexStr = parameters.get("index"); if ((indexStr != null) && (!"".equals(indexStr))) { index = Integer.parseInt(indexStr); } String htmlId = parameters.get("htmlid"); if (clearIndex(tagName, index, htmlId, parent)) { index = -1; } String idReplacement = replaceHTMLId(tagName, index, htmlId, parent); if (idReplacement != null) { htmlId = idReplacement; } else if (clearHTMLId(tagName, index, htmlId, parent)) { htmlId = null; } if ((htmlId == null) && (index == -1)) { // set at least a default index, if all is to be ignored. index = 0; } specification = new HTMLPageElementSpec ((HTMLDocumentSpec) document.getSpecification(), tagName.intern(), htmlId == null ? null : htmlId.intern(), index); } else if (parentId == null) { throw new SAXException("invalid log: a page element has no parent id"); } } else { throw new SAXException("invalid log: unknown GUI element"); } if (specification != null) { try { super.getGUIElementTree().add(id, parentId, specification); } catch (GUIModelException e) { throw new SAXException("could not handle GUI element with id " + id + ": " + e.getMessage(), e); } return true; } else { return false; } } /** *

* checks if for a specific GUI element the index shall be ignored or not by considering the * parsing parameters. *

* * @param tagName the tag of the considered GUI element * @param index the index of the GUI element * @param id the id of the GUI element * @param parent the parent GUI element of the considered GUI element * * @return true if the index shall be ignored, false else. */ private boolean clearIndex(String tagName, int index, String id, HTMLGUIElement parent) { return clearSomething("clearIndex", tagName, index, id, parent); } /** *

* checks if the parsing parameters define a replacement for the id of the given GUI element * and if so returns this replacement *

* * @param tagName the tag of the considered GUI element * @param index the index of the GUI element * @param id the id of the GUI element * @param parent the parent GUI element of the considered GUI element * * @return the identified replacement */ private String replaceHTMLId(String tagName, int index, String htmlId, HTMLGUIElement parent) throws SAXException { if ((idReplacements == null) && (parseParams.containsKey("idReplacements"))) { idReplacements = new HashMap(); for (String fileName : parseParams.get("idReplacements")) { Properties props = new Properties(); FileInputStream stream = null; try { stream = new FileInputStream(new File(fileName)); props.load(stream); } catch (FileNotFoundException e) { throw new SAXException("could not find file " + fileName, e); } catch (IOException e) { throw new SAXException("error reading file " + fileName, e); } finally { if (stream != null) { try { stream.close(); } catch (IOException e) { // ignore } } } for (Map.Entry entry : props.entrySet()) { idReplacements.put((String) entry.getKey(), (String) entry.getValue()); } } } if (idReplacements != null) { for (Map.Entry replacementSpec : idReplacements.entrySet()) { String tagSpec = replacementSpec.getKey(); if (tagSpec.startsWith("/")) { throw new IllegalArgumentException("can not handle absolute specifications"); } else if (tagSpec.endsWith("/")) { throw new IllegalArgumentException("specifications may not end with a /"); } String[] tagSpecs = tagSpec.split("/"); if (tagMatchesTagSpec(tagName, index, htmlId, parent, tagSpecs)) { return replacementSpec.getValue(); } } } return null; } /** *

* checks if for a specific GUI element the id shall be ignored or not by considering the * parsing parameters. *

* * @param tagName the tag of the considered GUI element * @param index the index of the GUI element * @param id the id of the GUI element * @param parent the parent GUI element of the considered GUI element * * @return true if the id shall be ignored, false else. */ private boolean clearHTMLId(String tagName, int index, String id, HTMLGUIElement parent) { return clearSomething("clearId", tagName, index, id, parent); } /** *

* convenience method to check for the existence for specific parsing parameters for clearing * ids or indexes of GUI elements. *

* * @param parseParamId the id of the parsing parameter to be checked for the GUI element * @param tagName the tag of the considered GUI element * @param index the index of the GUI element * @param id the id of the GUI element * @param parent the parent GUI element of the considered GUI element * * @return true if the denoted parse parameter is set to ignore, false else. */ private boolean clearSomething(String parseParamId, String tagName, int index, String id, HTMLGUIElement parent) { if (parseParams.containsKey(parseParamId)) { for (String spec : parseParams.get(parseParamId)) { // determine the specification parts if (spec.startsWith("/")) { throw new IllegalArgumentException("can not handle absolute specifications"); } else if (spec.endsWith("/")) { throw new IllegalArgumentException("specifications may not end with a /"); } String[] tagSpecs = spec.split("/"); if (tagMatchesTagSpec(tagName, index, id, parent, tagSpecs)) { return true; } } } return false; } /** *

* convenience method to check if a given GUI element matches a specification tags provided * through the parsing parameters. *

* * @param tagName the tag of the considered GUI element * @param index the index of the GUI element * @param id the id of the GUI element * @param parent the parent GUI element of the considered GUI element * @param tagSpecs the specification of a GUI element to match against the given GUI element * * @return true if the denoted parse parameter is set to ignore, false else. */ private boolean tagMatchesTagSpec(String tagName, int index, String id, HTMLGUIElement parent, String[] tagSpecs) { if (tagSpecs.length > 0) { Matcher matcher = htmlElementSpecPattern.matcher(tagSpecs[tagSpecs.length - 1]); if (!matcher.matches()) { throw new IllegalArgumentException ("illegal tag specification " + tagSpecs[tagSpecs.length - 1]); } if (!tagName.equals(matcher.group(1))) { return false; } String idCondition = matcher.group(4); if (idCondition != null) { if (!idCondition.equals(id)) { // check if the id condition would match with ignoring specific characters if ((id != null) && (idCondition.indexOf('#') > -1)) { // first of all, the length must match if (idCondition.length() != id.length()) { return false; } for (int i = 0; i < idCondition.length(); i++) { if ((idCondition.charAt(i) != '#') && (idCondition.charAt(i) != id.charAt(i))) { // if there is a character that is neither ignored not matches // the condition at a specific position, return "no match" return false; } } } else { // no condition ignoring specific characters return false; } } } String indexCondition = matcher.group(3); if (indexCondition != null) { try { if (index != Integer.parseInt(indexCondition)) { return false; } } catch (NumberFormatException e) { throw new IllegalArgumentException ("illegal tag index specification " + indexCondition, e); } } if (tagSpecs.length > 1) { if (parent instanceof HTMLPageElement) { return tagMatchesTagSpec(((HTMLPageElement) parent).getTagName(), ((HTMLPageElement) parent).getIndex(), ((HTMLPageElement) parent).getHtmlId(), (HTMLGUIElement) parent.getParent(), Arrays.copyOfRange(tagSpecs, 0, tagSpecs.length - 1)); } else { throw new IllegalArgumentException ("specification matches documents or servers. This is not supported yet."); } } else { return true; } } else { return true; } } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleEvent(String, Map) */ @Override protected boolean handleEvent(String type, Map parameters) throws SAXException { String targetId = parameters.get("target"); if (targetId == null) { if (parseParams.size() != 0) { throw new SAXException ("old log file versions can not be parsed with parse parameters"); } String targetDocument = parameters.get("targetDocument"); String targetDOMPath = parameters.get("targetDOMPath"); if ((targetDocument == null) || (targetDOMPath == null)) { throw new SAXException("event has no target defined"); } targetId = determineTargetId(targetDocument, targetDOMPath); if (targetId == null) { // the target id can not be determined yet return false; } } IGUIElement target = super.getGUIElementTree().find(targetId); if (target == null) { // event not processible yet return false; } IEventType eventType = HTMLEventTypeFactory.getInstance().getEventType(type, parameters, target); if (eventType != null) { Event event = new Event(eventType, target); String timestampStr = parameters.get("timestamp"); if (timestampStr != null) { event.setTimestamp(Long.parseLong(timestampStr)); } ((HTMLGUIElement) event.getTarget()).markUsed(); super.addToSequence(event); } // else ignore unknown event type return true; } /** *

* used to determine the id of a target denoted by an event. This is only required for older * document formats. The new formats use concrete ids. *

*/ private String determineTargetId(String targetDocument, String targetDOMPath) throws SAXException { IGUIElement document = super.getGUIElementTree().find(targetDocument); if (document == null) { return null; } if (!(document.getSpecification() instanceof HTMLDocumentSpec)) { throw new SAXException("an id that should refer to an HTML document refers to" + "something else"); } GUIModel model = super.getGUIElementTree().getGUIModel(); IGUIElement child = document; String[] pathElements = targetDOMPath.split("/"); int pathIndex = 0; HTMLPageElementSpec compareSpec; String tagName; int index; String htmlId; while ((pathIndex < pathElements.length) && (child != null)) { if ((pathElements[pathIndex] != null) && (!"".equals(pathElements[pathIndex]))) { Matcher matcher = htmlElementPattern.matcher(pathElements[pathIndex]); if (!matcher.matches()) { throw new SAXException ("could not parse target DOM path element " + pathElements[pathIndex]); } tagName = matcher.group(1); String indexStr = matcher.group(3); htmlId = matcher.group(4); index = -1; if ((indexStr != null) && (!"".equals(indexStr))) { index = Integer.parseInt(indexStr); } compareSpec = new HTMLPageElementSpec ((HTMLDocumentSpec) document.getSpecification(), tagName, htmlId, index); List children = model.getChildren(child); child = null; for (IGUIElement candidate : children) { if (compareSpec.getSimilarity(candidate.getSpecification())) { child = candidate; break; } } } pathIndex++; } if (child != null) { return super.getGUIElementTree().find(child); } else { return null; } } /** *

* checks if tags with the provided name must be handled in the GUI model. As an example, * it is not necessary to handle "head" tags and anything included in them. *

* * @param tagName the tag name to check * * @return true, if the tag must be considered, false else */ private boolean tagNameMustBeConsidered(String tagName) { if (!tagName.startsWith("input_")) { for (int i = 0; i < tagName.length(); i++) { // all known HTML tags are either letters or digits, but nothing else. Any GUI model // containing something different is proprietary and, therefore, ignored. if (!Character.isLetterOrDigit(tagName.charAt(i))) { return false; } } } return !"head".equals(tagName) && !"title".equals(tagName) && !"script".equals(tagName) && !"style".equals(tagName) && !"link".equals(tagName) && !"meta".equals(tagName) && !"iframe".equals(tagName) && !"input_hidden".equals(tagName) && !"option".equals(tagName) && !"tt".equals(tagName) && !"br".equals(tagName) && !"colgroup".equals(tagName) && !"col".equals(tagName) && !"hr".equals(tagName) && !"param".equals(tagName) && !"sfmsg".equals(tagName); } /** *

* checks if the children of a specified parent must be added to the GUI model or not. *

* * @param parent the parent tag to check * * @return true, if the child of the tag must be considered, false else */ private boolean childrenMustBeConsidered(HTMLGUIElement parent) { if (parent instanceof HTMLPageElement) { return !"svg".equals(((HTMLPageElement) parent).getTagName()); } else { return true; } } }