// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.plugin.html; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.xml.sax.SAXException; import de.ugoe.cs.autoquest.eventcore.Event; import de.ugoe.cs.autoquest.eventcore.IEventType; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModel; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModelException; import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElement; import de.ugoe.cs.autoquest.plugin.html.eventcore.HTMLEventTypeFactory; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLDocumentSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElement; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElementSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElementSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLServerSpec; /** *
* This class provides the functionality to parse XML log files generated by the HTMLMonitor of * AutoQUEST. The result of parsing a file is a collection of event sequences and a GUI model. *
* * @author Fabian Glaser, Patrick Harms * @version 1.0 * */ public class HTMLLogParser extends AbstractDefaultLogParser { /** ** the pattern used for parsing HTML GUI element paths *
*/ private Pattern htmlElementPattern = Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-]+)\\))"); /* (non-Javadoc) * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleGUIElement(String, Map) */ @Override protected boolean handleGUIElement(String id, Map* used to determine the id of a target denoted by an event. This is only required for older * document formats. The new formats use concrete ids. *
*/ private String determineTargetId(String targetDocument, String targetDOMPath) throws SAXException { IGUIElement document = super.getGUIElementTree().find(targetDocument); if (document == null) { return null; } if (!(document.getSpecification() instanceof HTMLDocumentSpec)) { throw new SAXException("an id that should refer to an HTML document refers to" + "something else"); } GUIModel model = super.getGUIElementTree().getGUIModel(); IGUIElement child = document; String[] pathElements = targetDOMPath.split("/"); int pathIndex = 0; HTMLPageElementSpec compareSpec; String tagName; int index; String htmlId; while ((pathIndex < pathElements.length) && (child != null)) { if ((pathElements[pathIndex] != null) && (!"".equals(pathElements[pathIndex]))) { Matcher matcher = htmlElementPattern.matcher(pathElements[pathIndex]); if (!matcher.matches()) { throw new SAXException ("could not parse target DOM path element " + pathElements[pathIndex]); } tagName = matcher.group(1); String indexStr = matcher.group(3); htmlId = matcher.group(4); index = -1; if ((indexStr != null) && (!"".equals(indexStr))) { index = Integer.parseInt(indexStr); } compareSpec = new HTMLPageElementSpec ((HTMLDocumentSpec) document.getSpecification(), tagName, htmlId, index); List* checks if tags with the provided name must be handled in the GUI model. As an example, * it is not necessary to handle "head" tags and anything included in them. *
*
     * @param tagName the name of the tag to be checked
     *
     * @return true, if elements with this tag name must be handled in the GUI model, false else
     */
    private boolean tagNameMustBeConsidered(String tagName) {
        // names starting with "input_" (e.g. "input_hidden") are exempt from the character check,
        // as they contain an underscore
        boolean isInputVariant = tagName.startsWith("input_");

        if (!isInputVariant) {
            // all known HTML tags consist of letters or digits only. Any tag name containing
            // something different is considered proprietary and, therefore, ignored.
            for (char character : tagName.toCharArray()) {
                if (!Character.isLetterOrDigit(character)) {
                    return false;
                }
            }
        }

        // well-formed tag names that are nevertheless not handled in the GUI model
        String[] ignoredTagNames = {
            "head", "title", "script", "style", "link", "meta", "iframe", "input_hidden",
            "option", "tt", "br", "colgroup", "col", "hr", "param", "sfmsg"
        };

        for (String ignoredTagName : ignoredTagNames) {
            if (ignoredTagName.equals(tagName)) {
                return false;
            }
        }

        return true;
    }

}