// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.plugin.html; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.logging.Level; import java.util.regex.Matcher; import java.util.regex.Pattern; import de.ugoe.cs.autoquest.eventcore.Event; import de.ugoe.cs.autoquest.eventcore.gui.DialogClose; import de.ugoe.cs.autoquest.eventcore.gui.DialogOpen; import de.ugoe.cs.autoquest.eventcore.gui.IInteraction; import de.ugoe.cs.autoquest.eventcore.gui.KeyboardFocusChange; import de.ugoe.cs.autoquest.eventcore.gui.MouseButtonInteraction; import de.ugoe.cs.autoquest.eventcore.gui.MouseClick; import de.ugoe.cs.autoquest.eventcore.gui.Scroll; import de.ugoe.cs.autoquest.eventcore.gui.TextInput; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIElementFactory; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModel; import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModelException; import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElement; import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElementSpec; import de.ugoe.cs.autoquest.eventcore.guimodel.ITextArea; import 
de.ugoe.cs.autoquest.eventcore.guimodel.ITextField; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElementSpec; import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageSpec; import de.ugoe.cs.util.FileTools; import de.ugoe.cs.util.console.Console; /** *

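* Parses log files of recorded HTML page events into sequences of events and builds up the GUI model of the pages and page elements the events refer to. *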

*
 * @author Patrick Harms
 */
public class OldHTMLLogParser {

    /**
     * <p>
     * Name and path of the robot filter.
     * </p>
     */
    private static final String ROBOTFILTERFILE = "data/robots/robotfilter.txt";

    /**
     * <p>
     * Number of fields every log line must provide: client id, timestamp, page title, URI,
     * agent, event name and HTML element path. Any further fields are event parameters.
     * </p>
     */
    private static final int MANDATORY_FIELD_COUNT = 7;

    /**
     * <p>
     * Field that contains a regular expression that matches all robots
     * contained in {@link #ROBOTFILTERFILE}.
     * </p>
     */
    private String robotRegex = null;

    /**
     * <p>
     * Compiled form of {@link #robotRegex}. It is created together with the regular expression
     * so that the expression is not recompiled for every log line.
     * </p>
     */
    private Pattern robotPattern = null;

    /**
     * <p>
     * Pattern for a single element of an HTML element path. It matches either
     * <code>type[index]</code> (group 1 = type, group 3 = index) or <code>type(id=someId)</code>
     * (group 1 = type, group 4 = id).
     * </p>
     */
    private final Pattern htmlElementPattern =
        Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(id=([\\w-]+)\\))");

    /**
     * <p>
     * The sequences parsed so far, one per successfully parsed file.
     * </p>
     */
    private final List<List<Event>> sequences = new ArrayList<List<Event>>();

    /**
     * <p>
     * The GUI model into which the GUI element paths of all parsed events are integrated.
     * </p>
     */
    private final GUIModel guiModel = new GUIModel();

    /**
     * <p>
     * Parses the log file with the given name. Convenience variant of {@link #parseFile(File)}.
     * </p>
     *
     * @param source name and path of the file to be parsed
     *
     * @throws IllegalArgumentException
     *             if the source is null, does not exist or is not a file
     */
    public void parseFile(String source) throws IllegalArgumentException {
        if (source == null) {
            throw new IllegalArgumentException("source must not be null");
        }

        parseFile(new File(source));
    }

    /**
     * <p>
     * Parses the given log file. The file is read as UTF-8 line by line until the end of the
     * file or the first empty line. The first and last character of each line are dropped and
     * the remainder is split at <code>" "</code> into the fields client id, timestamp, page
     * title, URI, agent, event name, HTML element path and optional event parameters. For each
     * line that results in an interaction, an event is added to the sequence of the file.
     * </p>
     * <p>
     * If the agent of a line matches the robot filter, reading of the file stops. The events
     * read up to that point are still stored as a sequence. Lines with a malformed URI are
     * ignored. Any other problem is reported on the console and aborts parsing of the file
     * without storing a sequence.
     * </p>
     *
     * @param source the file to be parsed
     *
     * @throws IllegalArgumentException
     *             if the source is null, does not exist or is not a file
     */
    public void parseFile(File source) throws IllegalArgumentException {
        if (source == null) {
            throw new IllegalArgumentException("source must not be null");
        }
        else if (!source.exists()) {
            throw new IllegalArgumentException("source file does not exist");
        }
        else if (!source.isFile()) {
            throw new IllegalArgumentException("source is not a file");
        }

        BufferedReader reader = null;

        try {
            reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(source), "UTF-8"));

            loadRobotRegex();

            List<Event> sequence = new ArrayList<Event>();

            int lineCounter = 0;
            String line = reader.readLine();

            while ((line != null) && (!"".equals(line))) {
                lineCounter++;

                if (line.length() < 2) {
                    throw new IllegalArgumentException
                        ("line " + lineCounter + " is too short to be a log entry");
                }

                // strip the enclosing first and last character and split at the field separator
                String[] values = line.substring(1, line.length() - 1).split("\" \"");

                if (values.length < MANDATORY_FIELD_COUNT) {
                    throw new IllegalArgumentException
                        ("line " + lineCounter + " provides only " + values.length + " of the " +
                         MANDATORY_FIELD_COUNT + " required fields");
                }

                String clientId = values[0];
                long timestamp = Long.parseLong(values[1]);
                String title = values[2];
                String uriString = values[3];
                String agent = values[4];
                String eventName = values[5];
                String htmlElementPath = values[6];

                // everything after the mandatory fields is a parameter of the event
                List<String> eventParameters = new ArrayList<String>();
                for (int i = MANDATORY_FIELD_COUNT; i < values.length; i++) {
                    eventParameters.add(values[i]);
                }

                if (isRobot(agent)) {
                    // do not handle sessions of robots
                    Console.println("ignoring robot session: " + agent);
                    break;
                }

                try {
                    URL url = new URL(uriString);
                    IGUIElement guiElement = getGUIElement(url, title, htmlElementPath);
                    IInteraction interaction =
                        getInteraction(eventName, guiElement, eventParameters);

                    // ignored event types yield no interaction and therefore no event
                    if (interaction != null) {
                        Event event =
                            createEvent(clientId, interaction, guiElement, timestamp, agent);
                        sequence.add(event);
                    }
                }
                catch (MalformedURLException e) {
                    Console.traceln(Level.FINE, "Ignored line " + lineCounter + ": " +
                                    e.getMessage());
                }

                line = reader.readLine();
            }

            Console.traceln(Level.INFO, "read user sequence with " + sequence.size() +
                            " events from " + source);

            sequences.add(sequence);
        }
        catch (Exception e) {
            Console.printerrln("could not parse file " + source + ": " + e);
            e.printStackTrace();
        }
        finally {
            // release the file handle regardless of whether parsing succeeded
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException e) {
                    Console.traceln(Level.FINE, "could not close " + source + ": " +
                                    e.getMessage());
                }
            }
        }
    }

    /**
     * <p>
     * Returns the sequences parsed so far. The returned collection is the internal list of
     * this parser, not a copy.
     * </p>
     *
     * @return the parsed sequences
     */
    public Collection<List<Event>> getSequences() {
        return sequences;
    }

    /**
     * <p>
     * Returns the GUI model into which the GUI elements of all parsed events were integrated.
     * </p>
     *
     * @return the GUI model
     */
    public GUIModel getGuiModel() {
        return guiModel;
    }

    /**
     * <p>
     * Reads {@link #ROBOTFILTERFILE} and creates a regular expression that
     * matches all the robots defined in the file. The regular expression is
     * stored in the field {@link #robotRegex} and its compiled form in
     * {@link #robotPattern}. Blank lines of the filter file are skipped, because they would
     * otherwise result in an alternative that matches every agent.
     * </p>
     *
     * @throws IOException
     *             thrown if there is a problem reading the robot filter
     * @throws FileNotFoundException
     *             thrown if the robot filter is not found
     */
    private void loadRobotRegex() throws IOException, FileNotFoundException {
        String[] lines = FileTools.getLinesFromFile(ROBOTFILTERFILE);
        StringBuilder regex = new StringBuilder();

        for (String robot : lines) {
            if ((robot == null) || (robot.trim().length() == 0)) {
                // "(.*.*)" would classify every agent as robot
                continue;
            }

            if (regex.length() > 0) {
                regex.append('|');
            }

            regex.append("(.*").append(robot).append(".*)");
        }

        robotRegex = regex.toString();
        robotPattern = Pattern.compile(robotRegex);
    }

    /**
     * <p>
     * Determines the GUI element denoted by the given page and HTML element path. The path is
     * split at <code>/</code>, empty path elements are skipped, and each remaining element must
     * match {@link #htmlElementPattern}. The resulting specification path, starting with the
     * server and the page, is integrated into the GUI model.
     * </p>
     *
     * @param url             the URL of the page containing the element
     * @param title           the title of the page containing the element
     * @param htmlElementPath the path of the element within the page
     *
     * @return the GUI element returned by the GUI model for the integrated path
     *
     * @throws GUIModelException
     *             propagated from the integration of the path into the GUI model
     * @throws IllegalArgumentException
     *             if an element of the path cannot be parsed
     */
    private IGUIElement getGUIElement(URL url, String title, String htmlElementPath)
        throws GUIModelException
    {
        String[] pathElements = htmlElementPath.split("/");
        List<IGUIElementSpec> guiElementPath = new ArrayList<IGUIElementSpec>();

        HTMLPageSpec page = new HTMLPageSpec(url, title);

        guiElementPath.add(page.getServer());
        guiElementPath.add(page);

        for (String pathElement : pathElements) {
            if ((pathElement == null) || "".equals(pathElement)) {
                continue;
            }

            Matcher matcher = htmlElementPattern.matcher(pathElement);
            if (!matcher.matches()) {
                throw new IllegalArgumentException("could not parse HTML element " + pathElement);
            }

            String type = matcher.group(1);
            String indexStr = matcher.group(3);
            String id = matcher.group(4);

            // -1 is passed as index if the path element is identified by an id instead
            int index = -1;
            if ((indexStr != null) && (!"".equals(indexStr))) {
                index = Integer.parseInt(indexStr);
            }

            guiElementPath.add(new HTMLPageElementSpec(page, type, id, index));
        }

        return guiModel.integratePath(guiElementPath, GUIElementFactory.getInstance());
    }

    /**
     * <p>
     * Creates the interaction that corresponds to the given event name. The events
     * <code>onbeforeunload</code>, <code>onpagehide</code> and <code>onpageshow</code> are
     * ignored, i.e. null is returned for them.
     * </p>
     *
     * @param eventName       the name of the logged event, e.g. <code>onclick</code>
     * @param guiElement      the GUI element on which the event was observed
     * @param eventParameters the parameters logged with the event
     *
     * @return the interaction or null if the event is to be ignored
     *
     * @throws IllegalArgumentException
     *             if the event name is unknown, a required parameter is missing or malformed,
     *             or an onchange event refers to a GUI element that is neither a text area nor
     *             a text field
     */
    private IInteraction getInteraction(String       eventName,
                                        IGUIElement  guiElement,
                                        List<String> eventParameters)
    {
        IInteraction result = null;

        if ("onclick".equals(eventName)) {
            int[] coordinates =
                getCoordinateParameter(eventName, eventParameters, 0, "click coordinates");
            result = new MouseClick
                (MouseButtonInteraction.Button.LEFT, coordinates[0], coordinates[1]);
        }
        else if ("onfocus".equals(eventName)) {
            result = new KeyboardFocusChange();
        }
        else if ("onscroll".equals(eventName)) {
            int[] coordinates =
                getCoordinateParameter(eventName, eventParameters, 0, "scroll coordinates");
            result = new Scroll(coordinates[0], coordinates[1]);
        }
        else if ("onchange".equals(eventName)) {
            String value = getStringParameter(eventName, eventParameters, 0, "selected value");

            if ((guiElement instanceof ITextArea) || (guiElement instanceof ITextField)) {
                result = new TextInput(value, null);
            }
            else {
                throw new IllegalArgumentException("can not handle onchange events on GUI " +
                                                   "elements of type " + guiElement.getClass());
            }
        }
        else if ("onload".equals(eventName)) {
            result = new DialogOpen();
        }
        else if ("onunload".equals(eventName)) {
            result = new DialogClose();
        }
        else if ("onbeforeunload".equals(eventName) ||
                 "onpagehide".equals(eventName) ||
                 "onpageshow".equals(eventName))
        {
            Console.traceln(Level.FINE, "Ignored event name \"" + eventName + "\"");
        }
        else {
            throw new IllegalArgumentException("unknown event name: \"" + eventName + "\"");
        }

        return result;
    }

    /**
     * <p>
     * Creates an event for the given interaction and GUI element and stores the client id and
     * the agent as parameters and the timestamp in the event.
     * </p>
     *
     * @param clientId    the id of the client that caused the event
     * @param interaction the interaction performed
     * @param guiElement  the GUI element on which the interaction was performed
     * @param timestamp   the timestamp of the event
     * @param agent       the agent string of the client
     *
     * @return the created event
     */
    private Event createEvent(String       clientId,
                              IInteraction interaction,
                              IGUIElement  guiElement,
                              long         timestamp,
                              String       agent)
    {
        Event event = new Event(interaction, guiElement);
        event.setParameter("clientId", clientId);
        event.setParameter("agent", agent);

        event.setTimestamp(timestamp);

        return event;
    }

    /**
     * <p>
     * Returns the event parameter at the given index.
     * </p>
     *
     * @param eventName       the name of the event, used in the error message only
     * @param eventParameters the parameters of the event, may be null or empty
     * @param parameterIndex  the index of the required parameter
     * @param parameterDesc   a description of the parameter, used in the error message only
     *
     * @return the value of the parameter, never null
     *
     * @throws IllegalArgumentException
     *             if there are no parameters, the index is out of range or the value is null
     */
    private String getStringParameter(String       eventName,
                                      List<String> eventParameters,
                                      int          parameterIndex,
                                      String       parameterDesc)
        throws IllegalArgumentException
    {
        String value = null;

        if ((eventParameters != null) &&
            (parameterIndex >= 0) && (parameterIndex < eventParameters.size()))
        {
            value = eventParameters.get(parameterIndex);
        }

        if (value == null) {
            throw new IllegalArgumentException
                (eventName + " event does not provide the " + parameterDesc);
        }

        return value;
    }

    /**
     * <p>
     * Returns the event parameter at the given index parsed as a pair of integer coordinates.
     * The parameter must consist of exactly two integers separated by a comma.
     * </p>
     *
     * @param eventName       the name of the event, used in error messages only
     * @param eventParameters the parameters of the event
     * @param parameterIndex  the index of the required parameter
     * @param parameterDesc   a description of the parameter, used in error messages only
     *
     * @return an array of length two containing the two coordinates in the logged order
     *
     * @throws IllegalArgumentException
     *             if the parameter is missing or does not consist of two integers
     */
    private int[] getCoordinateParameter(String       eventName,
                                         List<String> eventParameters,
                                         int          parameterIndex,
                                         String       parameterDesc)
    {
        String value =
            getStringParameter(eventName, eventParameters, parameterIndex, parameterDesc);

        String[] intStrs = value.split(",");

        if (intStrs.length != 2) {
            throw new IllegalArgumentException("the " + parameterDesc + " of an " + eventName +
                                               " event does not provide two correct coordinates");
        }

        try {
            return new int[] { Integer.parseInt(intStrs[0]), Integer.parseInt(intStrs[1]) };
        }
        catch (NumberFormatException e) {
            // keep the original exception as cause to ease debugging of broken log files
            throw new IllegalArgumentException("the coordinates provided as " + parameterDesc +
                                               " of an " + eventName + " event are no numbers",
                                               e);
        }
    }

    /**
     * <p>
     * Checks whether an agent is a robot. The robot filter must have been loaded via
     * {@link #loadRobotRegex()} before.
     * </p>
     *
     * @param agent
     *            agent that is checked
     * @return true, if the agent is a robot; false otherwise
     */
    private boolean isRobot(String agent) {
        return robotPattern.matcher(agent).matches();
    }
}