// Copyright 2012 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.autoquest.htmlmonitor; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.JSONValue; import org.json.simple.parser.ParseException; import org.mortbay.jetty.servlet.DefaultServlet; import de.ugoe.cs.util.FileTools; import de.ugoe.cs.util.console.Console; /** *

* the servlet deployed in the web server that receives all client messages and returns the client * java script. The messages are parsed, validated, and forwarded to the provided message listener. * If a message is not valid, it is discarded. If an event in a message is not valid, it is * discarded. Messages are only received via the POST HTTP method. The GET HTTP method is only * implemented for returning the client java script. *

* * @author Patrick Harms */ class HtmlMonitorServlet extends DefaultServlet { /** */ private static final long serialVersionUID = 1L; /** */ private static final boolean DO_TRACE = false; /** *

* Name and path of the robot filter. *

*/ private static final String ROBOTFILTERFILE = "data/robots/robotfilter.txt"; /** *

* Field that contains a regular expression that matches all robots * contained in {@link #ROBOTFILTERFILE}. *

*/ private String robotRegex = null; /** * the message listener to forward received messages to. */ private transient HtmlGUIElementManager guiElementManager = new HtmlGUIElementManager(); /** * the message listener to forward received messages to. */ private transient HtmlMonitorMessageListener messageListener; /** *

* initializes the servlet with the message listener to which all events shall be forwarded *

* * @param messageListener the message listener that shall receive all client events */ HtmlMonitorServlet(HtmlMonitorMessageListener messageListener) { this.messageListener = messageListener; try { loadRobotRegex(); } catch (Exception e) { Console.println ("robot filtering disabled: could not parse robot filter file " + ROBOTFILTERFILE); } } /* (non-Javadoc) * @see org.mortbay.jetty.servlet.DefaultServlet#doGet(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) */ @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { if ((request.getPathInfo() != null) && (request.getPathInfo().endsWith("/script/autoquest-htmlmonitor.js"))) { BufferedReader reader = null; try { InputStream script = this.getClass().getClassLoader().getResourceAsStream ("autoquest-htmlmonitor.js"); if (script == null) { Console.printerrln("could not read autoquest-htmlmonitor.js from classpath"); } else { reader = new BufferedReader(new InputStreamReader(script, "UTF-8")); PrintWriter output = response.getWriter(); String line; while ((line = reader.readLine()) != null) { output.println(line); } output.close(); } } catch (Exception e) { Console.printerrln("could not read autoquest-htmlmonitor.js from classpath: " + e); Console.logException(e); } finally { if (reader != null) { reader.close(); } } } } /* (non-Javadoc) * @see org.mortbay.jetty.servlet.DefaultServlet#doPost(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) */ @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { Object value = null; try { //InputStream requestInputStream = dumpStreamContent(request.getInputStream()); InputStream requestInputStream = request.getInputStream(); value = JSONValue.parseWithException (new InputStreamReader(requestInputStream, "UTF-8")); if (!(value instanceof JSONObject)) { Console.printerrln("incoming data is not of the expected type --> discarding it"); } else { handleJSONObject((JSONObject) value); } } catch (ParseException e) { Console.printerrln ("could not parse incoming data --> discarding it (" + e.toString() + ")"); } } /** *

* processes a received JSON object and validates it. If the message is ok, it is forwarded * to the message listener *

* * @param object the JSON object that contains a client message */ private void handleJSONObject(JSONObject object) { if (DO_TRACE) { dumpJSONObject(object, ""); } JSONObject message = assertValue(object, "message", JSONObject.class); if (message == null) { Console.printerrln("incoming data is no valid message --> discarding it"); } else { HtmlClientInfos clientInfos = extractClientInfos(message); if (clientInfos == null) { Console.printerrln ("incoming message does not contain valid client infos --> discarding it"); } else if (isRobot(clientInfos.getUserAgent())) { Console.printerrln ("ignoring robot " + clientInfos.getUserAgent()); } else { HtmlGUIElement guiStructure = extractHtmlPageElements(message, clientInfos); HtmlEvent[] events = extractHtmlEvents(message, clientInfos); if (events == null) { Console.printerrln ("incoming message does not contain valid events --> discarding it"); } else { messageListener.handleMessage(clientInfos, guiStructure, events); } } } } /** *

* tries to extract the client infos out of the received JSON object. If this is not fully * possible, an appropriate message is dumped and the whole message is discarded (the method * return null). *

* * @param message the message to parse the client infos from * * @return the client infos, if the message is valid in this respect, or null if not */ private HtmlClientInfos extractClientInfos(JSONObject message) { HtmlClientInfos clientInfos = null; JSONObject infos = assertValue(message, "clientInfos", JSONObject.class); if (infos != null) { String clientId = assertValue((JSONObject) infos, "clientId", String.class); String userAgent = assertValue((JSONObject) infos, "userAgent", String.class); URL url = assertValue((JSONObject) infos, "url", URL.class); String title = assertValue((JSONObject) infos, "title", String.class); if (clientId == null) { Console.printerrln("client infos do not contain a valid client id"); } else if (userAgent == null) { Console.printerrln("client infos do not contain a valid user agent"); } else if (url == null) { Console.printerrln("client infos do not contain a valid URL"); } else if (title == null) { Console.printerrln("client infos do not contain a valid title"); } else { clientInfos = new HtmlClientInfos(clientId, userAgent, url, title); } } return clientInfos; } /** *

* tries to extract the events out of the received JSON object. If this is not fully * possible, an appropriate message is dumped and the errorprone event is discarded. If no * valid event is found, the whole message is discarded. *

* * @param object the message to parse the events from * @param clientInfos the infos about the client that send the events * * @return the valid events stored in the message, or null if there are none */ private HtmlEvent[] extractHtmlEvents(JSONObject object, HtmlClientInfos clientInfos) { List events = null; JSONArray eventArray = assertValue(object, "events", JSONArray.class); if (eventArray != null) { events = new ArrayList(); HtmlServer server = getServerElement(clientInfos); HtmlDocument document = getPageElementRepresentingWebPage(clientInfos, server); for (int i = 0; i < eventArray.size(); i++) { Object eventObj = eventArray.get(i); if (!(eventObj instanceof JSONObject)) { Console.printerrln("event number " + (i + 1) + " is not a valid event object"); } else { Long time = assertValue(((JSONObject) eventObj), "time", Long.class); String domPath = assertValue(((JSONObject) eventObj), "path", String.class); String eventType = assertValue(((JSONObject) eventObj), "eventType", String.class); Integer[] coordinates = assertValue(((JSONObject) eventObj), "coordinates", Integer[].class); Integer key = assertValue(((JSONObject) eventObj), "key", Integer.class); Integer[] scrollPosition = assertValue(((JSONObject) eventObj), "scrollPosition", Integer[].class); String selectedValue = assertValue(((JSONObject) eventObj), "selectedValue", String.class); if (eventType == null) { Console.printerrln("event number " + (i + 1) + " has no valid event type"); } else if (time == null) { Console.printerrln(eventType + " event has no valid timestamp"); } else if (domPath == null) { Console.printerrln(eventType + " event has no valid DOM path"); } else if ((coordinates != null) && (coordinates.length != 2)) { Console.printerrln(eventType + " event has no valid coordinates"); } else if (checkEventParameterCombinations (eventType, coordinates, key, scrollPosition, selectedValue, domPath)) { HtmlPageElement target = guiElementManager.getPageElement(document, domPath); if (target != null) { events.add(new HtmlEvent(clientInfos, time, target, eventType, coordinates, key, scrollPosition, selectedValue)); } else { events.add(new HtmlEvent(clientInfos, time, document, domPath, eventType, coordinates, key, scrollPosition, selectedValue)); } } else { Console.printerrln(eventType + " event has no valid parameter combination"); } } } } if ((events != null) && (events.size() > 0)) { return events.toArray(new HtmlEvent[events.size()]); } else { return null; } } /** *

* extracts the GUI structure from the provided JSON object. *

* * @param object the JSON object to extract the GUI structure from * @param clientInfos infos about the client who send the data * * @return the GUI structure extracted from the JSON object of which the root node is a * representation of the server of the HTML page that was observed */ private HtmlServer extractHtmlPageElements(JSONObject object, HtmlClientInfos clientInfos) { HtmlServer server = getServerElement(clientInfos); HtmlDocument document = getPageElementRepresentingWebPage(clientInfos, server); JSONObject jsonPageElement = assertValue(object, "guiModel", JSONObject.class); document.addChild(convert(jsonPageElement, document, null)); return server; } /** *

* instantiates an element of the GUI structure representing the server of the observed * web page *

* * @param clientInfos infos about the client who send the data * * @return as described */ private HtmlServer getServerElement(HtmlClientInfos clientInfos) { String host = clientInfos.getUrl().getHost(); int port = 80; if (clientInfos.getUrl().getPort() > -1) { port = clientInfos.getUrl().getPort(); } return guiElementManager.createHtmlServer(host, port); } /** *

* instantiates an element of the GUI structure representing the observed web page. Adds * this element to the provided server as child. *

* * @param clientInfos infos about the client who send the data * @param server the server on which the page represented by the return value resists * * @return as described */ private HtmlDocument getPageElementRepresentingWebPage(HtmlClientInfos clientInfos, HtmlServer server) { String path = clientInfos.getUrl().getPath(); String query = null; if (clientInfos.getUrl().getQuery() != null) { query = "?" + clientInfos.getUrl().getQuery(); } HtmlDocument document = guiElementManager.createHtmlDocument (server, path, query, clientInfos.getTitle()); server.addChild(document); return document; } /** *

* converts a JSON object representing an HTML page element to an HTML page element. Calls * itself recursively to also convert the children of the element, if any. *

* * @param jsonPageElement the JSON object to be converted * @param document the document to which the page element belongs * @param parent the parent page element of the converted element, of null, if none * is present. In this case the document is considered the parent * element. * * @return as described. */ private HtmlPageElement convert(JSONObject jsonPageElement, HtmlDocument document, HtmlPageElement parent) { HtmlPageElement result = null; if (jsonPageElement != null) { String tagName = assertValue(jsonPageElement, "tagName", String.class); String htmlid = assertValue(jsonPageElement, "htmlId", String.class); Integer index = assertValue(jsonPageElement, "index", Integer.class); result = guiElementManager.createHtmlPageElement (document, parent, tagName, htmlid, index); JSONArray childElements = assertValue(jsonPageElement, "children", JSONArray.class); if (childElements != null) { Object jsonChild; for (int i = 0; i < childElements.size(); i++) { jsonChild = childElements.get(i); if (!(jsonChild instanceof JSONObject)) { Console.printerrln("child " + (i + 1) + " of HTML page element " + tagName + " is no valid HTML page element"); } else { result.addChild(convert((JSONObject) jsonChild, document, result)); } } } } return result; } /** *

* validates if for the given event type the parameter combination of coordinates, key, * scroll position, and selected value is valid. As an example, an onclick event should * usually not have an associated scroll position. *

* * @param eventType the type of the event * @param coordinates the coordinates of the event * @param key the key of the event * @param scrollPosition the scroll position of the event * @param selectedValue the value selected through a specific event * @param domPath the path through the DOM of the document of the HTML element on which * the event was executed * * @return true, if the combination of the parameters is valid, false else */ private boolean checkEventParameterCombinations(String eventType, Integer[] coordinates, Integer key, Integer[] scrollPosition, String selectedValue, String domPath) { boolean result = false; if ("onscroll".equals(eventType)) { if ((coordinates == null) && (key == null) && (scrollPosition != null) && (selectedValue == null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } else if ("onclick".equals(eventType) || "ondblclick".equals(eventType)) { if ((coordinates != null) && (key == null) && (scrollPosition == null) && (selectedValue == null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } else if ("onchange".equals(eventType)) { // "input_password" dont have a selectedValue if (domPath.contains("input_password")) { if ((coordinates == null) && (key == null) && (scrollPosition == null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } else { if ((coordinates == null) && (key == null) && (scrollPosition == null) && (selectedValue != null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } } else if ("onkeypress".equals(eventType) || "onkeydown".equals(eventType) || "onkeyup".equals(eventType)) { if ((coordinates == null) && (key != null) && (scrollPosition == null) && (selectedValue == null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } else if ("onfocus".equals(eventType) || "onmouseout".equals(eventType) || "onmousemove".equals(eventType) || "onload".equals(eventType) || "onunload".equals(eventType) || "onbeforeunload".equals(eventType) || "onpagehide".equals(eventType) || "onpageshow".equals(eventType) || "onabort".equals(eventType) || "onsubmit".equals(eventType) || "onplaying".equals(eventType) || "onpause".equals(eventType) || "ontimeupdate".equals(eventType) || "onerror".equals(eventType) || "onundo".equals(eventType) || "onreset".equals(eventType) || "onselect".equals(eventType)) { if ((coordinates == null) && (key == null) && (scrollPosition == null) && (selectedValue == null)) { result = true; } else { Console.printerrln(eventType + " event has invalid parameters"); } } else { Console.printerrln("'" + eventType + "' is not a valid event type"); } return result; } /** *

* converts a value in the provided object matching the provided key to the provided type. If * there is no value with the key or if the value can not be transformed to the provided type, * the method returns null. *

* * @param object the object to read the value from * @param key the key of the value * @param clazz the type to which the value should be transformed * * @return the value or null if either the value does not exist or if it can not be transformed * to the expected type */ @SuppressWarnings("unchecked") private T assertValue(JSONObject object, String key, Class clazz) { Object value = object.get(key); T result = null; if (clazz.isInstance(value)) { result = (T) value; } else if (value instanceof String) { if (URL.class.equals(clazz)) { try { result = (T) new URL((String) value); } catch (MalformedURLException e) { e.printStackTrace(); Console.printerrln("retrieved malformed URL for key '" + key + "': " + value + " (" + e.toString() + ")"); } } else if ((int.class.equals(clazz)) || (Integer.class.equals(clazz))) { try { result = (T) Integer.valueOf(Integer.parseInt((String) value)); } catch (NumberFormatException e) { Console.printerrln ("retrieved malformed integer for key '" + key + "': " + value); } } else if ((long.class.equals(clazz)) || (Long.class.equals(clazz))) { try { result = (T) Long.valueOf(Long.parseLong((String) value)); } catch (NumberFormatException e) { Console.printerrln ("retrieved malformed long for key '" + key + "': " + value); } } } else if (value instanceof Long) { if ((int.class.equals(clazz)) || (Integer.class.equals(clazz))) { result = (T) (Integer) ((Long) value).intValue(); } } else if (value instanceof JSONArray) { if ((int[].class.equals(clazz)) || (Integer[].class.equals(clazz))) { Integer[] resultArray = new Integer[((JSONArray) value).size()]; boolean allCouldBeParsed = true; for (int i = 0; i < ((JSONArray) value).size(); i++) { try { if (((JSONArray) value).get(i) instanceof Long) { resultArray[i] = (int) (long) (Long) ((JSONArray) value).get(i); } else if (((JSONArray) value).get(i) instanceof String) { try { resultArray[i] = (int) Long.parseLong((String) ((JSONArray) value).get(i)); } catch (NumberFormatException e) { Console.printerrln ("retrieved malformed integer array for key '" + key + "': " + value); allCouldBeParsed = false; break; } } else { Console.printerrln ("can not handle type of value in expected integer array '" + key + "': " + value); } } catch (ClassCastException e) { e.printStackTrace(); Console.printerrln("expected integer array for key '" + key + "' but it was something else: " + value); allCouldBeParsed = false; break; } } if (allCouldBeParsed) { result = (T) resultArray; } } } return result; } /** *

* Checks whether an agent is a robot. *

* * @param agent * agent that is checked * @return true, if the agent is a robot; false otherwise */ private boolean isRobot(String agent) { return agent.matches(robotRegex); } /** *

* Reads {@link #ROBOTFILTERFILE} and creates a regular expression that * matches all the robots defined in the file. The regular expression is * stored in the field {@link #robotRegex}. *

* * @throws IOException * thrown if there is a problem reading the robot filter * @throws FileNotFoundException * thrown if the robot filter is not found */ private void loadRobotRegex() throws IOException, FileNotFoundException { String[] lines = FileTools.getLinesFromFile(ROBOTFILTERFILE); StringBuilder regex = new StringBuilder(); for (int i = 0; i < lines.length; i++) { regex.append("(.*" + lines[i] + ".*)"); if (i != lines.length - 1) { regex.append('|'); } } robotRegex = regex.toString(); } /** *

* convenience method for dumping the content of a stream and returning a new stream * containing the same data. *

* * @param inputStream the stream to be dumped and copied * @return the copy of the stream * * @throws IOException if the stream can not be read */ /* private InputStream dumpStreamContent(ServletInputStream inputStream) throws IOException { List bytes = new ArrayList(); int buf; while ((buf = inputStream.read()) >= 0) { bytes.add((byte) buf); } byte[] byteArray = new byte[bytes.size()]; for (int i = 0; i < bytes.size(); i++) { byteArray[i] = bytes.get(i); } System.out.println(new String(byteArray, "UTF-8")); return new ByteArrayInputStream(byteArray); }*/ /** *

* convenience method for dumping an object to std out. If the object is a JSON object, it is * deeply analyzed and its internal structure is dumped as well. *

* * @param object the object to dump * @param indent the indentation to be used. */ private void dumpJSONObject(Object object, String indent) { if (object instanceof JSONArray) { boolean arrayContainsJSONObjects = false; for (Object arrayElem : (JSONArray) object) { if (arrayElem instanceof JSONObject) { arrayContainsJSONObjects = true; break; } } if (arrayContainsJSONObjects) { System.out.println(); System.out.print(indent); System.out.println('['); System.out.print(indent); System.out.print(' '); } else { System.out.print(' '); System.out.print('['); } int index = 0; for (Object arrayElem : (JSONArray) object) { if (index++ > 0) { System.out.print(","); if (arrayContainsJSONObjects) { System.out.println(); System.out.print(indent); } System.out.print(' '); } dumpJSONObject(arrayElem, indent + " "); } if (arrayContainsJSONObjects) { System.out.println(); System.out.print(indent); } System.out.print(']'); } else if (object instanceof JSONObject) { System.out.println(" {"); @SuppressWarnings("unchecked") Set> entrySet = ((JSONObject) object).entrySet(); int index = 0; for (Map.Entry entry : entrySet) { if (index++ > 0) { System.out.println(","); } System.out.print(indent); System.out.print(" \""); System.out.print(entry.getKey()); System.out.print("\":"); dumpJSONObject(entry.getValue(), indent + " "); } System.out.println(); System.out.print(indent); System.out.print('}'); } else { System.out.print('"'); System.out.print(object); System.out.print('"'); } } }