Index: /trunk/autoquest-htmlmonitor/data/robots/robotfilter.txt
===================================================================
--- /trunk/autoquest-htmlmonitor/data/robots/robotfilter.txt	(revision 1019)
+++ /trunk/autoquest-htmlmonitor/data/robots/robotfilter.txt	(revision 1019)
@@ -0,0 +1,66 @@
+findlinks
+discobot
+Googlebot
+Slurp
+YandexBot
+Spider
+ScholarUniverse
+Baiduspider
+Exabot
+Robot
+MetaGer-Bot
+YandexImages
+Gigabot
+SiteBot
+bingbot
+Ezooms
+Jeeves/Teoma
+msnbot
+DotBot
+changedetection.com/bot.html
+FAST Enterprise Crawler 6
+psbot
+http://ws.daum.net/aboutWebSearch.html
+NerdByNature.Bot
+Sogou web spider
+ssearch_bot
+Purebot
+http://www.icjobs.de
+scoutjet
+Netcraft Web Server Survey
+TurnitinBot
+ia_archiver
+MJ12bot
+Domnutch-Bot
+Eurobot
+GarlikCrawler
+CMS Crawler
+MSIECrawler
+NaverBot
+80legs
+AhrefsBot
+SISTRIX Crawler
+NetcraftSurveyAgent
+Search17Bot
+Semager
+YandexFavicons
+heritrix
+suggybot
+Netluchs
+Ocelli
+PHPCrawl
+SolomonoBot
+Sosospider
+Xerka WebBot
+YahooCacheSystem
+Xenu Link Sleuth
+cmsworldmap
+suchen.de
+amaredo.com/de/suche.html
+ibot
+w3af.sourceforge.net
+w3af.sf.net
+yacybot
+larbin2
+t-h-u-n-d-e-r-s-t-o-n-e
+sqlmap
Index: /trunk/autoquest-htmlmonitor/src/main/assembly/bin.xml
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/assembly/bin.xml	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/assembly/bin.xml	(revision 1019)
@@ -34,4 +34,12 @@
       </includes>
     </fileSet>
+    <fileSet>
+      <directory>data</directory>
+      <outputDirectory>data</outputDirectory>
+      <fileMode>664</fileMode>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
   </fileSets>
 </assembly>
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlClientInfos.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlClientInfos.java	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlClientInfos.java	(revision 1019)
@@ -57,5 +57,5 @@
      * @param title     title of the web site shown by the browser of the client
      */
-    public HtmlClientInfos(String clientId, String userAgent, URL url, String title) {
+    HtmlClientInfos(String clientId, String userAgent, URL url, String title) {
         this.clientId = clientId;
         this.userAgent = userAgent;
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorLogManager.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorLogManager.java	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorLogManager.java	(revision 1019)
@@ -115,5 +115,8 @@
      */
     @Override
-    public void handleMessage(HtmlClientInfos clientInfos, HtmlEvent[] events) {
+    public void handleMessage(HtmlClientInfos clientInfos,
+                              HtmlPageElement guiStructure,
+                              HtmlEvent[]     events)
+    {
         HtmlMonitorOutputWriter writer = writers.get(clientInfos.getClientId());
         
@@ -132,5 +135,5 @@
             }
 
-            writer.handleMessage(clientInfos, events);
+            writer.handleMessage(clientInfos, guiStructure, events);
         }
         catch (Exception e) {
@@ -144,5 +147,5 @@
             if (writer != null) {
                 try {
-                    writer.handleMessage(clientInfos, events);
+                    writer.handleMessage(clientInfos, guiStructure, events);
                 }
                 catch (Exception e1) {
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorMessageListener.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorMessageListener.java	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorMessageListener.java	(revision 1019)
@@ -32,7 +32,10 @@
      *
      * @param clientInfos infos about the client that send the events
+     * @param guiStructure root of the page element tree belonging to the message
      * @param events      the received events
      */
-    void handleMessage(HtmlClientInfos clientInfos, HtmlEvent[] events);
+    void handleMessage(HtmlClientInfos clientInfos,
+                       HtmlPageElement guiStructure,
+                       HtmlEvent[]     events);
 
 }
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorOutputWriter.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorOutputWriter.java	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorOutputWriter.java	(revision 1019)
@@ -22,4 +22,5 @@
 import java.text.DecimalFormat;
 
+import de.ugoe.cs.util.StringTools;
 import de.ugoe.cs.util.console.Console;
 
@@ -177,9 +178,16 @@
      */
     @Override
-    public void handleMessage(HtmlClientInfos clientInfos, HtmlEvent[] events) {
+    public void handleMessage(HtmlClientInfos clientInfos,
+                              HtmlPageElement guiStructure,
+                              HtmlEvent[]     events)
+    {
         if (outputWriter == null) {
             throw new IllegalStateException("not initialized. Call init() first");
         }
         
+        if (guiStructure != null) {
+            dumpGuiStructure(guiStructure);
+        }
+        
         for (HtmlEvent event : events) {
             dumpEvent(event);
@@ -200,4 +208,31 @@
     /**
      * <p>
+     * writes a component element for the provided page element and then for each of its children
+     * </p>
+     *
+     * @param guiStructure the page element to be written together with all its descendants
+     */
+    private void dumpGuiStructure(HtmlPageElement guiStructure) {
+        outputWriter.print("<component path=\""); // the path contains ids, so it is escaped
+        outputWriter.print(StringTools.xmlEntityReplacement(guiStructure.getPath()));
+        outputWriter.println("\">");
+        
+        dumpParam("class", guiStructure.getTagName());
+        dumpParam("htmlId", guiStructure.getId());
+        dumpParam("title", guiStructure.getTitle());
+        dumpParam("index", guiStructure.getIndex());
+        dumpParam("parent", guiStructure.getParentPath());
+        
+        outputWriter.println("</component>");
+        
+        if (guiStructure.getChildren() != null) {
+            for (HtmlPageElement child : guiStructure.getChildren()) {
+                dumpGuiStructure(child);
+            }
+        }
+    }
+
+    /**
+     * <p>
      * formats a received event and writes it to the log file. One event results in one line
      * in the log file containing all infos of the event.
@@ -207,76 +242,54 @@
      */
     private void dumpEvent(HtmlEvent event) {
-        dumpString(event.getClientInfos().getClientId());
-        outputWriter.print(' ');
-        dumpString(event.getTime().toString());
-        outputWriter.print(' ');
-        dumpString(event.getClientInfos().getTitle());
-        outputWriter.print(' ');
-        dumpString(event.getClientInfos().getUrl().toString());
-        outputWriter.print(' ');
-        dumpString(event.getClientInfos().getUserAgent());
-        outputWriter.print(' ');
-        dumpString(event.getEventType());
-        outputWriter.print(' ');
-        dumpString(event.getPath());
-
+        outputWriter.print("<event type=\""); // the type is escaped like all param values
+        outputWriter.print(StringTools.xmlEntityReplacement(String.valueOf(event.getEventType())));
+        outputWriter.println("\">");
+        
         if (event.getCoordinates() != null) {
-            outputWriter.print(' ');
-            
-            StringBuffer value = new StringBuffer();
-            for (int i = 0; i < event.getCoordinates().length; i++) {
-                if (i > 0) {
-                    value.append(',');
-                }
-                value.append(event.getCoordinates()[i]);
-            }
-            
-            dumpString(value.toString());
-        }
-
-        if (event.getKey() != null) {
-            outputWriter.print(' ');
-            dumpString(event.getKey().toString());
-        }
+            dumpParam("X", event.getCoordinates()[0]);
+            dumpParam("Y", event.getCoordinates()[1]);
+        }
+
+        dumpParam("key", event.getKey());
             
         if (event.getScrollPosition() != null) {
-            outputWriter.print(' ');
-            
-            StringBuffer value = new StringBuffer();
-            for (int i = 0; i < event.getScrollPosition().length; i++) {
-                if (i > 0) {
-                    value.append(',');
-                }
-                value.append(event.getScrollPosition()[i]);
-            }
-            
-            dumpString(value.toString());
-        }
-
-        if (event.getSelectedValue() != null) {
-            outputWriter.print(' ');
-            dumpString(event.getSelectedValue());
-        }
-            
-        outputWriter.println();
-    }
-
-    /**
-     * <p>
-     * convenience method to dump a string with trailing and leading " as well as replaced
-     * backslashes, ", and newlines
-     * </p>
-     *
-     * @param clientId2
-     */
-    private void dumpString(String str) {
-        String value = str;
-        value = value.replaceAll("\\\\", "\\\\\\\\");
-        value = value.replaceAll("\\\"", "\\\\\\\"");
-        value = value.replaceAll("\n", "\\\\n");
-        
-        outputWriter.print('"');
-        outputWriter.print(value);
-        outputWriter.print('"');
+            dumpParam("scrollX", event.getScrollPosition()[0]);
+            dumpParam("scrollY", event.getScrollPosition()[1]);
+        }
+
+        dumpParam("selectedValue", event.getSelectedValue());
+        dumpParam("target", event.getPath());
+        dumpParam("timestamp", event.getTime());
+        
+        outputWriter.println("</event>");
+    }
+
+    /**
+     * <p>
+     * writes a param element with the provided name and value; nothing is written for null values
+     * </p>
+     *
+     * @param name  the name of the parameter, written unchanged into the name attribute
+     * @param value the value; its string form is passed through StringTools.xmlEntityReplacement
+     */
+    private void dumpParam(String name, Object value) {
+        if (value == null) {
+            return;
+        }
+        
+        String val;
+        
+        if (value instanceof String) {
+            val = (String) value;
+        }
+        else {
+            val = value.toString();
+        }
+        
+        outputWriter.print(" <param name=\"");
+        outputWriter.print(name);
+        outputWriter.print("\" value=\"");
+        outputWriter.print(StringTools.xmlEntityReplacement(val));
+        outputWriter.println("\"/>");
     }
 
@@ -332,4 +345,6 @@
         FileOutputStream fis = new FileOutputStream(logFile);
         outputWriter = new PrintWriter(new OutputStreamWriter(fis, "UTF-8"));
+        outputWriter.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        outputWriter.println("<session>");
     }
 
@@ -341,4 +356,5 @@
     private void closeLogWriter() {
         if (outputWriter != null) {
+            outputWriter.println("</session>");
             outputWriter.flush();
             outputWriter.close();
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorServlet.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorServlet.java	(revision 1018)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlMonitorServlet.java	(revision 1019)
@@ -16,4 +16,5 @@
 
 import java.io.BufferedReader;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -37,4 +38,5 @@
 import org.mortbay.jetty.servlet.DefaultServlet;
 
+import de.ugoe.cs.util.FileTools;
 import de.ugoe.cs.util.console.Console;
 
@@ -59,4 +61,19 @@
     
     /**
+     * <p>
+     * Name and path of the robot filter.
+     * </p>
+     */
+    private static final String ROBOTFILTERFILE = "data/robots/robotfilter.txt";
+
+    /**
+     * <p>
+     * Field that contains a regular expression that matches all robots
+     * contained in {@link #ROBOTFILTERFILE}.
+     * </p>
+     */
+    private String robotRegex = null;
+
+    /**
      * the message listener to forward received messages to.
      */
@@ -72,4 +89,11 @@
     HtmlMonitorServlet(HtmlMonitorMessageListener messageListener) {
         this.messageListener = messageListener;
+        try {
+            loadRobotRegex();
+        }
+        catch (Exception e) {
+            Console.println
+                ("robot filtering disabled: could not parse robot filter file " + ROBOTFILTERFILE);
+        }
     }
 
@@ -89,5 +113,5 @@
                 InputStream script = this.getClass().getClassLoader().getResourceAsStream
                      ("autoquest-htmlmonitor.js");
-            
+                
                 if (script == null) {
                     Console.printerrln("could not read autoquest-htmlmonitor.js from classpath");
@@ -126,6 +150,7 @@
         Object value = null;
         try {
+            //InputStream requestInputStream = dumpStreamContent(request.getInputStream());
             InputStream requestInputStream = request.getInputStream();
-            
+
             value = JSONValue.parseWithException
                 (new InputStreamReader(requestInputStream, "UTF-8"));
@@ -169,6 +194,12 @@
                     ("incoming message does not contain valid client infos --> discarding it");
             }
-            else {
+            else if (isRobot(clientInfos.getUserAgent())) {
+                Console.printerrln
+                    ("ignoring robot " + clientInfos.getUserAgent());
+            }
+            else {
+                HtmlPageElement guiStructure = extractHtmlPageElements(message, clientInfos);
                 HtmlEvent[] events = extractHtmlEvents(message, clientInfos);
+                
                 if (events == null) {
                     Console.printerrln
@@ -176,5 +207,5 @@
                 }
                 else {
-                    messageListener.handleMessage(clientInfos, events);
+                    messageListener.handleMessage(clientInfos, guiStructure, events);
                 }
             }
@@ -244,4 +275,7 @@
             events = new ArrayList<HtmlEvent>();
             
+            HtmlPageElement server = getServerElement(clientInfos);
+            HtmlPageElement page = getPageElementRepresentingWebPage(clientInfos, server);
+
             for (int i = 0; i < eventArray.size(); i++) {
                 Object eventObj = eventArray.get(i);
@@ -277,6 +311,7 @@
                                 (eventType, coordinates, key, scrollPosition, selectedValue))
                     {
-                        events.add(new HtmlEvent(clientInfos, time, path, eventType, coordinates,
-                                                 key, scrollPosition, selectedValue));
+                        path = page.getPath() + path;
+                        events.add(new HtmlEvent(clientInfos, time, path, eventType,
+                                                 coordinates, key, scrollPosition, selectedValue));
                     }
                     else {
@@ -294,4 +329,106 @@
             return null;
         }
+    }
+
+    /**
+     * <p>
+     * builds the tree of server element, page element, and converted "guiModel" entry
+     * </p>
+     *
+     * @param object      the JSON object from which the "guiModel" entry is read
+     * @param clientInfos the client infos used to create the server and the page element
+     * @return the server element being the root of the created tree
+     */
+    private HtmlPageElement extractHtmlPageElements(JSONObject      object,
+                                                    HtmlClientInfos clientInfos)
+    {
+        HtmlPageElement server = getServerElement(clientInfos);
+        HtmlPageElement page = getPageElementRepresentingWebPage(clientInfos, server);
+
+        JSONObject jsonPageElement = assertValue(object, "guiModel", JSONObject.class);
+        page.addChild(convert(jsonPageElement, page.getPath()));
+        
+        return server;
+    }
+
+    /**
+     * <p>
+     * creates the element representing the web server, identified by host and, if set, port
+     * </p>
+     *
+     * @param clientInfos the client infos whose URL provides host and port
+     * @return a new element with tag name "server", no parent path, and index 0
+     */
+    private HtmlPageElement getServerElement(HtmlClientInfos clientInfos) {
+        String id = clientInfos.getUrl().getHost();
+        if (clientInfos.getUrl().getPort() > -1) {
+            id += ":" + clientInfos.getUrl().getPort();
+        }
+        
+        return new HtmlPageElement(null, "server", id, 0);
+    }
+
+    /**
+     * <p>creates the "document" element for the visited page and adds it to the server</p>
+     *
+     * @param clientInfos the client infos providing URL path, query, and title of the page
+     * @param server      the server element which becomes the parent of the page
+     * @return the new page element whose id is the URL path plus, if present, "?" and the query
+     */
+    private HtmlPageElement getPageElementRepresentingWebPage(HtmlClientInfos clientInfos,
+                                                              HtmlPageElement server)
+    {
+        String id = clientInfos.getUrl().getPath();
+        
+        if (clientInfos.getUrl().getQuery() != null) {
+            id += "?" + clientInfos.getUrl().getQuery();
+        }
+        
+        HtmlPageElement page =
+            new HtmlPageElement(server.getPath(), "document", id, clientInfos.getTitle(), 0);
+        
+        server.addChild(page);
+        
+        return page;
+    }
+
+    /**
+     * <p>recursively converts a JSON page element and its "children" into page elements</p>
+     *
+     * @param jsonPageElement the JSON object to convert, may be null
+     * @param parentPath      the path of the element that becomes the parent of the result
+     * @return the converted element including its converted children, or null if the provided
+     *         JSON object is null
+     */
+    private HtmlPageElement convert(JSONObject jsonPageElement, String parentPath) {
+        HtmlPageElement result = null;
+
+        if (jsonPageElement != null) {
+            String tagName = assertValue(jsonPageElement, "tagName", String.class);
+            String id = assertValue(jsonPageElement, "id", String.class);
+            Integer index = assertValue(jsonPageElement, "index", Integer.class);
+            // NOTE(review): assumes the JSON parser delivers "index" as Integer - TODO confirm
+            result = new HtmlPageElement(parentPath, tagName, id, index);
+
+            JSONArray childElements = assertValue(jsonPageElement, "children", JSONArray.class);
+            
+            if (childElements != null) {
+                Object jsonChild;
+
+                for (int i = 0; i < childElements.size(); i++) {
+                    jsonChild = childElements.get(i);
+                    if (!(jsonChild instanceof JSONObject)) {
+                        Console.printerrln("child " + (i + 1) + " of HTML page element " + tagName +
+                                           " is no valid HTML page element");
+                    }
+                    else {
+                        result.addChild(convert((JSONObject) jsonChild, result.getPath()));
+                    }
+                }
+            }
+            
+        }
+        
+        return result;    
     }
 
@@ -495,4 +632,41 @@
     /**
      * <p>
+     * Checks whether an agent is a robot. Without a loaded filter or an agent, this is denied.
+     * </p>
+     * 
+     * @param agent
+     *            agent that is checked
+     * @return true, if the agent is a robot; false otherwise, also if robotRegex or agent is null
+     */
+    private boolean isRobot(String agent) {
+        return (robotRegex != null) && (agent != null) && agent.matches(robotRegex);
+    }
+
+    /**
+     * <p>
+     * Reads {@link #ROBOTFILTERFILE} and creates a regular expression that matches any agent
+     * containing one of the entries of the file. Entries are trimmed and matched literally, blank
+     * entries are ignored. The regular expression is stored in the field {@link #robotRegex}.
+     * </p>
+     * 
+     * @throws IOException
+     *             thrown if there is a problem reading the robot filter
+     * @throws FileNotFoundException
+     *             thrown if the robot filter is not found
+     */
+    private void loadRobotRegex() throws IOException, FileNotFoundException {
+        StringBuilder regex = new StringBuilder();
+        for (String line : FileTools.getLinesFromFile(ROBOTFILTERFILE)) {
+            String entry = line.trim();
+            if (entry.length() > 0) { // a blank entry would yield "(.*.*)" matching every agent
+                regex.append((regex.length() > 0) ? "|" : "");
+                regex.append("(.*").append(java.util.regex.Pattern.quote(entry)).append(".*)");
+            }
+        }
+        robotRegex = regex.toString();
+    }
+
+    /**
+     * <p>
      * convenience method for dumping the content of a stream and returning a new stream
      * containing the same data.
@@ -504,5 +678,5 @@
      * @throws IOException if the stream can not be read 
      */
-    /*private InputStream dumpStreamContent(ServletInputStream inputStream) throws IOException {
+/*    private InputStream dumpStreamContent(ServletInputStream inputStream) throws IOException {
         List<Byte> bytes = new ArrayList<Byte>();
         int buf;
Index: /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlPageElement.java
===================================================================
--- /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlPageElement.java	(revision 1019)
+++ /trunk/autoquest-htmlmonitor/src/main/java/de/ugoe/cs/autoquest/htmlmonitor/HtmlPageElement.java	(revision 1019)
@@ -0,0 +1,181 @@
+//   Copyright 2012 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.autoquest.htmlmonitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * <p>
+ * represents a node in a tree of page elements, identified by tag name and either id or index
+ * </p>
+ * 
+ * @author Patrick Harms
+ */
+class HtmlPageElement {
+
+    /**
+     * the path of the parent element, null if there is none
+     */
+    private String parentPath;
+    
+    /**
+     * the tag name of the element
+     */
+    private String tagName;
+    
+    /**
+     * the id of the element, may be null or empty
+     */
+    private String id;
+    
+    /**
+     * the title of the element, only set by the constructor taking a title
+     */
+    private String title;
+    
+    /**
+     * the index of the element, used in the path if no id is available
+     */
+    private Integer index;
+    
+    /**
+     * the children of the element, null until the first child is added
+     */
+    private List<HtmlPageElement> children;
+
+    /**
+     * <p>
+     * creates a page element without title and without children
+     * </p>
+     *
+     * @param parentPath the path of the parent element or null, if there is none
+     * @param tagName    the tag name of the element
+     * @param id         the id of the element, may be null or empty
+     * @param index      the index of the element, used in the path if no id is available
+     */
+    HtmlPageElement(String parentPath, String tagName, String id, Integer index) {
+        this.parentPath = parentPath;
+        this.tagName = tagName;
+        this.id = id;
+        this.index = index;
+    }
+
+    /**
+     * <p>creates a page element with a title and without children</p>
+     *
+     * @param parentPath the path of the parent element or null, if there is none
+     * @param tagName    the tag name of the element
+     * @param id         the id of the element, may be null or empty
+     * @param title      the title of the element
+     * @param index      the index of the element, used in the path if no id is
+     *                   available
+     */
+    HtmlPageElement(String parentPath, String tagName, String id, String title, Integer index) {
+        this(parentPath, tagName, id, index);
+        this.title = title;
+    }
+
+    /**
+     * @return the tagName
+     */
+    String getTagName() {
+        return tagName;
+    }
+
+    /**
+     * @return the id
+     */
+    String getId() {
+        return id;
+    }
+
+    /**
+     * @return the title
+     */
+    String getTitle() {
+        return title;
+    }
+
+    /**
+     * @return the index
+     */
+    Integer getIndex() {
+        return index;
+    }
+
+    /**
+     * @return the children
+     */
+    List<HtmlPageElement> getChildren() {
+        return children;
+    }
+
+
+    /**
+     * adds the provided child to this element; a null child is ignored
+     */
+    void addChild(HtmlPageElement child) {
+        if (child != null) {
+            if (children == null) {
+                children = new ArrayList<HtmlPageElement>();
+            }
+            
+            children.add(child);
+        }
+    }
+
+    /**
+     * <p>
+     * returns the parent path provided to the constructor
+     * </p>
+     *
+     * @return the path of the parent element, null if there is none
+     */
+    String getParentPath() {
+        return parentPath;
+    }
+
+    /**
+     * <p>
+     * returns the parent path (if any) followed by "/", the tag name, and the id or the index
+     * </p>
+     *
+     * @return the path, ending with tagName(id=...) if an id is set, or tagName[index] otherwise
+     */
+    String getPath() {
+        StringBuffer result = new StringBuffer();
+        if (parentPath != null) {
+            result.append(parentPath);
+        }
+
+        result.append("/");
+        result.append(tagName);
+        
+        if ((id != null) && (!"".equals(id))) {
+            result.append("(id=");
+            result.append(id);
+            result.append(")");
+        }
+        else {
+            result.append("[");
+            result.append(index);
+            result.append("]");
+        }
+        
+        return result.toString();
+    }
+
+}
