source: trunk/autoquest-plugin-html/src/main/java/de/ugoe/cs/autoquest/plugin/html/HTMLLogTextInputPseudomizer.java @ 2215

Last change on this file since 2215 was 2125, checked in by pharms, 8 years ago
  • added and corrected pseudonymization support
File size: 12.0 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.plugin.html;
16
17import java.io.File;
18import java.io.FileNotFoundException;
19import java.io.FileOutputStream;
20import java.io.OutputStreamWriter;
21import java.io.PrintWriter;
22import java.io.UnsupportedEncodingException;
23import java.security.MessageDigest;
24import java.security.NoSuchAlgorithmException;
25import java.util.HashSet;
26import java.util.LinkedList;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30
31import org.apache.commons.codec.binary.Base64;
32import org.xml.sax.SAXException;
33
34import de.ugoe.cs.util.StringTools;
35import de.ugoe.cs.util.console.Console;
36
37/**
38 * <p>
39 * pseudomizes the text entered in text input events by replacing it with an MD5 hash. For this, it
40 * parses a given file and dumps a replacement, in which all text input events have an MD5 hash
41 * as entered text. If the events already have an MD5 hash, it stays unchanged. Providing the
42 * parameters to the constructor, it can be decided to pseudomize text entries into file and search
43 * input fields, as well.
44 * </p>
45 *
46 * @author Patrick Harms
47 * @version 1.0
48 *
49 */
50public class HTMLLogTextInputPseudomizer extends AbstractDefaultLogParser {
51   
52    /**
53     * <p>
54     * Indicator if search input fields must be pseudomized, as well.
55     * </p>
56     */
57    private boolean pseudomizeSearchInputs = false;
58   
59    /**
60     * <p>
61     * Indicator if file input fields must be pseudomized, as well.
62     * </p>
63     */
64    private boolean pseudomizeFileInputs = false;
65   
66    /**
67     * <p>
68     * The output writer into which the pseudomized variant of the log file is written
69     * </p>
70     */
71    private PrintWriter outputWriter;
72   
73    /**
74     * <p>
75     * The set of text input fields found in the GUI model
76     * </p>
77     */
78    private Set<String> textInputFieldIds = new HashSet<String>();
79   
80    /**
81     * <p>
82     * the events that were read
83     * </p>
84     */
85    private List<EventEntry> sortedEvents = new LinkedList<EventEntry>();
86
87    /**
88     * <p>
89     * creates the input pseudomizer with the switches, if text inputs into file and search fields
90     * shall be pseudomized, as well.
91     * </p>
92     *
93     * @param pseudomizeSearchInputs true, if inputs into search fields shall be pseudomized, as
94     *                               well; false else
95     * @param pseudomizeFileInputs   true, if inputs into file fields shall be pseudomized, as well;
96     *                               false else
97     */
98    public HTMLLogTextInputPseudomizer(boolean pseudomizeSearchInputs,
99                                       boolean pseudomizeFileInputs)
100    {
101        this.pseudomizeSearchInputs = pseudomizeSearchInputs;
102        this.pseudomizeFileInputs = pseudomizeFileInputs;
103    }
104
105    /**
106     * <p>
107     * called to pseudomize all text inputs in the given log file. The method reuses
108     * {@link #pseudomizeFile(File)}.
109     * </p>
110     *
111     * @param file the log file in which the text inputs must be pseudomized
112     */
113    public void pseudomizeFile(String file) {
114        if (file == null) {
115            throw new IllegalArgumentException("file must not be null");
116        }
117
118        pseudomizeFile(new File(file));
119    }
120
121    /**
122     * <p>
123     * called to pseudomize all text inputs in the given log file. The given file is read
124     * completely. All GUI elements are written to an output file as they are. All events are
125     * written to an output file as they are, as well, as long as they do not represent text inputs.
126     * If they are text input events, the entered text is replaced by its hash value for
127     * pseudomizing the text input. Finally, the original log file is deleted and replaced by
128     * the pseudomized variant. Log files, which are already pseudomized, stay untouched.
129     * </p>
130     *
131     * @param file the log file in which the text inputs must be pseudomized
132     */
133    public void pseudomizeFile(File file) {
134        if (file == null) {
135            throw new IllegalArgumentException("file must not be null");
136        }
137       
138        if (!file.exists()) {
139            throw new IllegalArgumentException("file must denote an existing file");
140        }
141       
142        if (!file.isFile()) {
143            throw new IllegalArgumentException("file must denote a file");
144        }
145       
146        File outFile = new File(file.getParentFile(), file.getName() + "_tmp");
147        boolean parsingFailed = false;
148       
149        try {
150            FileOutputStream fis = new FileOutputStream(outFile);
151            outputWriter = new PrintWriter(new OutputStreamWriter(fis, "UTF-8"));
152            outputWriter.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
153            outputWriter.println("<session>");
154
155            textInputFieldIds.clear();
156            sortedEvents.clear();
157           
158            try {
159                super.parseFile(file);
160            }
161            catch (SAXException e) {
162                parsingFailed = true;
163            }
164           
165            for (EventEntry event : sortedEvents) {
166                event.dump();
167            }
168           
169            outputWriter.println("</session>");
170            outputWriter.flush();
171        }
172        catch (FileNotFoundException e) {
173            Console.printerrln("could not create pseudomized file " + outFile);
174        }
175        catch (UnsupportedEncodingException e) {
176            // this should never happen
177            e.printStackTrace();
178        }
179        finally {
180            if (outputWriter != null) {
181                outputWriter.close();
182                outputWriter = null;
183            }
184        }
185       
186        if (!parsingFailed && outFile.exists()) {
187            if (!file.delete()) {
188                Console.printerrln("could not delete pseudomized file " + file);
189            }
190            else if (!outFile.renameTo(file)) {
191                Console.printerrln
192                    ("could not rename pseudomized file to original file name " + file);
193            }           
194            else {
195                Console.println("pseudomized file " + file);
196            }
197        }
198        else {
199            if (!outFile.delete()) {
200                Console.printerrln("could not delete temporary file " + outFile);
201            }
202        }
203    }
204   
205    /* (non-Javadoc)
206     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#parseFile(java.lang.String)
207     */
208    @Override
209    public void parseFile(String filename) {
210        throw new IllegalStateException("this method must not be called externally");
211    }
212
213    /* (non-Javadoc)
214     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#parseFile(java.io.File)
215     */
216    @Override
217    public void parseFile(File file) {
218        throw new IllegalStateException("this method must not be called externally");
219    }
220
221    /* (non-Javadoc)
222     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleGUIElement(String, Map)
223     */
224    @Override
225    protected boolean handleGUIElement(String id, Map<String, String> parameters)
226        throws SAXException
227    {
228        outputWriter.print("<component id=\"");
229        outputWriter.print(id);
230        outputWriter.println("\">");
231       
232        for (Map.Entry<String, String> param : parameters.entrySet()) {
233            dumpParam(param.getKey(), param.getValue());
234           
235            if ("tagname".equals(param.getKey())) {
236                if ("input_text".equals(param.getValue()) || "textarea".equals(param.getValue()) ||
237                    "input_password".equals(param.getValue()) ||
238                    (pseudomizeSearchInputs && "input_search".equals(param.getValue())) ||
239                    (pseudomizeFileInputs && "input_file".equals(param.getValue())))
240                {
241                    textInputFieldIds.add(id);
242                }
243            }
244        }
245           
246        outputWriter.println("</component>");
247       
248        return true;
249    }
250
251    /* (non-Javadoc)
252     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleEvent(String,Map)
253     */
254    @Override
255    protected boolean handleEvent(String type, Map<String, String> parameters) throws SAXException {
256        if ("onchange".equals(type)) {
257            String targetId = parameters.get("target");
258       
259            if ((targetId != null) && textInputFieldIds.contains(targetId)) {
260                String value = parameters.get("selectedValue");
261               
262                if ((value != null) && !value.endsWith("==")) {
263                    try {
264                        MessageDigest md = MessageDigest.getInstance("SHA-512");
265                        md.update(value.getBytes("UTF-8"));
266                        value =  Base64.encodeBase64String(md.digest());
267                    }
268                    catch (UnsupportedEncodingException e) {
269                        throw new IllegalStateException("Java VM does not support this code");
270                    }
271                    catch (NoSuchAlgorithmException e) {
272                        throw new IllegalStateException("Java VM does not support this code");
273                    }
274                   
275                    parameters.put("selectedValue", value);
276                }
277            }
278        }
279       
280        sortedEvents.add(new EventEntry(type, parameters));
281
282        return true;
283    }
284
285    /**
286     * <p>
287     * dumps a parameter with the given name and value to the log file. The result is a
288     * tag named param with a name attribute and a value attribute. The value is transformed
289     * to a String if it is no String already. Furthermore, an XML entity replacement is performed
290     * if required.
291     * </p>
292     *
293     * @param name  the name of the parameter to be dumped
294     * @param value the value of the parameter to be dumped
295     */
296    private void dumpParam(String name, Object value) {
297        if (value == null) {
298            return;
299        }
300       
301        String val;
302       
303        if (value instanceof String) {
304            val = (String) value;
305        }
306        else {
307            val = String.valueOf(value);
308        }
309       
310        outputWriter.print(" <param name=\"");
311        outputWriter.print(name);
312        outputWriter.print("\" value=\"");
313        outputWriter.print(StringTools.xmlEntityReplacement(val));
314        outputWriter.println("\"/>");
315    }
316
317
318    /**
319     * <p>
320     * this class is used internally for storing events in a sorted list together with the
321     * timestamps, being the sort criteria.
322     * </p>
323     */
324    private class EventEntry {
325       
326        /**
327         * <p>
328         * the type of the event
329         * </p>
330         */
331        private String type;
332       
333        /**
334         * <p>
335         * the parameters of the event
336         * </p>
337         */
338        private Map<String, String> parameters;
339
340        /**
341         * <p>
342         * creates a new event entry with event type and parameters
343         * </p>
344         */
345        private EventEntry(String type, Map<String, String> parameters) {
346            this.type = type;
347            this.parameters = parameters;
348        }
349       
350        /**
351         * <p>
352         * convenience method for dumping the event into the compressed log file
353         * </p>
354         */
355        private void dump() {
356            outputWriter.print("<event type=\"");
357            outputWriter.print(type);
358            outputWriter.println("\">");
359           
360            for (Map.Entry<String, String> param : parameters.entrySet()) {
361                dumpParam(param.getKey(), param.getValue());
362            }
363           
364            outputWriter.println("</event>");
365        }
366    }
367}
Note: See TracBrowser for help on using the repository browser.