Changeset 171 for trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web
- Timestamp:
- 09/09/11 06:23:36 (13 years ago)
- Location:
- trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/WeblogParser.java
r111 r171 17 17 import de.ugoe.cs.util.console.Console; 18 18 19 /** 20 * <p> 21 * Provides functionality to parse log files with web request. 22 * </p> 23 * 24 * @author Steffen Herbold 25 * @version 1.0 26 */ 19 27 public class WeblogParser { 20 28 29 /** 30 * <p> 31 * Timeout between two sessions in milliseconds. 32 * </p> 33 */ 21 34 private long timeout; 22 35 36 /** 37 * <p> 38 * Minimal length of a session. All shorter sessions will be pruned. 39 * Default: 2 40 * </p> 41 */ 23 42 private int minLength = 2; 24 43 44 /** 45 * <p> 46 * Collection of generated sequences. 47 * </p> 48 */ 25 49 private List<List<WebEvent>> sequences; 26 50 51 /** 52 * <p> 53 * Name and path of the robot filter. 54 * </p> 55 */ 27 56 private static final String ROBOTFILTERFILE = "misc/robotfilter.txt"; 28 29 private String robotRegex = ".*"; 30 57 58 /** 59 * <p> 60 * Field that contains a regular expression that matches all robots 61 * contained in {@link #ROBOTFILTERFILE}. 62 * </p> 63 */ 64 private String robotRegex = null; 65 66 /** 67 * <p> 68 * Constructor. Creates a new WeblogParser with a default timeout of 69 * 3,600,000 milliseconds (1 hour). 70 * </p> 71 */ 31 72 public WeblogParser() { 32 timeout = 3600000; // 1 hour session-timeout as default 33 } 34 73 this(3600000); 74 } 75 76 /** 77 * <p> 78 * Constructor. Creates a new WeblogParser. 79 * </p> 80 * 81 * @param timeout 82 * session timeout 83 */ 35 84 public WeblogParser(long timeout) { 36 85 this.timeout = timeout; 37 86 } 38 87 88 /** 89 * <p> 90 * Returns the generated event sequences. 91 * </p> 92 * 93 * @return generated event sequences 94 */ 39 95 public List<List<WebEvent>> getSequences() { 40 return sequences; 41 } 42 96 return sequences; 97 } 98 99 /** 100 * <p> 101 * Sets the session timeout. 
102 * </p> 103 * 104 * @param timeout 105 * new session timeout 106 */ 43 107 public void setTimeout(long timeout) { 44 108 this.timeout = timeout; 45 109 } 46 110 111 /** 112 * <p> 113 * Sets the minimal length of a session. All sessions that contain fewer 114 * events will be pruned. 115 * </p> 116 * 117 * @param minLength 118 * new minimal length 119 */ 47 120 public void setMinLength(int minLength) { 48 121 this.minLength = minLength; 49 122 } 50 51 public void parseFile(String filename) throws IOException, FileNotFoundException, ParseException, URISyntaxException { 123 124 /** 125 * <p> 126 * Parses a web log file. 127 * </p> 128 * 129 * @param filename 130 * name and path of the log file 131 * @throws IOException 132 * thrown if there is a problem with reading the log file 133 * @throws FileNotFoundException 134 * thrown if the log file is not found 135 * @throws ParseException 136 * thrown if the date format is invalid 137 * @throws URISyntaxException 138 * thrown if the URI is invalid 139 */ 140 public void parseFile(String filename) throws IOException, 141 FileNotFoundException, ParseException, URISyntaxException { 52 142 String[] lines = FileTools.getLinesFromFile(filename); 53 143 54 144 Map<String, List<Integer>> cookieSessionMap = new HashMap<String, List<Integer>>(); 55 145 int lastId = -1; 56 57 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 146 147 SimpleDateFormat dateFormat = new SimpleDateFormat( 148 "yyyy-MM-dd HH:mm:ss"); 58 149 loadRobotRegex(); 59 150 60 151 sequences = new ArrayList<List<WebEvent>>(); 61 62 for( String line : lines ) { 63 String[] values = line.substring(1, line.length()-1).split("\" \""); 64 152 153 for (String line : lines) { 154 String[] values = line.substring(1, line.length() - 1).split( 155 "\" \""); 156 65 157 // use cookie as session identifier 66 158 int cookieStart = values[0].lastIndexOf('.'); 67 String cookie = values[0].substring(cookieStart +1);159 String cookie = 
values[0].substring(cookieStart + 1); 68 160 String dateString = values[1]; 69 161 long timestamp = dateFormat.parse(dateString).getTime(); … … 71 163 // String ref = values[3]; // referer is not yet used! 72 164 String agent; 73 if ( values.length>4) {165 if (values.length > 4) { 74 166 agent = values[4]; 75 167 } else { 76 168 agent = "noagent"; 77 169 } 78 170 79 171 List<String> postedVars = new ArrayList<String>(); 80 if ( values.length==6) { // post vars found81 for ( String postVar : values[5].trim().split(" ")) {172 if (values.length == 6) { // post vars found 173 for (String postVar : values[5].trim().split(" ")) { 82 174 postedVars.add(postVar); 83 175 } 84 176 } 85 if ( !isRobot(agent)) {177 if (!isRobot(agent)) { 86 178 URI uri = new URI(uriString); 87 88 String path = uri.getPath(); 179 180 String path = uri.getPath(); 89 181 List<String> getVars = extractGetVarsFromUri(uri); 90 91 WebEvent event = new WebEvent(path, timestamp, postedVars, getVars); 92 182 183 WebEvent event = new WebEvent(path, timestamp, postedVars, 184 getVars); 185 93 186 // find session and add event 94 187 List<Integer> sessionIds = cookieSessionMap.get(cookie); 95 if ( sessionIds==null) {188 if (sessionIds == null) { 96 189 sessionIds = new ArrayList<Integer>(); 97 190 // start new session … … 99 192 cookieSessionMap.put(cookie, sessionIds); 100 193 sequences.add(new LinkedList<WebEvent>()); 101 } 102 Integer lastSessionIndex = sessionIds.get(sessionIds.size()-1); 194 } 195 Integer lastSessionIndex = sessionIds 196 .get(sessionIds.size() - 1); 103 197 List<WebEvent> lastSession = sequences.get(lastSessionIndex); 104 198 long lastEventTime = timestamp; 105 if( !lastSession.isEmpty() ) { 106 lastEventTime = lastSession.get(lastSession.size()-1).getTimestamp(); 107 } 108 if( timestamp-lastEventTime>timeout ) { 199 if (!lastSession.isEmpty()) { 200 lastEventTime = lastSession.get(lastSession.size() - 1) 201 .getTimestamp(); 202 } 203 if (timestamp - lastEventTime > timeout) { 109 
204 sessionIds.add(++lastId); 110 205 List<WebEvent> newSession = new LinkedList<WebEvent>(); … … 119 214 } 120 215 216 /** 217 * <p> 218 * Prunes sequences shorter than {@link #minLength}. 219 * </p> 220 */ 121 221 private void pruneShortSequences() { 122 Console.traceln("" +sequences.size()+ " user sequences found");222 Console.traceln("" + sequences.size() + " user sequences found"); 123 223 // prune sequences shorter than min-length 124 int i =0;125 while ( i<sequences.size()) {126 if ( sequences.get(i).size()<minLength) {224 int i = 0; 225 while (i < sequences.size()) { 226 if (sequences.get(i).size() < minLength) { 127 227 sequences.remove(i); 128 228 } else { … … 130 230 } 131 231 } 132 Console.traceln(""+sequences.size()+ " remaining after pruning of sequences shorter than " + minLength); 133 } 134 232 Console.traceln("" + sequences.size() 233 + " remaining after pruning of sequences shorter than " 234 + minLength); 235 } 236 237 /** 238 * <p> 239 * Reads {@link #ROBOTFILTERFILE} and creates a regular expression that 240 * matches all the robots defined in the file. The regular expression is 241 * stored in the field {@link #robotRegex}. 242 * </p> 243 * 244 * @throws IOException 245 * thrown if there is a problem reading the robot filter 246 * @throws FileNotFoundException 247 * thrown if the robot filter is not found 248 */ 135 249 private void loadRobotRegex() throws IOException, FileNotFoundException { 136 250 String[] lines = FileTools.getLinesFromFile(ROBOTFILTERFILE); 137 251 StringBuilder regex = new StringBuilder(); 138 for ( int i=0; i<lines.length; i++) {139 regex.append("(.*" +lines[i]+".*)");140 if ( i!=lines.length-1) {252 for (int i = 0; i < lines.length; i++) { 253 regex.append("(.*" + lines[i] + ".*)"); 254 if (i != lines.length - 1) { 141 255 regex.append("|"); 142 256 } … … 144 258 robotRegex = regex.toString(); 145 259 } 146 260 261 /** 262 * <p> 263 * Checks whether an agent is a robot. 
264 * </p> 265 * 266 * @param agent 267 * agent that is checked 268 * @return true, if the agent is a robot; false otherwise 269 */ 147 270 private boolean isRobot(String agent) { 148 271 return agent.matches(robotRegex); 149 272 } 150 273 274 /** 275 * <p> 276 * Parses the URI and extracts the GET variables that have been passed. 277 * </p> 278 * 279 * @param uri 280 * URI that is parsed 281 * @return a list with all GET variables 282 */ 151 283 private List<String> extractGetVarsFromUri(URI uri) { 152 284 List<String> getVars = new ArrayList<String>(); 153 285 String query = uri.getQuery(); 154 if ( query!=null) {286 if (query != null) { 155 287 String[] paramPairs = query.split("&"); 156 for ( String paramPair : paramPairs) {288 for (String paramPair : paramPairs) { 157 289 String[] paramSplit = paramPair.split("="); 158 290 getVars.add(paramSplit[0]); -
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/commands/CMDloadSessionsFromClickstream.java
r111 r171 13 13 import de.ugoe.cs.util.console.Console; 14 14 15 /** 16 * <p> 17 * Command to load sessions from a web log. 18 * </p> 19 * @author Steffen Herbold 20 * @version 1.0 21 */ 15 22 public class CMDloadSessionsFromClickstream implements Command { 16 23 24 /* (non-Javadoc) 25 * @see de.ugoe.cs.util.console.Command#run(java.util.List) 26 */ 17 27 @Override 18 28 public void run(List<Object> parameters) { … … 52 62 } 53 63 64 /* (non-Javadoc) 65 * @see de.ugoe.cs.util.console.Command#help() 66 */ 54 67 @Override 55 68 public void help() { -
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/data/WebEvent.java
r111 r171 5 5 import de.ugoe.cs.eventbench.data.ReplayableEvent; 6 6 7 /** 8 * <p> 9 * This class defines web events (of PHP-based web applications). 10 * </p> 11 * 12 * @author Steffen Herbold 13 * @version 1.0 14 * 15 */ 7 16 public class WebEvent extends ReplayableEvent<WebRequest> { 8 17 … … 13 22 */ 14 23 private static final long serialVersionUID = 1L; 15 24 25 /** 26 * Timestamp of the event. 27 */ 16 28 private final long timestamp; 17 18 19 private final static String makeType(String path, List<String> postVars, List<String> getVars) { 29 30 /** 31 * <p> 32 * Helper method that generates the type of the event based on the path of the 33 * URI, the POST variables, and the GET variables. 34 * </p> 35 * 36 * @param path 37 * path of the URI of the event 38 * @param postVars 39 * POST variables sent with the event 40 * @param getVars 41 * GET variables sent with the event 42 * @return type of the event 43 */ 44 private final static String makeType(String path, List<String> postVars, 45 List<String> getVars) { 20 46 String type = path; 21 if ( getVars!=null && !getVars.isEmpty()) {22 type += "+GET" +getVars.toString().replace(" ", "");47 if (getVars != null && !getVars.isEmpty()) { 48 type += "+GET" + getVars.toString().replace(" ", ""); 23 49 } 24 if ( postVars!=null && !postVars.isEmpty()) {25 type += "+POST" +postVars.toString().replace(" ", "");50 if (postVars != null && !postVars.isEmpty()) { 51 type += "+POST" + postVars.toString().replace(" ", ""); 26 52 } 27 53 return type; 28 54 } 29 30 public WebEvent(String path, long timestamp, List<String> postVars, List<String> getVars) { 55 56 /** 57 * <p> 58 * Constructor. Creates a new WebEvent. 
59 * </p> 60 * 61 * @param path 62 * path of the URI 63 * @param timestamp 64 * timestamp of when the event took place 65 * @param postVars 66 * POST variables sent with the event 67 * @param getVars 68 * GET variables sent with the event 69 */ 70 public WebEvent(String path, long timestamp, List<String> postVars, 71 List<String> getVars) { 31 72 super(makeType(path, postVars, getVars)); 32 73 this.timestamp = timestamp; 33 74 addReplayEvent(new WebRequest(path, postVars, getVars)); 34 75 } 35 76 77 /** 78 * <p> 79 * Returns the timestamp of the event. 80 * </p> 81 * 82 * @return timestamp of the event 83 */ 36 84 public long getTimestamp() { 37 85 return timestamp; -
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/data/WebRequest.java
r111 r171 6 6 import de.ugoe.cs.eventbench.data.IReplayable; 7 7 8 /** 9 * <p> 10 * Contains all information related to a web request, i.e., the path, the POST 11 * variables and the GET variables. 12 * </p> 13 * 14 * @author Steffen Herbold 15 * @version 1.0 16 */ 8 17 public class WebRequest implements IReplayable { 9 18 10 19 /** 20 * <p> 11 21 * Id for object serialization. 22 * </p> 12 23 */ 13 24 private static final long serialVersionUID = 1L; 14 25 26 /** 27 * <p> 28 * POST variables of the web request. 29 * </p> 30 */ 15 31 List<String> postVars; 32 33 /** 34 * <p> 35 * GET variables of the web request. 36 * </p> 37 */ 16 38 List<String> getVars; 17 39 40 /** 41 * <p> 42 * URI of the web request. 43 * </p> 44 */ 18 45 String targetUri; 19 46 47 /** 48 * <p> 49 * Constructor. Creates a new WebRequest. 50 * </p> 51 * 52 * @param uri 53 * URI of the request 54 * @param postVars 55 * POST variables of the request 56 * @param getVars 57 * GET variables of the request 58 */ 20 59 public WebRequest(String uri, List<String> postVars, List<String> getVars) { 21 60 targetUri = uri; … … 23 62 this.getVars = new ArrayList<String>(getVars); 24 63 } 25 64 65 /* 66 * (non-Javadoc) 67 * 68 * @see de.ugoe.cs.eventbench.data.IReplayable#getReplay() 69 */ 26 70 @Override 27 71 public String getReplay() { … … 30 74 } 31 75 76 /* 77 * (non-Javadoc) 78 * 79 * @see de.ugoe.cs.eventbench.data.IReplayable#getTarget() 80 */ 32 81 @Override 33 82 public String getTarget() { … … 35 84 return null; 36 85 } 37 86 87 /** 88 * <p> 89 * Two {@link WebRequest}s are equal, if their {@link #targetUri}, 90 * {@link #postVars}, and {@link #getVars} are equal. 
91 * </p> 92 * 93 * @see java.lang.Object#equals(java.lang.Object) 94 */ 38 95 @Override 39 96 public boolean equals(Object other) { 40 if ( this==other) {97 if (this == other) { 41 98 return true; 42 99 } 43 if( other instanceof WebRequest ) { 44 return targetUri.equals(((WebRequest) other).targetUri) && postVars.equals(((WebRequest) other).postVars); 100 if (other instanceof WebRequest) { 101 return targetUri.equals(((WebRequest) other).targetUri) 102 && postVars.equals(((WebRequest) other).postVars) 103 && getVars.equals(((WebRequest) other).getVars); 45 104 } 46 105 return false; 47 106 } 48 107 108 /* 109 * (non-Javadoc) 110 * 111 * @see java.lang.Object#hashCode() 112 */ 49 113 @Override 50 114 public int hashCode() { … … 54 118 hash = multiplier * hash + targetUri.hashCode(); 55 119 hash = multiplier * hash + postVars.hashCode(); 120 hash = multiplier * hash + getVars.hashCode(); 56 121 57 122 return hash;
Note: See TracChangeset
for help on using the changeset viewer.