package de.ugoe.cs.eventbench.web; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import de.ugoe.cs.eventbench.web.data.WebEvent; import de.ugoe.cs.util.console.Console; public class WeblogParser { private long timeout; private int minLength = 2; private List> sequences; public WeblogParser() { timeout = 3600000; // 1 hour session-timeout as default } public WeblogParser(long timeout) { this.timeout = timeout; } public List> getSequences() { return sequences; } public void setTimeout(long timeout) { this.timeout = timeout; } public void setMinLength(int minLength) { this.minLength = minLength; } public void parseFile(String filename) throws IOException, FileNotFoundException, ParseException { File f = new File(filename); FileReader reader = new FileReader(f); char[] buffer = new char[(int) f.length()]; reader.read(buffer); reader.close(); String[] lines = (new String(buffer)).split("\n"); Map> cookieSessionMap = new HashMap>(); int lastId = -1; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); sequences = new ArrayList>(); for( String line : lines ) { String[] values = line.trim().split(" "); // use cookie as session identifier int cookieStart = values[0].lastIndexOf('.'); String cookie = values[0].substring(cookieStart+1); String dateString = values[1].substring(1)+" "+values[2].substring(0, values[2].length()-1); long timestamp = dateFormat.parse(dateString).getTime(); String uri = values[3]; // String ref = values[4]; // referer is not yet used! List postedVars = new ArrayList(); for( int i=5 ; i sessionIds = cookieSessionMap.get(cookie); if( sessionIds==null ) { sessionIds = new ArrayList(); // start new session sessionIds.add(++lastId); cookieSessionMap.put(cookie, sessionIds); sequences.add(new LinkedList()); } Integer lastSessionIndex = sessionIds.get(sessionIds.size()-1); List lastSession = sequences.get(lastSessionIndex); long lastEventTime = timestamp; if( !lastSession.isEmpty() ) { lastEventTime = lastSession.get(lastSession.size()-1).getTimestamp(); } if( timestamp-lastEventTime>timeout ) { sessionIds.add(++lastId); List newSession = new LinkedList(); newSession.add(event); sequences.add(newSession); } else { lastSession.add(event); } } Console.traceln(""+sequences.size()+ " user sequences found"); // prune sequences shorter than min-length for( int i=0; i