//   Copyright 2015 Georg-August-Universität Göttingen, Germany
//
//   Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS,
//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//   See the License for the specific language governing permissions and
//   limitations under the License.

package de.ugoe.cs.autoquest.plugin.genericevents.commands;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import de.ugoe.cs.autoquest.eventcore.Event;
import de.ugoe.cs.autoquest.eventcore.StringEventType;
import de.ugoe.cs.autoquest.plugin.genericevents.eventCore.GenericEventTarget;
import de.ugoe.cs.autoquest.plugin.genericevents.eventCore.GenericEventTargetSpec;
import de.ugoe.cs.util.console.Console;

/**
 * <p>
 * Parses a tab separated query log file (read with the GB2312 character set) into one list of
 * events per user id. Every well-formed line of the file results in exactly one event.
 * </p>
 * <p>
 * The columns of a line are interpreted as follows:
 * </p>
 * <ol>
 *   <li>optionally a time of day in the format <code>HH:mm:ss</code>,</li>
 *   <li>the user id,</li>
 *   <li>zero or more columns that are concatenated to form the query,</li>
 *   <li>a column containing the selected result page and the selected result index separated
 *       by a blank,</li>
 *   <li>the selected result.</li>
 * </ol>
 * 
 * @author Patrick Harms
 */
public class SogouQDataFileParser {
    
    /** pattern used for parsing the optional time of day column */
    private static final String TIME_FORMAT_PATTERN = "HH:mm:ss";
    
    /** character set used for reading the files */
    private static final String FILE_ENCODING = "GB2312";
    
    /**
     * the maximum position of "://" in a selected result for being considered as the separator
     * between protocol and domain (and not as part of, e.g., the query of the URL)
     */
    private static final int MAX_PROTOCOL_SEPARATOR_INDEX = 15;

    /**
     * <p>
     * Reads the provided file line by line and creates an event for each well-formed line. The
     * events are grouped by the user id found in the line. Lines that have too few columns or
     * whose second to last column does not contain a page and an index separated by a blank are
     * skipped. The number of skipped lines is reported on the console.
     * </p>
     * <p>
     * The timestamps of the created events are strictly increasing in the order of the lines.
     * If a parsed time is not larger than the timestamp of the preceding event, if it cannot be
     * parsed, or if the file has no timestamps at all, the preceding timestamp is used
     * (incremented by one if the file has timestamps).
     * </p>
     *
     * @param file              the file to be parsed
     * @param hasTimestamp      true, if the first column of each line is a time of day and the
     *                          second the user id; false, if the first column is the user id
     * @param ignoreQuery       true, if all events shall have the event type "query"; false, if
     *                          the event type shall contain the query string
     * @param compareDomainOnly true, if the event target shall only be based on the domain of
     *                          the selected result instead of the full selected result
     * 
     * @return a map from user ids to the events of the respective user in the order in which
     *         they occur in the file, or null, if the file could not be read
     */
    Map<String, List<Event>> parseFile(File    file,
                                       boolean hasTimestamp,
                                       boolean ignoreQuery,
                                       boolean compareDomainOnly)
    {
        Console.println("reading file " + file);
        Map<String, List<Event>> userSessions = new HashMap<>();
        
        int timestampIndex = hasTimestamp ? 0 : -1;
        int userIdIndex = hasTimestamp ? 1 : 0;
        
        // a line needs at least the user id, the page/index column, and the selected result
        int minColumnCount = userIdIndex + 3;
        
        // SimpleDateFormat is not safe for concurrent use, hence, use one instance per call
        // instead of a shared static one
        DateFormat timeFormat = new SimpleDateFormat(TIME_FORMAT_PATTERN);
        
        long skippedLines = 0;
        
        // try-with-resources ensures that the file is closed even if reading a line fails
        try (FileInputStream in = new FileInputStream(file);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, FILE_ENCODING)))
        {
            String line;
            long lastTimeStamp = 0;
            
            while ((line = reader.readLine()) != null) {
                String[] elements = line.split("\t");
                
                if (elements.length < minColumnCount) {
                    // empty or truncated line
                    skippedLines++;
                    continue;
                }
                
                String[] pageAndIndex = elements[elements.length - 2].split(" ");
                
                if (pageAndIndex.length < 2) {
                    // page and index are not both available
                    skippedLines++;
                    continue;
                }
                
                String userId = elements[userIdIndex].intern();
                
                if (hasTimestamp) {
                    try {
                        long timestamp = timeFormat.parse(elements[timestampIndex]).getTime();
                        if (timestamp > lastTimeStamp) {
                            lastTimeStamp = timestamp;
                        }
                        else {
                            // ensure strictly increasing timestamps
                            lastTimeStamp++;
                        }
                    }
                    catch (ParseException e) {
                        // just ignore this and count next
                        lastTimeStamp++;
                    }
                }
                
                // the query is everything between the user id and the last two columns. The
                // columns are concatenated without a separator.
                StringBuilder query = new StringBuilder();
                
                for (int i = userIdIndex + 1; i < elements.length - 2; i++) {
                    query.append(elements[i]);
                }
                
                String queryStr = query.toString().intern();
                
                String selectedResultPage = pageAndIndex[0].intern();
                String selectedResultIndex = pageAndIndex[1].intern();
                String fullSelectedResult = elements[elements.length - 1];
                
                String selectedResult = fullSelectedResult;
                
                if (compareDomainOnly) {
                    selectedResult = extractDomain(fullSelectedResult);
                }
                
                selectedResult = selectedResult.intern();
                
                GenericEventTargetSpec spec = new GenericEventTargetSpec(selectedResult, null);
                
                String eventType = ignoreQuery ? "query" : ("query for " + queryStr);
                
                Event event = new Event(new StringEventType(eventType),
                                        new GenericEventTarget(spec, null));
                
                event.setTimestamp(lastTimeStamp);
                event.setParameter("userId", userId);
                event.setParameter("query", queryStr);
                event.setParameter("selectedResultPage", selectedResultPage);
                event.setParameter("selectedResultIndex", selectedResultIndex);
                event.setParameter("selectedResult", fullSelectedResult);
                
                List<Event> session = userSessions.get(userId);
                
                if (session == null) {
                    session = new ArrayList<>();
                    userSessions.put(userId, session);
                }
                
                session.add(event);
            }
        }
        catch (FileNotFoundException e) {
            Console.printerrln("could not read " + file);
            Console.logException(e);
            return null;
        }
        catch (IOException e) {
            Console.printerrln("problem while reading a line from " + file);
            Console.logException(e);
            return null;
        }
        
        if (skippedLines > 0) {
            Console.printerrln("skipped " + skippedLines + " malformed line(s) in " + file);
        }
        
        return userSessions;
    }

    /**
     * <p>
     * Reduces a selected result to its domain by removing a leading protocol (everything up to
     * and including "://") as well as everything starting with the first slash after that.
     * </p>
     *
     * @param selectedResult the selected result as found in the file
     * 
     * @return the reduced selected result
     */
    private static String extractDomain(String selectedResult) {
        String domain = selectedResult;
        
        int index = domain.indexOf("://");
        // ensure with the second condition, that we do not match something in the url query
        if ((index >= 0) && (index < MAX_PROTOCOL_SEPARATOR_INDEX)) {
            domain = domain.substring(index + 3);
        }
        
        index = domain.indexOf("/");
        if (index > 0) {
            domain = domain.substring(0, index);
        }
        
        return domain;
    }
}
