//   Copyright 2012 Georg-August-Universität Göttingen, Germany
//
//   Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS,
//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//   See the License for the specific language governing permissions and
//   limitations under the License.

package de.ugoe.cs.autoquest.plugin.html;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.xml.sax.SAXException;

import de.ugoe.cs.autoquest.eventcore.Event;
import de.ugoe.cs.autoquest.eventcore.IEventType;
import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModel;
import de.ugoe.cs.autoquest.eventcore.guimodel.GUIModelException;
import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElement;
import de.ugoe.cs.autoquest.plugin.html.eventcore.HTMLEventTypeFactory;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLDocument;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLDocumentSpec;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElement;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLGUIElementSpec;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElement;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLPageElementSpec;
import de.ugoe.cs.autoquest.plugin.html.guimodel.HTMLServerSpec;

/**
 * <p>
 * This class provides the functionality to parse XML log files generated by the HTMLMonitor of
 * AutoQUEST. The result of parsing a file is a collection of event sequences and a GUI model.
 * </p>
 * <p>
 * The parser can be configured with parsing parameters to adapt, e.g., ids or to ignore indexes of
 * parsed GUI elements. Details can be found in the manual pages of the respective parsing commands.
 * </p>
 * 
 * @author Fabian Glaser, Patrick Harms
 * @version 1.0
 * 
 */
public class HTMLLogParser extends AbstractDefaultLogParser {
    
    /**
     * <p>
     * the pattern used for parsing HTML GUI element paths
     * </p>
     */
    private Pattern htmlElementPattern =
        Pattern.compile("(\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-]+)\\))");
    
    /**
     * <p>
     * file containing parameters to influence parsing
     * </p>
     */
    private String parseParamFile;

    /**
     * <p>
     * a map containing replacement specifications for ids of GUI elements
     * </p>
     */
    private Map<String, List<ReplacementSpecification>> replacementSpecifications;

    /**
     * <p>
     * initializes the parser with the file containing parsing parameters to be considered. The
     * file is not read here. Its name is only stored and the file is loaded when the parsing
     * parameters are required for the first time. If null is provided, no parsing parameters
     * are loaded at all.
     * </p>
     *
     * @param parseParamFile the name of the file containing the parsing parameters to be
     *                       considered or null, if there are none
     */
    public HTMLLogParser(String parseParamFile) {
        this.parseParamFile = parseParamFile;
    }

    /**
     * <p>
     * creates the specification for a logged GUI element and adds it to the GUI element tree.
     * Depending on the provided parameters, the element is either a server (parameter "host"),
     * a document (parameter "path"), or a page element (parameter "tagname"). Page elements
     * whose tag name or whose parent is to be ignored are silently dropped.
     * </p>
     *
     * @param id         the id of the GUI element as used in the log
     * @param parameters the parameters logged for the GUI element
     * 
     * @return true, if the GUI element was added to the tree or intentionally ignored; false,
     *         if it refers to a parent id for which no GUI element is known yet
     * 
     * @throws SAXException if the log is invalid, e.g., if the element is of unknown kind, if
     *                      a required parent id is missing, if the parent is of the wrong type,
     *                      or if a numeric attribute can not be parsed
     * 
     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleGUIElement(String, Map)
     */
    @Override
    protected boolean handleGUIElement(String id, Map<String, String> parameters)
        throws SAXException
    {
        ensureParsingParameters();
        
        String parentId = parameters.get("parent");
        HTMLGUIElement parent = (HTMLGUIElement) super.getGUIElementTree().find(parentId);

        HTMLGUIElementSpec specification = null;

        if (parameters.containsKey("host")) {
            // this is a server specification. If no port is logged, the HTTP default is used
            int port = parseIntegerAttribute("port", parameters.get("port"), 80);
            specification = new HTMLServerSpec(parameters.get("host"), port);
        }
        else if (parameters.containsKey("path")) {
            // this is a document specification
            if (parent != null) {
                if (!(parent.getSpecification() instanceof HTMLServerSpec)) {
                    throw new SAXException
                        ("invalid log: parent GUI element of a document is not of type server");
                }
                
                specification = new HTMLDocumentSpec
                    ((HTMLServerSpec) parent.getSpecification(), parameters.get("path"),
                     parameters.get("query"), parameters.get("title"));
            }
            else if (parentId == null) {
                throw new SAXException("invalid log: a document has no parent id");
            }
            // else: the parent id is given but not known yet --> no specification is created
        }
        else if (parameters.containsKey("tagname")) {
            // this is a page element specification
            String tagName = parameters.get("tagname");
            
            if (!tagNameMustBeConsidered(tagName)) {
                return true;
            }

            if (parent != null) {
                if (!childrenMustBeConsidered(parent)) {
                    return true;
                }
                
                specification = createPageElementSpec(tagName, parameters, parent);
            }
            else if (parentId == null) {
                throw new SAXException("invalid log: a page element has no parent id");
            }
            // else: the parent id is given but not known yet --> no specification is created
        }
        else {
            throw new SAXException("invalid log: unknown GUI element");
        }

        if (specification == null) {
            return false;
        }
        
        try {
            super.getGUIElementTree().add(id, parentId, specification);
        }
        catch (GUIModelException e) {
            throw new SAXException("could not handle GUI element with id " +
                                   id + ": " + e.getMessage(), e);
        }
        
        return true;
    }

    /**
     * <p>
     * creates the specification of a page element. Searches the parent hierarchy for the
     * document the element belongs to, determines index and HTML id of the element, and applies
     * a configured replacement, if any matches.
     * </p>
     *
     * @param tagName    the tag name of the page element
     * @param parameters the parameters logged for the page element
     * @param parent     the already known parent of the page element
     * 
     * @return the specification of the page element
     * 
     * @throws SAXException if the parent hierarchy contains no document or if the logged index
     *                      is not an integer
     */
    private HTMLPageElementSpec createPageElementSpec(String              tagName,
                                                      Map<String, String> parameters,
                                                      HTMLGUIElement      parent)
        throws SAXException
    {
        IGUIElement document = parent;
        
        while ((document != null) &&
               (!(document.getSpecification() instanceof HTMLDocumentSpec)))
        {
            document = document.getParent();
        }
        
        if (document == null) {
            throw new SAXException
                ("invalid log: parent hierarchy of a page element does not contain a " +
                 "document");
        }
        
        // -1 denotes "no index"
        int index = parseIntegerAttribute("index", parameters.get("index"), -1);
        String htmlId = parameters.get("htmlid");
        
        String replacement = getReplacementMapping(tagName, index, htmlId, parent);
        
        if (replacement != null) {
            if (replacement.startsWith("CLEAR_INDEX,")) {
                // drop the index and use the remainder of the replacement as new id
                index = -1;
                replacement = replacement.substring("CLEAR_INDEX,".length());
            }
            else if ("CLEAR_INDEX".equals(replacement)) {
                // drop the index but keep the logged id
                index = -1;
                replacement = htmlId;
            }
            
            if ("".equals(replacement)) {
                htmlId = null;
            }
            else {
                htmlId = replacement;
            }
        }
        
        if ((htmlId == null) && (index == -1)) {
            // set at least a default index, if all is to be ignored.
            index = 0;
        }

        return new HTMLPageElementSpec
            ((HTMLDocumentSpec) document.getSpecification(),
             tagName.intern(), htmlId == null ? null : htmlId.intern(), index);
    }

    /**
     * <p>
     * parses the value of a numeric attribute of a logged GUI element. A malformed value is
     * reported as invalid log instead of letting a NumberFormatException escape.
     * </p>
     *
     * @param name         the name of the attribute (used for the error message only)
     * @param value        the logged value of the attribute, may be null or empty
     * @param defaultValue the value to return if the attribute is null or empty
     * 
     * @return the parsed value or the default value, if no value is logged
     * 
     * @throws SAXException if the value is neither empty nor an integer
     */
    private int parseIntegerAttribute(String name, String value, int defaultValue)
        throws SAXException
    {
        if ((value == null) || ("".equals(value))) {
            return defaultValue;
        }
        
        try {
            return Integer.parseInt(value);
        }
        catch (NumberFormatException e) {
            throw new SAXException("invalid log: value \"" + value + "\" of attribute " + name +
                                   " is not an integer", e);
        }
    }
    
    /**
     * <p>
     * looks up the replacement configured for the tag denoted by the parameters. Only the
     * replacement specifications registered for the given tag name are checked and the first
     * matching one wins.
     * </p>
     *
     * @param tagName the tag of the considered GUI element
     * @param index   the index of the GUI element
     * @param htmlId  the HTML id of the GUI element
     * @param parent  the parent GUI element of the considered GUI element
     * 
     * @return the replacement of the first matching specification; null, if none matches
     */
    private String getReplacementMapping(String tagName, int index, String htmlId,
                                         HTMLGUIElement parent)
    {
        List<ReplacementSpecification> candidates = replacementSpecifications.get(tagName);
        
        if (candidates == null) {
            // nothing configured for this tag name
            return null;
        }
        
        String result = null;
        
        for (ReplacementSpecification candidate : candidates) {
            if (candidate.matches(tagName, index, htmlId, parent)) {
                result = candidate.getReplacement();
                break;
            }
        }
        
        return result;
    }

    /**
     * <p>
     * creates an event for a logged user action and adds it to the current sequence. The target
     * of the event is either denoted by its id (parameter "target") or, in old log files, by
     * the combination of a document id and a DOM path (parameters "targetDocument" and
     * "targetDOMPath"). Events of unknown type are ignored.
     * </p>
     *
     * @param type       the type of the event as used in the log
     * @param parameters the parameters logged for the event
     * 
     * @return true, if the event was handled or ignored; false, if its target is not known yet
     * 
     * @throws SAXException if the event has no target, if an old log file is parsed although
     *                      parsing parameters are configured, if the parsing parameters can not
     *                      be loaded, or if the timestamp is not a number
     * 
     * @see de.ugoe.cs.autoquest.plugin.html.AbstractDefaultLogParser#handleEvent(String, Map)
     */
    @Override
    protected boolean handleEvent(String type, Map<String, String> parameters) throws SAXException {
        String targetId = parameters.get("target");
        
        if (targetId == null) {
            // the parsing parameters are loaded lazily. Make sure they are available here, as an
            // event may be handled before any GUI element triggered their initialization.
            ensureParsingParameters();
            
            if (!replacementSpecifications.isEmpty()) {
                throw new SAXException
                    ("old log file versions can not be parsed with parse parameters");
            }
            
            String targetDocument = parameters.get("targetDocument");
            String targetDOMPath = parameters.get("targetDOMPath");
            
            if ((targetDocument == null) || (targetDOMPath == null)) {
                throw new SAXException("event has no target defined");
            }
            
            targetId = determineTargetId(targetDocument, targetDOMPath);
            
            if (targetId == null) {
                // the target id can not be determined yet
                return false;
            }
        }
        
        IGUIElement target = super.getGUIElementTree().find(targetId);
        
        if (target == null) {
            // event not processible yet
            return false;
        }

        IEventType eventType =
            HTMLEventTypeFactory.getInstance().getEventType(type, parameters, target);
        
        if (eventType != null) {
            Event event = new Event(eventType, target);

            String timestampStr = parameters.get("timestamp");
        
            if (timestampStr != null) {
                try {
                    event.setTimestamp(Long.parseLong(timestampStr));
                }
                catch (NumberFormatException e) {
                    throw new SAXException
                        ("invalid log: timestamp \"" + timestampStr + "\" is not a number", e);
                }
            }

            ((HTMLGUIElement) event.getTarget()).markUsed();
        
            super.addToSequence(event);
        }
        // else ignore unknown event type

        return true;
    }

    /**
     * <p>
     * reads parsing parameters from the config file and makes them available for the parsing
     * process. The file is read only once. Subsequent calls return immediately. If no file is
     * configured, an empty set of parsing parameters is used. The parameters are only published
     * after they were read completely, so that a failed attempt is not mistaken for a
     * successful initialization with no parameters.
     * </p>
     * 
     * @throws SAXException if the file can not be found or read or if it contains an invalid
     *                      replacement specification
     */
    private void ensureParsingParameters() throws SAXException {
        if (replacementSpecifications != null) {
            // already initialized
            return;
        }
        
        Map<String, List<ReplacementSpecification>> loadedSpecifications =
            new HashMap<String, List<ReplacementSpecification>>();
        
        if (parseParamFile != null) {
            Properties props = new Properties();
            FileInputStream stream = null;
            try {
                stream = new FileInputStream(new File(parseParamFile));
                props.load(stream);
            }
            catch (FileNotFoundException e) {
                throw new SAXException("could not find file " + parseParamFile, e);
            }
            catch (IOException e) {
                throw new SAXException("error reading file " + parseParamFile, e);
            }
            catch (IllegalArgumentException e) {
                // thrown by Properties.load for malformed unicode escape sequences
                throw new SAXException("error reading file " + parseParamFile, e);
            }
            finally {
                if (stream != null) {
                    try {
                        stream.close();
                    }
                    catch (IOException e) {
                        // ignore, the content was already read or the failure is already reported
                    }
                }
            }

            for (Map.Entry<Object, Object> entry : props.entrySet()) {
                ReplacementSpecification replSpec;
                
                try {
                    replSpec = new ReplacementSpecification
                        ((String) entry.getKey(), (String) entry.getValue());
                }
                catch (IllegalArgumentException e) {
                    throw new SAXException("invalid parsing parameter " + entry.getKey() +
                                           " in file " + parseParamFile + ": " + e.getMessage(),
                                           e);
                }
                
                // index the specifications by the name of the tag they finally refer to
                List<ReplacementSpecification> similarReplSpecs =
                    loadedSpecifications.get(replSpec.getLastTagName());
                
                if (similarReplSpecs == null) {
                    similarReplSpecs = new LinkedList<ReplacementSpecification>();
                    loadedSpecifications.put(replSpec.getLastTagName(), similarReplSpecs);
                }
                
                similarReplSpecs.add(replSpec);
            }
        }
        
        replacementSpecifications = loadedSpecifications;
    }
    
    /**
     * <p>
     * used to determine the id of a target denoted by an event. This is only required for older
     * document formats. The new formats use concrete ids. Starting at the document, the GUI
     * model is descended along the elements of the DOM path. Empty path elements, e.g., caused
     * by a leading slash, are skipped.
     * </p>
     *
     * @param targetDocument the id of the document containing the target
     * @param targetDOMPath  the slash separated path to the target within the document
     * 
     * @return the id of the target; null, if the document or one of the elements on the path
     *         is not known yet
     * 
     * @throws SAXException if the document id does not refer to a document or if an element of
     *                      the DOM path can not be parsed
     */
    private String determineTargetId(String targetDocument, String targetDOMPath)
        throws SAXException
    {
        IGUIElement document = super.getGUIElementTree().find(targetDocument);
        
        if (document == null) {
            return null;
        }
        
        if (!(document.getSpecification() instanceof HTMLDocumentSpec)) {
            throw new SAXException("an id that should refer to an HTML document refers to " +
                                   "something else");
        }
        
        HTMLDocumentSpec documentSpec = (HTMLDocumentSpec) document.getSpecification();
        GUIModel model = super.getGUIElementTree().getGUIModel();
        IGUIElement current = document;
        
        for (String pathElement : targetDOMPath.split("/")) {
            if ("".equals(pathElement)) {
                continue;
            }
            
            Matcher matcher = htmlElementPattern.matcher(pathElement);
            if (!matcher.matches()) {
                throw new SAXException("could not parse target DOM path element " + pathElement);
            }

            String tagName = matcher.group(1);
            String indexStr = matcher.group(3);
            String htmlId = matcher.group(4);

            int index = -1;
            if ((indexStr != null) && (!"".equals(indexStr))) {
                try {
                    index = Integer.parseInt(indexStr);
                }
                catch (NumberFormatException e) {
                    // the pattern ensures digits only, but the number may still be too large
                    throw new SAXException
                        ("could not parse target DOM path element " + pathElement, e);
                }
            }

            HTMLPageElementSpec compareSpec =
                new HTMLPageElementSpec(documentSpec, tagName, htmlId, index);

            // descend to the first child being similar to the path element
            IGUIElement match = null;

            for (IGUIElement candidate : model.getChildren(current)) {
                if (compareSpec.getSimilarity(candidate.getSpecification())) {
                    match = candidate;
                    break;
                }
            }
            
            if (match == null) {
                // the denoted element is not part of the GUI model (yet)
                return null;
            }
            
            current = match;
        }
        
        return super.getGUIElementTree().find(current);
    }

    /**
     * <p>
     * decides whether tags with the provided name become part of the GUI model. Tags are
     * dropped, if their name contains characters other than letters and digits (names starting
     * with "input_" are exempted from this check) or if the name is one of a fixed list of
     * tags to ignore, e.g., "head" or "script".
     * </p>
     *
     * @param tagName the tag name to check
     * 
     * @return true, if the tag must be considered, false else
     */
    private boolean tagNameMustBeConsidered(String tagName) {
        boolean isInputVariant = tagName.startsWith("input_");
        
        if (!isInputVariant) {
            // all known HTML tags are either letters or digits, but nothing else. Any GUI model
            // containing something different is proprietary and, therefore, ignored.
            for (char character : tagName.toCharArray()) {
                if (!Character.isLetterOrDigit(character)) {
                    return false;
                }
            }
        }
        
        final String[] ignoredTagNames = {
            "head", "title", "script", "style", "link", "meta", "iframe", "input_hidden",
            "option", "tt", "br", "colgroup", "col", "hr", "param", "sfmsg", "wappalyzerdata"
        };
        
        for (String ignoredTagName : ignoredTagNames) {
            if (ignoredTagName.equals(tagName)) {
                return false;
            }
        }
        
        return true;
    }

    /**
     * <p>
     * decides whether the children of the provided parent become part of the GUI model. Only
     * the children of page elements with the tag name "svg" are dropped.
     * </p>
     *
     * @param parent the parent tag to check
     * 
     * @return true, if the child of the tag must be considered, false else
     */
    private boolean childrenMustBeConsidered(HTMLGUIElement parent) {
        boolean isSvgElement = (parent instanceof HTMLPageElement) &&
            "svg".equals(((HTMLPageElement) parent).getTagName());
        
        return !isSvgElement;
    }

    /**
     * <p>specification for a replacement consisting of path of tag or document specifications
     * and the appropriate replacement. The path is read from the key and the replacement from
     * the value of an entry in the parsing parameter file.</p>
     */
    private static class ReplacementSpecification {
        
        /**
         * <p>
         * the pattern used for parsing parsing parameters. It matches either a document
         * specification of the form <code>document(path=...)</code> or a tag name optionally
         * followed by an index condition (<code>[n]</code>) or an id condition
         * (<code>(htmlId=...)</code>). The pattern is compiled only once for all instances.
         * </p>
         */
        private static final Pattern HTML_ELEMENT_SPEC_PATTERN = Pattern.compile
            ("(document\\(path=([\\w/-]+)\\))|((\\w+)(\\[(\\d+)\\]|\\(htmlId=([\\w-_#]+)\\))?)");
        
        /**
         * <p>
         * the path of specifications (tags and document) specifying the tag for which this
         * replacement is specified
         * </p>
         */
        private final List<Spec> specs = new LinkedList<Spec>();
        
        /**
         * <p>
         * the name of the last tag in the specification path (used for indexing purposes)
         * </p>
         */
        private final String lastTagName;
        
        /**
         * <p>
         * the configured replacement
         * </p>
         */
        private final String replacement;

        /**
         * <p>
         * initializes the specification with the key/value strings from the config file. Parses
         * the key to get the specification path consisting of, optionally, a document
         * specification and one or more tag specification. The last element of the path must
         * be a tag specification, as the replacement applies to this tag.
         * </p>
         *
         * @param tagSpec     the key from the config file denoting the specification path
         * @param replacement the value from the config file denoting the replacement
         * 
         * @throws IllegalArgumentException if the key is empty, if one of its parts can not be
         *                                  parsed, or if its last part is not a tag
         *                                  specification
         */
        public ReplacementSpecification(String tagSpec, String replacement) {
            for (String specPart : split(tagSpec)) {
                Matcher matcher = HTML_ELEMENT_SPEC_PATTERN.matcher(specPart);
                
                if (!matcher.matches()) {
                    throw new IllegalArgumentException("illegal tag specification " + specPart);
                }
                
                if (matcher.group(1) != null) {
                    // first alternative of the pattern: a document specification
                    this.specs.add(new DocumentSpec(matcher.group(2)));
                }
                else if (matcher.group(4) != null) {
                    // second alternative of the pattern: a tag specification
                    String indexConditionStr = matcher.group(6);
                    Integer indexCondition = null;
                
                    if (indexConditionStr != null) {
                        try {
                            indexCondition = Integer.parseInt(indexConditionStr);
                        }
                        catch (NumberFormatException e) {
                            throw new IllegalArgumentException
                                ("illegal tag index specification " + indexConditionStr, e);
                        }
                    }
                
                    this.specs.add
                        (new TagSpec(matcher.group(4), indexCondition, matcher.group(7)));
                }
            }
            
            if (this.specs.isEmpty()) {
                throw new IllegalArgumentException
                    ("illegal tag specification " + tagSpec + ": no tag specified");
            }
            
            Spec lastSpec = this.specs.get(this.specs.size() - 1);
            
            if (!(lastSpec instanceof TagSpec)) {
                throw new IllegalArgumentException
                    ("illegal tag specification " + tagSpec + ": last element must be a tag");
            }
            
            this.lastTagName = ((TagSpec) lastSpec).getTagName();
            this.replacement = replacement;
        }

        /**
         * <p>
         * convenience method to split the key of a key/value pair from the config file into its
         * parts. The key is split at slashes that are not enclosed in parentheses, so that
         * slashes in document paths are preserved. A slash is only treated as separator, if it
         * follows a non empty part. Otherwise, it becomes the first character of the next part.
         * </p>
         *
         * @param tagSpec the key to split
         * 
         * @return the parts of the key in their original order
         */
        private static List<String> split(String tagSpec) {
            List<String> parts = new LinkedList<String>();
            
            StringBuilder currentPart = new StringBuilder();
            int openBraces = 0;
            
            for (int i = 0; i < tagSpec.length(); i++) {
                char curChar = tagSpec.charAt(i);
                if ((openBraces == 0) && ('/' == curChar) && (currentPart.length() > 0)) {
                    parts.add(currentPart.toString());
                    currentPart.setLength(0);
                }
                else {
                    if ('(' == curChar) {
                        openBraces++;
                    }
                    else if (')' == curChar) {
                        openBraces--;
                    }
                    currentPart.append(curChar);
                }
            }
            
            if (currentPart.length() > 0) {
                parts.add(currentPart.toString());
            }
            
            return parts;
        }

        /**
         * <p>
         * checks, if the tag identified by the parameters matches this specification. The
         * specification path is processed from its last to its first element. The last element
         * is compared with the tag itself, each preceding element with the next ancestor in
         * the parent hierarchy.
         * </p>
         *
         * @param tagName the name of the tag to check
         * @param index   the index of the tag to check
         * @param htmlId  the HTML id of the tag to check
         * @param parent  the parent of the tag to check
         * 
         * @return true, if the tag and its ancestors match the whole specification path, false
         *         else
         * 
         * @throws IllegalArgumentException if the specification path is longer than the
         *                                  hierarchy of page elements and document
         */
        private boolean matches(String tagName, int index, String htmlId, HTMLGUIElement parent) {
            String currentTagName = tagName;
            int currentIndex = index;
            String currentHtmlId = htmlId;
            String currentPath = null;
            HTMLGUIElement currentParent = parent;
            
            for (int i = specs.size() - 1; i >= 0; i--) {
                Spec spec = specs.get(i);
                
                if ((spec instanceof TagSpec) &&
                    (!((TagSpec) spec).matches(currentTagName, currentIndex, currentHtmlId)))
                {
                    return false;
                }
                else if ((spec instanceof DocumentSpec) &&
                         (!((DocumentSpec) spec).matches(currentPath)))
                {
                    return false;
                }
                
                if (i > 0) {
                    // there are further specifications to check --> step up to the next ancestor
                    if (currentParent instanceof HTMLPageElement) {
                        currentTagName = ((HTMLPageElement) currentParent).getTagName();
                        currentIndex = ((HTMLPageElement) currentParent).getIndex();
                        currentHtmlId = ((HTMLPageElement) currentParent).getHtmlId();
                        currentPath = null;
                        currentParent = (HTMLGUIElement) currentParent.getParent();
                    }
                    else if (currentParent instanceof HTMLDocument) {
                        currentTagName = null;
                        currentIndex = Integer.MIN_VALUE;
                        currentHtmlId = null;
                        currentPath = ((HTMLDocument) currentParent).getPath();
                        currentParent = (HTMLGUIElement) currentParent.getParent();
                    }
                    else {
                        throw new IllegalArgumentException
                            ("specification matches documents or servers. This is not supported yet.");
                    }
                }
            }
            
            return true;
        }

        /**
         * <p>
         * returns the specified replacement
         * </p>
         *
         * @return the replacement as configured in the config file
         */
        private String getReplacement() {
            return replacement;
        }

        /**
         * <p>
         * returns the name of the last tag specified in the specification path
         * </p>
         *
         * @return the name of the tag this replacement applies to
         */
        private String getLastTagName() {
            return lastTagName;
        }

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            StringBuilder result = new StringBuilder();
            for (Spec spec : specs) {
                if (result.length() > 0) {
                    result.append("/");
                }
                result.append(spec);
            }
            
            result.append('=');
            result.append(replacement);
            
            return result.toString();
        }
        
    }

    /**
     * <p>
     * parent type for document and tag specifications. It declares no methods and only serves
     * as common type for the elements of a specification path.
     * </p>
     */
    private static interface Spec { }

    /**
     * <p>
     * specification of a document. A document is identified by a part of its path.
     * </p>
     */
    private static class DocumentSpec implements Spec {
        
        /**
         * <p>
         * the part of the path the document path must have to match this specification
         * </p>
         */
        private final String pathPart;

        /**
         * <p>
         * initializes the document specification with the path part
         * </p>
         *
         * @param pathPart the part of the path a document must have to match
         */
        private DocumentSpec(String pathPart) {
            this.pathPart = pathPart;
        }

        /**
         * <p>
         * returns true if the provided path contains the path part provided to the constructor.
         * A null path, as provided when this specification is compared with something that is
         * not a document, never matches.
         * </p>
         *
         * @param path the path of the document to check, may be null
         * 
         * @return true, if the path is not null and contains the path part, false else
         */
        private boolean matches(String path) {
            return (path != null) && path.contains(pathPart);
        }

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            return "document(path=" + pathPart + ")";
        }
    }

    /**
     * <p>
     * specification for a tag. It consists of the tag name and, optionally, a condition on the
     * index or on the HTML id of the tag.
     * </p>
     */
    private static class TagSpec implements Spec {

        /**
         * <p>
         * the tag name a tag must have to match
         * </p>
         */
        private String tagName;
        
        /**
         * <p>
         * the index a tag must have to match, null if the index is irrelevant
         * </p>
         */
        private Integer indexCondition;
        
        /**
         * <p>
         * the id a tag must have to match, null if the id is irrelevant
         * </p>
         */
        private String idCondition;

        /**
         * <p>
         * creates the specification from its three constituents
         * </p>
         *
         * @param tagName        the name of the tag to match
         * @param indexCondition the index of the tag to match, or null
         * @param idCondition    the id of the tag to match, or null
         */
        private TagSpec(String tagName, Integer indexCondition, String idCondition) {
            this.tagName = tagName;
            this.indexCondition = indexCondition;
            this.idCondition = idCondition;
        }

        /**
         * <p>
         * checks the provided tag information against this specification. The tag name must be
         * equal. If an id condition is set, the id must either be equal to it or, if the
         * condition contains # characters, be of the same length and equal at all positions
         * not holding a #. If an index condition is set, the index must be equal to it.
         * </p>
         *
         * @param tagName the name of the tag to check
         * @param index   the index of the tag to check
         * @param htmlId  the HTML id of the tag to check, may be null
         * 
         * @return true, if name, id, and index fulfill this specification, false else
         */
        private boolean matches(String tagName, int index, String htmlId) {
            if (!this.tagName.equals(tagName)) {
                return false;
            }
            
            boolean idMustBeCompared = (idCondition != null) && (!idCondition.equals(htmlId));
            
            if (idMustBeCompared) {
                boolean hasWildcards = idCondition.indexOf('#') > -1;
                
                if ((htmlId == null) || (!hasWildcards)) {
                    // no condition ignoring specific characters
                    return false;
                }
                
                // first of all, the length must match
                if (idCondition.length() != htmlId.length()) {
                    return false;
                }
                
                for (int pos = 0; pos < idCondition.length(); pos++) {
                    char expected = idCondition.charAt(pos);
                    
                    // a position matches, if it is ignored or holds the expected character
                    if ((expected != '#') && (expected != htmlId.charAt(pos))) {
                        return false;
                    }
                }
            }
            
            return (indexCondition == null) || (index == indexCondition.intValue());
        }

        /**
         * <p>
         * provides the tag name used by this specification
         * </p>
         *
         * @return the name of the tags matched by this specification
         */
        private String getTagName() {
            return tagName;
        }

        /* (non-Javadoc)
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            String suffix = "";
            
            if (idCondition != null) {
                suffix = "(htmlId=" + idCondition + ')';
            }
            else if (indexCondition != null) {
                suffix = "[" + indexCondition + ']';
            }
            
            return tagName + suffix;
        }
        
    }
}
