source: trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/IterationOfSubtreesDetectionRule.java @ 1131

Last change on this file since 1131 was 1127, checked in by pharms, 11 years ago
  • complete refactoring of task detection
  • many performance improvements in task detection
  • improved merging of sequences using Myers diff algorithm
  • Property svn:executable set to *
File size: 21.6 KB
Line 
1//   Copyright 2012 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.autoquest.tasktrees.temporalrelation;
16
17import java.util.ArrayList;
18import java.util.List;
19
20import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEquality;
21import de.ugoe.cs.autoquest.tasktrees.nodeequality.NodeEqualityRuleManager;
22import de.ugoe.cs.autoquest.tasktrees.treeifc.IEventTask;
23import de.ugoe.cs.autoquest.tasktrees.treeifc.IIteration;
24import de.ugoe.cs.autoquest.tasktrees.treeifc.ISelection;
25import de.ugoe.cs.autoquest.tasktrees.treeifc.ISequence;
26import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeBuilder;
27import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNode;
28import de.ugoe.cs.autoquest.tasktrees.treeifc.ITaskTreeNodeFactory;
29
30/**
31 * <p>
32 * iterations in a list of nodes are equal subsequences following each other directly. The
33 * subsequences can be of any length depending on the type of equality they need to have. If the
34 * subsequences have to be lexically equal, then they have to have the same length if they only
35 * contain event tasks. As an example entering text can be done through appropriate keystrokes or
36 * through pasting the text. As a result, two syntactically different sequences are semantically
37 * equal. If both follow each other, then they are an iteration of semantically equal children.
38 * But they are not lexically equal.
39 * </p>
40 * <p>
41 * This class determines equal subsequences following each other. It is provided with a minimal node
42 * equality the equal nodes should have. Through this, it is possible to find e.g. lexically
43 * equal subsequence through a first application of this rule and semantically equal children to
44 * a later application of this rule. This is used by the {@link TemporalRelationshipRuleManager}
45 * which instantiates this rule three times, each with a different minimal equality.
46 * </p>
47 * <p>
48 * The equal subsequences are determined through trial and error. This algorithm has a high effort
49 * as it tries in the worst case all possible combinations of sub lists in all possible parts of
50 * the list of children of a provided parent node. The steps for each trial are.
51 * <ul>
52 *   <li>for all possible subparts of the children of the provided parent
53 *   <ul>
54 *     <li>for all possible first sublists in the subpart
55 *     <ul>
56 *       <li>for all succeeding next sublists in this part</li>
57 *       <ul>
58 *         <li>check if this sublist is equal to all previously identified sublist in this part</li>
59 *       </ul>
60 *     </ul>
61 *     <li>
62 *       if a combination of sublists is found in this subpart which are all equal to each other
63 *       at the provided minimal equality level, an iteration in this subpart was found.
64 *     </li>
65 *       <ul>
66 *         <li>merge the identified equal sublists to an iteration</li>
67 *       </ul>
68 *   </ul>
69 * </ul>
70 * The algorithm tries to optimize if all children are event tasks and if the sublists shall be
71 * lexically equal. In this case, the sublist all have to have the same length. The trial and
72 * error reduces to a minimum of possible sublists.
73 * </p>
74 *
75 * @author Patrick Harms
76 */
77class IterationOfSubtreesDetectionRule implements TemporalRelationshipRule {
78
79    /**
80     * <p>
81     * the maximum length for iterated sequences
82     * </p>
83     */
84    private static final int MAX_LENGTH_OF_ITERATED_SEQUENCE = 50;
85   
86    /**
87     * <p>
88     * the task tree node factory to be used for creating substructures for the temporal
89     * relationships identified during rule
90     * </p>
91     */
92    private ITaskTreeNodeFactory taskTreeNodeFactory;
93    /**
94     * <p>
95     * the task tree builder to be used for creating substructures for the temporal relationships
96     * identified during rule application
97     * </p>
98     */
99    private ITaskTreeBuilder taskTreeBuilder;
100
101    /**
102     * <p>
103     * the node comparator used for comparing task tree nodes with each other
104     * </p>
105     */
106    private TaskTreeNodeComparator nodeComparator;
107
108    /**
109     * <p>
110     * instantiates the rule and initializes it with a node equality rule manager and the minimal
111     * node equality identified sublist must have to consider them as iterated.
112     * </p>
113     */
114    IterationOfSubtreesDetectionRule(NodeEqualityRuleManager nodeEqualityRuleManager,
115                                     NodeEquality            minimalNodeEquality,
116                                     ITaskTreeNodeFactory    taskTreeNodeFactory,
117                                     ITaskTreeBuilder        taskTreeBuilder)
118    {
119        this.taskTreeNodeFactory = taskTreeNodeFactory;
120        this.taskTreeBuilder = taskTreeBuilder;
121       
122        this.nodeComparator =
123            new TaskTreeNodeComparator(nodeEqualityRuleManager, minimalNodeEquality);
124    }
125
126    /**
127     * <p>
128     * instantiates the rule and initializes it with a node equality rule manager and the minimal
129     * node equality identified sublist must have to consider them as iterated.
130     * </p>
131     */
132    IterationOfSubtreesDetectionRule(TaskTreeNodeComparator nodeComparator,
133                                     ITaskTreeNodeFactory   taskTreeNodeFactory,
134                                     ITaskTreeBuilder       taskTreeBuilder)
135    {
136        this.nodeComparator = nodeComparator;
137        this.taskTreeNodeFactory = taskTreeNodeFactory;
138        this.taskTreeBuilder = taskTreeBuilder;
139    }
140
141    /* (non-Javadoc)
142     * @see java.lang.Object#toString()
143     */
144    @Override
145    public String toString() {
146        return "IterationOfSubtreesDetectionRule";
147    }
148
149    /*
150     * (non-Javadoc)
151     *
152     * @see de.ugoe.cs.tasktree.temporalrelation.TemporalRelationshipRule#apply(TaskTreeNode,
153     * boolean)
154     */
155    @Override
156    public RuleApplicationResult apply(ITaskTreeNode parent, boolean finalize) {
157        if (!(parent instanceof ISequence)) {
158            return null;
159        }
160
161        if (!finalize) {
162            // the rule is always feasible as iterations may occur at any time
163            RuleApplicationResult result = new RuleApplicationResult();
164            result.setRuleApplicationStatus(RuleApplicationStatus.FEASIBLE);
165            return result;
166        }
167
168        List<ITaskTreeNode> children = parent.getChildren();
169       
170        // parent must already have at least 2 children
171        if ((children == null) || (children.size() < 2)) {
172            return null;
173        }
174       
175        SubSequences subSequences = getEqualSubsequences(children);
176
177        if (subSequences != null) {
178            RuleApplicationResult result = new RuleApplicationResult();
179
180            // merge the identified variants, but preserve the differences in form of selections
181            // by using lexical equality for merge comparisons
182            TaskTreeNodeMerger merger = new TaskTreeNodeMerger
183                (taskTreeNodeFactory, taskTreeBuilder, nodeComparator);
184
185            merger.mergeTaskNodes(subSequences.equalVariants);
186           
187            IIteration newIteration =
188                createIterationBasedOnIdentifiedVariants(subSequences, result);
189
190            determineNewlyCreatedParentTasks(parent, newIteration, result);
191           
192            // remove iterated children
193            for (int j = subSequences.start; j < subSequences.end; j++) {
194                taskTreeBuilder.removeChild((ISequence) parent, subSequences.start);
195            }
196
197            // add the new iteration instead
198            taskTreeBuilder.addChild((ISequence) parent, subSequences.start, newIteration);
199
200            result.setRuleApplicationStatus(RuleApplicationStatus.FINISHED);
201            return result;
202        }
203
204        return null;
205    }
206
207    /**
208     * <p>
209     * this method initiates the trial and error algorithm denoted in the description of this class.
210     * Its main purpose is the selection of a subpart of the provided list of nodes in which
211     * equal sublists shall be searched. It is important, to always find the last iterations in a
212     * part first. The reason for this are iterations of iterations. If we always found the first
213     * iteration in a subpart first, then this may be an iteration of iterations. However, there
214     * may be subsequent iterations to be included in this iteration. But these iterations are not
215     * found yet, as they occur later in the sequence. Therefore, if we always find the last
216     * iteration in a sequence first, iterations of iterations are identified, last.
217     * </p>
218     *
219     * @param nodes the list of nodes in which iterations shall be found
220     *
221     * @return the iterated subsequences identified in a specific part (contains the equal
222     *         subsequences as well as the start (inclusive) and end (exclusive) index of the
223     *         subpart in which the sequences were found)
224     */
225    private SubSequences getEqualSubsequences(List<ITaskTreeNode> nodes) {
226        SubSequences subSequences = null;
227
228        // to find longer iterations first, start with long sequences
229        FIND_ITERATION:
230        for (int end = nodes.size(); end > 0; end--) {
231            for (int start = 0; start < end; start++) {
232                boolean useEqualSublistLengths = equalSublistLengthsCanBeUsed(nodes, start, end);
233
234                subSequences = new SubSequences();
235                subSequences.start = start;
236
237                boolean foundFurtherVariants = findFurtherVariants
238                    (subSequences, nodes, start, end, useEqualSublistLengths);
239
240                if (foundFurtherVariants) {
241                    break FIND_ITERATION;
242                }
243                else {
244                    subSequences = null;
245                }
246            }
247        }
248       
249        return subSequences;
250    }
251
252    /**
253     * <p>
254     * for optimization purposes, we check if the length of the sublists to be identified as
255     * iterations has to be the same for any sublist. This only applies, if the minimum node
256     * equality to be checked for is lexical equality. If the nodes in the provided list are all
257     * event tasks, then sublists can only be lexically equal, if they all have the same length.
258     * Therefore we check, if the minimal node equality is lexical equality. And if so, we also
259     * check if all nodes in the list in which an iteration shall be searched for are event tasks.
260     * </p>
261     *
262     * @param nodes  the list of nodes to search for iterations
263     * @param start  the beginning of the subpart (inclusive) to be considered
264     * @param end    the end of the subpart (exclusive) to be considered
265     *
266     * @return true, if the sublists must have the same lengths, false else
267     */
268    private boolean equalSublistLengthsCanBeUsed(List<ITaskTreeNode> nodes, int start, int end) {
269        boolean equalLengthsCanBeUsed =
270            nodeComparator.getConsideredNodeEquality().isAtLeast(NodeEquality.LEXICALLY_EQUAL);
271       
272        if (equalLengthsCanBeUsed) {
273            for (int i = start; i < end; i++) {
274                if (!(nodes.get(i) instanceof IEventTask)) {
275                    equalLengthsCanBeUsed = false;
276                    break;
277                }
278            }
279        }
280
281        return equalLengthsCanBeUsed;
282    }
283
284    /**
285     * <p>
286     * this method starts at a specific position in the provided list of nodes and checks, if it
287     * finds a further sublist, that matches the already found sublists. If the sublist lengths
288     * must be equal, it only searches for a sublist of the same length of the already found
289     * sublists. The method calls itself if it identifies a further equal sublist but
290     * if the end of the subpart of the provided list is not yet reached.
291     * </p>
292     *
293     * @param subSequences           the sublist found so far against which equality of the next
294     *                               sublist must be checked
295     * @param nodes                  the list of nodes to be checked for iterations
296     * @param start                  the starting index from which to start the next sublist to be
297     *                               identified
298     * @param end                    the end index (exclusive) of the current subpart of list of
299     *                               nodes in which iterations are searched for
300     * @param useEqualSublistLengths true if the sublists to be searched for all need to have the
301     *                               same length
302     *
303     * @return true if a further equal variant was found, false else
304     */
305    private boolean findFurtherVariants(SubSequences        subSequences,
306                                        List<ITaskTreeNode> nodes,
307                                        int                 start,
308                                        int                 end,
309                                        boolean             useEqualSublistLengths)
310    {
311        boolean foundFurtherVariants = (start == end) && (subSequences.equalVariants.size() > 1);
312       
313        int minChildCount = 1;
314        int maxChildCount = Math.min(MAX_LENGTH_OF_ITERATED_SEQUENCE, end - start);
315       
316        if (useEqualSublistLengths && (subSequences.equalVariants.size() > 0)) {
317            minChildCount = subSequences.equalVariants.get(0).getChildren().size();
318            maxChildCount = Math.min(minChildCount, maxChildCount);
319        }
320       
321        for (int childCount = minChildCount; childCount <= maxChildCount; childCount++) {
322            if (useEqualSublistLengths && (((end - start) % childCount) != 0)) {
323                continue;
324            }
325           
326            ISequence furtherVariant = taskTreeNodeFactory.createNewSequence();
327           
328            for (int j = start; j < start + childCount; j++) {
329                taskTreeBuilder.addChild(furtherVariant, nodes.get(j));
330            }
331           
332            boolean allVariantsEqual = true;
333           
334            for (ITaskTreeNode equalVariant : subSequences.equalVariants) {
335                if (!nodeComparator.equals(equalVariant, furtherVariant)) {
336                    allVariantsEqual = false;
337                    break;
338                }
339            }
340           
341            if (allVariantsEqual) {
342               
343                // we found a further variant. Add it to the list of variants and try to find
344                // further variants. Ignore, if none is available
345                int index = subSequences.equalVariants.size();
346                subSequences.equalVariants.add(index, furtherVariant);
347               
348                foundFurtherVariants = findFurtherVariants
349                    (subSequences, nodes, start + childCount, end, useEqualSublistLengths);
350
351                if (foundFurtherVariants) {
352                    subSequences.end = end;
353                    break;
354                }
355                else {
356                    subSequences.equalVariants.remove(index);
357                }
358            }
359        }
360       
361        return foundFurtherVariants;
362    }
363
364    /**
365     * <p>
366     * this is a convenience method to create an iteration based on the identified and already
367     * merged iterated subsequences. This method creates the simplest iteration possible. As an
368     * example, if always the same task tree node is iterated, it becomes the child of the
369     * iteration. If a sequence of tasks is iterated, this sequence becomes the child of the
370     * iteration. It several equal sublists or nodes which are not lexically equal are iterated
371     * they become a selection which in turn become the child of the iteration.
372     * </p>
373     *
374     * @param subsequences the identified and already merged equal subsequences
375     *
376     * @return the resulting iteration
377     */
378    private IIteration createIterationBasedOnIdentifiedVariants(SubSequences          subsequences,
379                                                                RuleApplicationResult result)
380    {
381        IIteration newIteration = taskTreeNodeFactory.createNewIteration();
382        result.addNewlyCreatedParentNode(newIteration);
383
384        if (subsequences.equalVariants.size() == 1) {
385            // all children are the same. Create an iteration of this child
386            if (subsequences.equalVariants.get(0).getChildren().size() == 1) {
387                // there is only one equal variant and this has only one child. So create an
388                // iteration of this child
389                taskTreeBuilder.setChild
390                    (newIteration, subsequences.equalVariants.get(0).getChildren().get(0));
391            }
392            else {
393                // there was an iteration of one equal sequence
394                taskTreeBuilder.setChild(newIteration, subsequences.equalVariants.get(0));
395                result.addNewlyCreatedParentNode(subsequences.equalVariants.get(0));
396            }
397        }
398        else {
399            // there are distinct variants of equal subsequences or children --> create an
400            // iterated selection
401            ISelection selection = taskTreeNodeFactory.createNewSelection();
402            result.addNewlyCreatedParentNode(selection);
403
404            for (ITaskTreeNode variant : subsequences.equalVariants) {
405                if (variant.getChildren().size() == 1) {
406                    taskTreeBuilder.addChild(selection, variant.getChildren().get(0));
407                }
408                else {
409                    taskTreeBuilder.addChild(selection, variant);
410                    result.addNewlyCreatedParentNode(variant);
411                }
412            }
413
414            taskTreeBuilder.setChild(newIteration, selection);
415        }
416       
417        return newIteration;
418    }
419
420    /**
421     * <p>
422     * as the method has to denote all newly created parent nodes this method identifies them by
423     * comparing the existing subtree with the newly created iteration. Only those parent nodes
424     * in the new iteration, which are not already found in the existing sub tree are denoted as
425     * newly created. We do this in this way, as during the iteration detection algorithm, many
426     * parent nodes are created, which may be discarded later. It is easier to identify the
427     * remaining newly created parent nodes through this way than to integrate it into the
428     * algorithm.
429     * </p>
430     *
431     * @param existingSubTree the existing subtree
432     * @param newSubTree      the identified iteration
433     * @param result          the rule application result into which the newly created parent nodes
434     *                        shall be stored.
435     */
436    private void determineNewlyCreatedParentTasks(ITaskTreeNode         existingSubTree,
437                                                  ITaskTreeNode         newSubTree,
438                                                  RuleApplicationResult result)
439    {
440        List<ITaskTreeNode> existingParentNodes = getParentNodes(existingSubTree);
441        List<ITaskTreeNode> newParentNodes = getParentNodes(newSubTree);
442       
443        boolean foundNode;
444        for (ITaskTreeNode newParentNode : newParentNodes) {
445            foundNode = false;
446            for (ITaskTreeNode existingParentNode : existingParentNodes) {
447                // It is sufficient to compare the references. The algorithm reuses nodes as they
448                // are. So any node existing in the new structure that is also in the old structure
449                // was unchanged an therefore does not need to be handled as a newly created one.
450                // but every node in the new structure that is not included in the old structure
451                // must be treated as a newly created one.
452                if (newParentNode == existingParentNode) {
453                    foundNode = true;
454                    break;
455                }
456            }
457           
458            if (!foundNode) {
459                result.addNewlyCreatedParentNode(newParentNode);
460            }
461        }
462       
463    }
464
465    /**
466     * <p>
467     * convenience method to determine all parent nodes existing in a subtree
468     * </p>
469     *
470     * @param subtree the subtree to search for parent nodes in
471     *
472     * @return a list of parent nodes existing in the subtree
473     */
474    private List<ITaskTreeNode> getParentNodes(ITaskTreeNode subtree) {
475        List<ITaskTreeNode> parentNodes = new ArrayList<ITaskTreeNode>();
476       
477        List<ITaskTreeNode> children = subtree.getChildren();
478       
479        if (children.size() > 0) {
480            parentNodes.add(subtree);
481           
482            for (ITaskTreeNode child : children) {
483                parentNodes.addAll(getParentNodes(child));
484            }
485        }
486       
487        return parentNodes;
488    }
489
490    /**
491     * <p>
492     * used to have a container for equal sublists identified in a sub part of the children of
493     * a parent node.
494     * </p>
495     *
496     * @author Patrick Harms
497     */
498    private static class SubSequences {
499
500        /**
501         * <p>
502         * the beginning of the subpart of the children of the parent node in which the sublists
503         * are found (inclusive)
504         * </p>
505         */
506        public int start;
507       
508        /**
509         * <p>
510         * the end of the subpart of the children of the parent node in which the sublists
511         * are found (exclusive)
512         * </p>
513         */
514        public int end;
515       
516        /**
517         * <p>
518         * the equal sublists found in the subpart of the children of the parent node
519         * </p>
520         */
521        List<ITaskTreeNode> equalVariants = new ArrayList<ITaskTreeNode>();
522       
523    }
524
525}
Note: See TracBrowser for help on using the repository browser.