- Timestamp:
- 08/13/14 09:26:50 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java
r1666 r1668 148 148 RuleApplicationData appData = new RuleApplicationData(sessions); 149 149 150 // this is the real rule application. Loop while something is replaced. 151 SymbolMap<ITaskInstance, ITask> uniqueTasks = harmonizeEventTaskInstancesModel(appData); 152 153 // Generate a substitution matrix between all occurring events. 154 Console.traceln(Level.INFO, "generating substitution matrix"); 155 ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix( 156 uniqueTasks, 6, -3); 157 submat.generate(); 158 159 // Generate pairwise alignments 160 Console.traceln(Level.INFO, "generating pairwise alignments"); 161 LinkedList<Match> matchseqs = new LinkedList<Match>(); 162 PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator 163 .generate(appData.getNumberSequences(), submat, 9); 164 165 // Retrieve all matches reached a specific threshold 166 Console.traceln(Level.INFO, "retrieving significant sequence pieces"); 167 for (int i = 0; i < appData.getNumberSequences().size(); i++) { 168 Console.traceln( 169 Level.FINEST, 170 "retrieving significant sequence pieces: " 171 + Math.round((float) i 172 / (float) appData.getNumberSequences() 173 .size() * 100) + "%"); 174 for (int j = 0; j < appData.getNumberSequences().size(); j++) { 175 if (i != j) { 176 matchseqs.addAll(alignments.get(i, j).getMatches()); 177 } 178 } 179 } 180 Console.traceln(Level.FINEST, 181 "retrieving significant sequence pieces: 100%"); 182 Console.traceln(Level.INFO, "searching for patterns occuring most"); 183 184 // search each match in every other sequence 185 for (Iterator<Match> it = matchseqs.iterator(); it.hasNext();) { 186 Match pattern = it.next(); 187 188 // Skip sequences with more 0 events (scrolls) than other events. 189 // Both of the pattern sequences are equally long, so the zero 190 // counts just need to be smaller than the length of one sequence 191 if (pattern.getFirstSequence().eventCount(0) 192 + pattern.getSecondSequence().eventCount(0) + 1 > pattern 193 .getFirstSequence().size()) 194 continue; 195 196 for (int j = 0; j < appData.getNumberSequences().size(); j++) { 197 LinkedList<Integer> startpositions = appData 198 .getNumberSequences().get(j).containsPattern(pattern); 199 if (startpositions.size() > 0) { 200 for (Iterator<Integer> jt = startpositions.iterator(); jt 201 .hasNext();) { 202 int start = jt.next(); 203 pattern.addOccurence(new MatchOccurence(start, start 204 + pattern.size(), j)); 205 } 206 207 } 208 } 209 } 210 211 Console.traceln(Level.INFO, "sorting results"); 212 // Sort results to get the most occurring results 213 Comparator<Match> comparator = new Comparator<Match>() { 214 public int compare(Match m1, Match m2) { 215 return m2.occurenceCount() - m1.occurenceCount(); 216 217 } 218 }; 219 Collections.sort(matchseqs, comparator); 220 221 HashMap<Integer, List<MatchOccurence>> replacedOccurences = new HashMap<Integer, List<MatchOccurence>>(); 222 // Replace matches in the sessions 223 for (int i = 0; i < matchseqs.size(); i++) { 224 // Every pattern consists of 2 sequences, therefore the minimum 225 // occurrences here is 2. 226 // We just need the sequences also occurring in other sequences as 227 // well 228 if (matchseqs.get(i).occurenceCount() > 2) { 229 230 ISequence task = matchAsSequence(appData, matchseqs.get(i)); 231 invalidOccurence: for (Iterator<MatchOccurence> it = matchseqs 232 .get(i).getOccurences().iterator(); it.hasNext();) { 233 MatchOccurence oc = it.next(); 234 /* 235 System.out.println("Trying to replace sequence: "); 236 matchseqs.get(i).getFirstSequence().printSequence(); 237 matchseqs.get(i).getSecondSequence().printSequence(); 238 System.out.println(" in session number: " 239 + (oc.getSequenceId() + 1) 240 + " at position " 241 + (oc.getStartindex()) 242 + "-" 243 + oc.getEndindex()); 244 System.out.println(); 245 */ 246 247 // System.out.println("Printing session: "); 248 //for (int j = 0; j < sessions.get(oc.getSequenceId()).size(); j++) { 249 // System.out.println(j + ": " 250 // + sessions.get(oc.getSequenceId()).get(j)); 251 //} 252 253 // Check if nothing has been replaced in the sequence we 254 // want to replace 255 if (replacedOccurences.get(oc.getSequenceId()) == null) { 256 replacedOccurences.put(oc.getSequenceId(), 257 new LinkedList<MatchOccurence>()); 258 } else { 259 // check if we have any replaced occurence with indexes 260 // smaller than ours. If so, we need to adjust our start 261 // and endpoints 262 // of the replacement. 263 // Also do a check if we have replaced this specific 264 // MatchOccurence in this sequence already. Jump to the 265 // next occurence if this is the case. 266 // This is no more neccessary once the matches are 267 // harmonized. 268 for (Iterator<MatchOccurence> jt = replacedOccurences 269 .get(oc.getSequenceId()).iterator(); jt 270 .hasNext();) { 271 MatchOccurence tmpOC = jt.next(); 272 273 if (oc.getStartindex() >= tmpOC.getStartindex() && oc.getStartindex()<=tmpOC.getEndindex()) { 274 continue invalidOccurence; 275 } 276 if (oc.getEndindex()>=tmpOC.getStartindex()) { 277 continue invalidOccurence; 278 279 } 280 else if (oc.getStartindex()>tmpOC.getEndindex()) { 281 int diff = tmpOC.getEndindex() 282 - tmpOC.getStartindex(); 283 // Just to be sure. 284 if (diff > 0) { 285 oc.setStartindex(oc.getStartindex() - diff+1); 286 oc.setEndindex(oc.getEndindex() - diff+1); 287 } else { 288 Console.traceln(Level.WARNING, 289 "End index of a Match before start. This should never happen"); 290 } 291 } 292 } 293 } 294 ISequenceInstance sequenceInstances = RuleUtils 295 .createNewSubSequenceInRange( 296 sessions.get(oc.getSequenceId()), 297 oc.getStartindex(), oc.getEndindex(), task, 298 taskFactory, taskBuilder); 299 // Adjust the length of the match regarding to the length of 300 // instance. (OptionalInstances may be shorter) 301 oc.setEndindex(oc.getStartindex() 302 + sequenceInstances.size() 303 - RuleUtils.missedOptionals); 304 replacedOccurences.get(oc.getSequenceId()).add(oc); 305 } 306 } 307 } 308 309 alignments = null; 150 151 harmonizeEventTaskInstancesModel(appData); 310 152 311 153 do { 312 154 313 155 appData.getStopWatch().start("whole loop"); // 314 detectAndReplaceIterations(appData); 315 156 //detectAndReplaceIterations(appData); 316 157 appData.getStopWatch().start("task replacement"); // 317 //detectAndReplaceTasks(appData); //158 detectAndReplaceTasks(appData); // 318 159 appData.getStopWatch().stop("task replacement"); // 319 160 appData.getStopWatch().stop("whole loop"); 320 321 161 appData.getStopWatch().dumpStatistics(System.out); // 322 162 appData.getStopWatch().reset(); … … 353 193 * @return Returns the unique tasks symbol map 354 194 */ 355 private SymbolMap<ITaskInstance, ITask>harmonizeEventTaskInstancesModel(195 private void harmonizeEventTaskInstancesModel( 356 196 RuleApplicationData appData) { 357 197 Console.traceln(Level.INFO, 358 198 "harmonizing task model of event task instances"); 359 199 appData.getStopWatch().start("harmonizing event tasks"); 360 361 SymbolMap<ITaskInstance, ITask>uniqueTasks = preparationTaskHandlingStrategy200 201 appData.uniqueTasks = preparationTaskHandlingStrategy 362 202 .createSymbolMap(); 203 363 204 TaskInstanceComparator comparator = preparationTaskHandlingStrategy 364 205 .getTaskComparator(); … … 374 215 for (int i = 0; i < session.size(); i++) { 375 216 ITaskInstance taskInstance = session.get(i); 376 task = uniqueTasks.getValue(taskInstance);217 task = appData.getUniqueTasks().getValue(taskInstance); 377 218 378 219 if (task == null) { 379 uniqueTasks.addSymbol(taskInstance, taskInstance.getTask());220 appData.getUniqueTasks().addSymbol(taskInstance, taskInstance.getTask()); 380 221 templist.getSequence()[i] = taskInstance.getTask().getId(); 381 222 … … 404 245 appData.getStopWatch().stop("harmonizing event tasks"); 405 246 Console.traceln(Level.INFO, "harmonized " + unifiedTasks 406 + " task occurrences (still " + uniqueTasks.size()247 + " task occurrences (still " + appData.getUniqueTasks().size() 407 248 + " different tasks)"); 408 249 409 250 appData.getStopWatch().dumpStatistics(System.out); 410 251 appData.getStopWatch().reset(); 411 return uniqueTasks;412 252 } 413 253 … … 675 515 appData.getStopWatch().start("detecting tasks"); 676 516 677 // getSequencesOccuringMostOften(appData); 678 679 appData.getStopWatch().stop("detecting tasks"); 680 appData.getStopWatch().start("replacing tasks"); 681 682 replaceSequencesOccurringMostOften(appData); 683 517 518 // Generate a substitution matrix between all occurring events. 519 Console.traceln(Level.INFO, "generating substitution matrix"); 520 ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix( 521 appData.getUniqueTasks(), 6, -3); 522 submat.generate(); 523 524 // Generate pairwise alignments 525 Console.traceln(Level.INFO, "generating pairwise alignments"); 526 LinkedList<Match> matchseqs = new LinkedList<Match>(); 527 PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator 528 .generate(appData.getNumberSequences(), submat, 9); 529 530 // Retrieve all matches reached a specific threshold 531 Console.traceln(Level.INFO, "retrieving significant sequence pieces"); 532 for (int i = 0; i < appData.getNumberSequences().size(); i++) { 533 Console.traceln( 534 Level.FINEST, 535 "retrieving significant sequence pieces: " 536 + Math.round((float) i 537 / (float) appData.getNumberSequences() 538 .size() * 100) + "%"); 539 for (int j = 0; j < appData.getNumberSequences().size(); j++) { 540 if (i != j) { 541 matchseqs.addAll(alignments.get(i, j).getMatches()); 542 } 543 } 544 } 545 Console.traceln(Level.FINEST, 546 "retrieving significant sequence pieces: 100%"); 547 Console.traceln(Level.INFO, "searching for patterns occuring most"); 548 549 // search each match in every other sequence 550 for (Iterator<Match> it = matchseqs.iterator(); it.hasNext();) { 551 Match pattern = it.next(); 552 553 // Skip sequences with more 0 events (scrolls) than other events. 554 // Both of the pattern sequences are equally long, so the zero 555 // counts just need to be smaller than the length of one sequence 556 if (pattern.getFirstSequence().eventCount(0) 557 + pattern.getSecondSequence().eventCount(0) + 1 > pattern 558 .getFirstSequence().size()) 559 continue; 560 561 for (int j = 0; j < appData.getNumberSequences().size(); j++) { 562 LinkedList<Integer> startpositions = appData 563 .getNumberSequences().get(j).containsPattern(pattern); 564 if (startpositions.size() > 0) { 565 for (Iterator<Integer> jt = startpositions.iterator(); jt 566 .hasNext();) { 567 int start = jt.next(); 568 pattern.addOccurence(new MatchOccurence(start, start 569 + pattern.size(), j)); 570 } 571 572 } 573 } 574 } 575 576 Console.traceln(Level.INFO, "sorting results"); 577 // Sort results to get the most occurring results 578 Comparator<Match> comparator = new Comparator<Match>() { 579 public int compare(Match m1, Match m2) { 580 return m2.occurenceCount() - m1.occurenceCount(); 581 582 } 583 }; 584 Collections.sort(matchseqs, comparator); 585 appData.getStopWatch().stop("detecting tasks"); 586 587 appData.getStopWatch().start("replacing tasks"); 588 HashMap<Integer, List<MatchOccurence>> replacedOccurences = new HashMap<Integer, List<MatchOccurence>>(); 589 // Replace matches in the sessions 590 for (int i = 0; i < matchseqs.size(); i++) { 591 // Every pattern consists of 2 sequences, therefore the minimum 592 // occurrences here is 2. 593 // We just need the sequences also occurring in other sequences as 594 // well 595 if (matchseqs.get(i).occurenceCount() > 2) { 596 597 ISequence task = matchAsSequence(appData, matchseqs.get(i)); 598 invalidOccurence: for (Iterator<MatchOccurence> it = matchseqs 599 .get(i).getOccurences().iterator(); it.hasNext();) { 600 MatchOccurence oc = it.next(); 601 /* 602 System.out.println("Trying to replace sequence: "); 603 matchseqs.get(i).getFirstSequence().printSequence(); 604 matchseqs.get(i).getSecondSequence().printSequence(); 605 System.out.println(" in session number: " 606 + (oc.getSequenceId() + 1) 607 + " at position " 608 + (oc.getStartindex()) 609 + "-" 610 + oc.getEndindex()); 611 System.out.println(); 612 */ 613 614 // System.out.println("Printing session: "); 615 //for (int j = 0; j < sessions.get(oc.getSequenceId()).size(); j++) { 616 // System.out.println(j + ": " 617 // + sessions.get(oc.getSequenceId()).get(j)); 618 //} 619 620 // Check if nothing has been replaced in the sequence we 621 // want to replace 622 if (replacedOccurences.get(oc.getSequenceId()) == null) { 623 replacedOccurences.put(oc.getSequenceId(), 624 new LinkedList<MatchOccurence>()); 625 } else { 626 // check if we have any replaced occurence with indexes 627 // smaller than ours. If so, we need to adjust our start 628 // and endpoints 629 // of the replacement. 630 // Also do a check if we have replaced this specific 631 // MatchOccurence in this sequence already. Jump to the 632 // next occurence if this is the case. 633 // This is no more neccessary once the matches are 634 // harmonized. 635 for (Iterator<MatchOccurence> jt = replacedOccurences 636 .get(oc.getSequenceId()).iterator(); jt 637 .hasNext();) { 638 MatchOccurence tmpOC = jt.next(); 639 640 if (oc.getStartindex() >= tmpOC.getStartindex() && oc.getStartindex()<=tmpOC.getEndindex()) { 641 continue invalidOccurence; 642 } 643 if (oc.getEndindex()>=tmpOC.getStartindex()) { 644 continue invalidOccurence; 645 646 } 647 else if (oc.getStartindex()>tmpOC.getEndindex()) { 648 int diff = tmpOC.getEndindex() 649 - tmpOC.getStartindex(); 650 // Just to be sure. 651 if (diff > 0) { 652 oc.setStartindex(oc.getStartindex() - diff+1); 653 oc.setEndindex(oc.getEndindex() - diff+1); 654 } else { 655 Console.traceln(Level.WARNING, 656 "End index of a Match before start. This should never happen"); 657 } 658 } 659 } 660 } 661 ISequenceInstance sequenceInstances = RuleUtils 662 .createNewSubSequenceInRange( 663 appData.getSessions().get(oc.getSequenceId()), 664 oc.getStartindex(), oc.getEndindex(), task, 665 taskFactory, taskBuilder); 666 // Adjust the length of the match regarding to the length of 667 // instance. (OptionalInstances may be shorter) 668 oc.setEndindex(oc.getStartindex() 669 + sequenceInstances.size() 670 - RuleUtils.missedOptionals); 671 replacedOccurences.get(oc.getSequenceId()).add(oc); 672 } 673 } 674 } 675 676 alignments = null; 684 677 appData.getStopWatch().stop("replacing tasks"); 685 686 // Console.traceln(Level.INFO, "detected and replaced " 687 // + appData.getLastFoundTasks().size() + " tasks occuring " 688 // + appData.getLastFoundTasks().getOccurrenceCount() + " times"); 689 } 690 691 /** 692 * @param appData 693 * the rule application data combining all data used for applying 694 * this rule 695 */ 696 private void replaceSequencesOccurringMostOften(RuleApplicationData appData) { 697 appData.detectedAndReplacedTasks(false); 698 699 /* 700 * Console.traceln(Level.FINER, "replacing tasks occurrences"); 701 * 702 * for (List<ITaskInstance> task : appData.getLastFoundTasks()) { 703 * ISequence sequence = taskFactory.createNewSequence(); 704 * 705 * Console.traceln(Level.FINEST, "replacing " + sequence.getId() + ": " 706 * + task); 707 * 708 * List<ISequenceInstance> sequenceInstances = replaceTaskOccurrences( 709 * task, appData.getSessions(), sequence); 710 * 711 * harmonizeSequenceInstancesModel(sequence, sequenceInstances, 712 * task.size()); appData.detectedAndReplacedTasks(appData 713 * .detectedAndReplacedTasks() || (sequenceInstances.size() > 0)); 714 * 715 * if (sequenceInstances.size() < appData.getLastFoundTasks() 716 * .getOccurrenceCount()) { Console.traceln(Level.FINE, sequence.getId() 717 * + ": replaced task only " + sequenceInstances.size() + 718 * " times instead of expected " + appData.getLastFoundTasks() 719 * .getOccurrenceCount()); } } 720 */ 721 } 678 } 679 722 680 723 681 /** … … 791 749 } 792 750 793 /** 794 * @param tree 795 */ 796 private List<ISequenceInstance> replaceTaskOccurrences( 797 List<ITaskInstance> task, List<IUserSession> sessions, 798 ISequence temporalTaskModel) { 799 List<ISequenceInstance> sequenceInstances = new LinkedList<ISequenceInstance>(); 800 801 for (IUserSession session : sessions) { 802 int index = -1; 803 804 do { 805 index = getSubListIndex(session, task, ++index); 806 807 if (index > -1) { 808 sequenceInstances.add(RuleUtils 809 .createNewSubSequenceInRange(session, index, index 810 + task.size() - 1, temporalTaskModel, 811 taskFactory, taskBuilder)); 812 } 813 } while (index > -1); 814 } 815 816 return sequenceInstances; 817 } 818 819 /** 820 * @param trie 821 * @param object 822 * @return 823 */ 824 private int getSubListIndex(ITaskInstanceList list, 825 List<ITaskInstance> subList, int startIndex) { 826 boolean matchFound; 827 int result = -1; 828 829 for (int i = startIndex; i <= list.size() - subList.size(); i++) { 830 matchFound = true; 831 832 for (int j = 0; j < subList.size(); j++) { 833 // we prepared the task instances to refer to unique tasks, if 834 // they are treated 835 // as equal. Therefore, we just compare the identity of the 836 // tasks of the task 837 // instances 838 if (list.get(i + j).getTask() != subList.get(j).getTask()) { 839 matchFound = false; 840 break; 841 } 842 } 843 844 if (matchFound) { 845 result = i; 846 break; 847 } 848 } 849 850 return result; 851 } 852 751 853 752 /** 854 753 * … … 857 756 858 757 private HashMap<Integer, ITask> number2task; 758 759 private SymbolMap<ITaskInstance, ITask> uniqueTasks; 859 760 860 761 private ArrayList<NumberSequence> numberseqs; … … 898 799 } 899 800 801 private SymbolMap<ITaskInstance, ITask> getUniqueTasks() { 802 return uniqueTasks; 803 } 804 805 private void setUniqueTasks(SymbolMap<ITaskInstance, ITask> ut) { 806 this.uniqueTasks = ut; 807 } 808 900 809 private ArrayList<NumberSequence> getNumberSequences() { 901 810 return numberseqs; 902 811 } 903 812 904 /**905 *906 */907 private void detectedAndReplacedTasks(boolean detectedAndReplacedTasks) {908 this.detectedAndReplacedTasks = detectedAndReplacedTasks;909 }910 813 911 814 /**
Note: See TracChangeset
for help on using the changeset viewer.