Changeset 1119 for trunk/autoquest-core-tasktrees/src/main/java/de
- Timestamp:
- 03/06/13 23:39:45 (12 years ago)
- Location:
- trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation
- Files:
-
- 2 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/autoquest-core-tasktrees/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/DefaultTaskSequenceDetectionRule.java
r1113 r1119 15 15 package de.ugoe.cs.autoquest.tasktrees.temporalrelation; 16 16 17 import java.util.Collection;18 17 import java.util.Iterator; 19 18 import java.util.LinkedList; … … 28 27 import de.ugoe.cs.autoquest.usageprofiles.SymbolComparator; 29 28 import de.ugoe.cs.autoquest.usageprofiles.Trie; 29 import de.ugoe.cs.autoquest.usageprofiles.TrieProcessor; 30 30 31 31 /** … … 78 78 } 79 79 80 /* (non-Javadoc) 81 * @see java.lang.Object#toString() 82 */ 83 @Override 84 public String toString() { 85 return "DefaultTaskSequenceDetectionRule"; 86 } 87 80 88 /* 81 89 * (non-Javadoc) … … 97 105 } 98 106 99 long timestamp1; 100 long timestamp2 = System.currentTimeMillis(); 101 long ms1 = 0; 102 long ms2 = 0; 103 long ms3 = 0; 104 long ms4 = 0; 105 106 List<ISequence> createdSequences = new LinkedList<ISequence>(); 107 108 int evisagedNoOfOccurrences = 0; 107 RuleApplicationData<ITaskTreeNode> appData = new RuleApplicationData<ITaskTreeNode>(parent); 108 109 appData.getStopWatch().start("whole rule application"); 109 110 110 111 do { 111 timestamp1 = timestamp2; 112 Trie<ITaskTreeNode> trie = new Trie<ITaskTreeNode>(nodeComparator); 113 114 System.out.println("training trie"); 115 trainTrie(trie, parent, createdSequences); 116 117 timestamp2 = System.currentTimeMillis(); 118 ms1 += timestamp2 - timestamp1; 119 timestamp1 = timestamp2; 120 121 System.out.println("determining most prominent tasks"); 122 Collection<List<ITaskTreeNode>> tasks = 123 trie.getSequencesWithOccurrenceCount(2, evisagedNoOfOccurrences); 124 125 tasks = sortAndRemovePermutationsOfShorterTasks(tasks); 126 127 timestamp2 = System.currentTimeMillis(); 128 ms2 += timestamp2 - timestamp1; 129 timestamp1 = timestamp2; 130 131 if ((tasks != null) && (tasks.size() > 0)) { 132 Iterator<List<ITaskTreeNode>> tasksIterator = tasks.iterator(); 133 134 while (tasksIterator.hasNext()) { 135 List<ITaskTreeNode> task = tasksIterator.next(); 136 evisagedNoOfOccurrences = trie.getCount(task); 137 112 getSequencesOccuringMostOften(appData); 113 114 if ((appData.getLastFoundSequences().size() > 0) && 115 (appData.getLastFoundSequences().getOccurrenceCount() > 1)) 116 { 117 System.out.println("found " + appData.getLastFoundSequences().size() + 118 " tasks occurring " + 119 appData.getLastFoundSequences().getOccurrenceCount() + " times"); 120 121 for (List<ITaskTreeNode> task : appData.getLastFoundSequences()) { 138 122 // only tasks occurring more often than once are of interest 139 if (evisagedNoOfOccurrences > 1) { 140 timestamp2 = System.currentTimeMillis(); 141 ms3 += timestamp2 - timestamp1; 142 timestamp1 = timestamp2; 143 144 String taskId = "task " + RuleUtils.getNewId(); 145 System.out.println("creating sequences for task " + taskId + ": " + task); 146 createSequenceForTaskOccurrences(taskId, task, parent, createdSequences); 147 148 timestamp2 = System.currentTimeMillis(); 149 ms4 += timestamp2 - timestamp1; 150 timestamp1 = timestamp2; 123 appData.stopWatch.start("creating task sequences"); 124 125 String taskId = "task " + RuleUtils.getNewId(); 126 System.out.println(taskId + ": " + task); 127 128 appData.resetReplacementCounter(); 129 createSequenceForTaskOccurrences(taskId, task, parent, appData); 130 131 if (appData.getReplacementCounter() < 132 appData.getLastFoundSequences().getOccurrenceCount()) 133 { 134 System.out.println(taskId + ": replaced task only " + 135 appData.getReplacementCounter() + 136 " times instead of expected " + 137 appData.getLastFoundSequences().getOccurrenceCount()); 138 151 139 } 152 } 153 } 154 155 evisagedNoOfOccurrences--; 156 } 157 while (evisagedNoOfOccurrences > 1); 158 159 System.out.println(ms1 + " " + ms2 + " " + ms3 + " " + ms4); 160 RuleApplicationResult result = new RuleApplicationResult(); 161 162 if (createdSequences.size() > 0) { 163 for (ISequence sequence : createdSequences) { 164 result.addNewlyCreatedParentNode(sequence); 165 } 166 167 result.setRuleApplicationStatus(RuleApplicationStatus.RULE_APPLICATION_FINISHED); 168 } 169 170 171 return result; 140 141 appData.stopWatch.stop("creating task sequences"); 142 } 143 } 144 145 } 146 while (appData.getLastFoundSequences().getOccurrenceCount() > 2); 147 148 appData.getStopWatch().stop("whole rule application"); 149 appData.getStopWatch().dumpStatistics(System.out); 150 151 if (appData.getResult().getNewlyCreatedParentNodes().size() > 0) { 152 appData.getResult().setRuleApplicationStatus 153 (RuleApplicationStatus.RULE_APPLICATION_FINISHED); 154 } 155 156 System.out.println(appData.getResult().getNewlyCreatedParentNodes().size()); 157 158 for (ITaskTreeNode node : appData.getResult().getNewlyCreatedParentNodes()) { 159 for (ITaskTreeNode child : node.getChildren()) { 160 System.out.println(child); 161 } 162 System.out.println(); 163 } 164 165 return appData.getResult(); 166 } 167 168 /** 169 * <p> 170 * TODO: comment 171 * </p> 172 * 173 * @param i 174 * @return 175 */ 176 private void getSequencesOccuringMostOften(RuleApplicationData<ITaskTreeNode> appData) { 177 System.out.println("determining most prominent tasks with a maximum of " + 178 (appData.getLastFoundSequences().getOccurrenceCount() - 1) + 179 " occurrences"); 180 181 Sequences<ITaskTreeNode> sequences; 182 boolean createNewTrie = (appData.getLastTrie() == null) || 183 appData.getReplacementCounter() > 0; // tree has changed 184 185 do { 186 if (createNewTrie) { 187 createNewTrie(appData); 188 } 189 190 /*PathDumper dumper = new PathDumper(); 191 trie.process(dumper); 192 193 dumper.dump();*/ 194 195 appData.getStopWatch().start("determining tasks"); 196 197 MaxCountAndLongestSequenceFinder finder = new MaxCountAndLongestSequenceFinder 198 (appData.getLastFoundSequences().getOccurrenceCount() - 1); 199 appData.getLastTrie().process(finder); 200 201 sequences = finder.getFoundSequences(); 202 203 createNewTrie = false; 204 205 for (List<ITaskTreeNode> task : sequences) { 206 if (task.size() >= appData.getTrainedSequenceLength()) { 207 // Trie must be recreated with a longer sequence length to be sure that 208 // the found sequences occurring most often are found in their whole length 209 appData.setTrainedSequenceLength(appData.getTrainedSequenceLength() + 1); 210 createNewTrie = true; 211 break; 212 } 213 } 214 215 appData.getStopWatch().stop("determining tasks"); 216 } 217 while (createNewTrie); 218 219 appData.setLastFoundSequences(sequences); 220 } 221 222 /** 223 * <p> 224 * TODO: comment 225 * </p> 226 * 227 * @param parent 228 * @return 229 */ 230 private void createNewTrie(RuleApplicationData<ITaskTreeNode> appData) { 231 System.out.println("training trie with a maximum sequence length of " + 232 appData.getTrainedSequenceLength()); 233 234 appData.getStopWatch().start("training trie"); 235 appData.setLastTrie(new Trie<ITaskTreeNode>(nodeComparator)); 236 237 trainTrie(appData.getTree(), appData); 238 appData.getStopWatch().stop("training trie"); 172 239 } 173 240 … … 180 247 * @param parent 181 248 */ 182 private void trainTrie(Trie<ITaskTreeNode> trie, 183 ITaskTreeNode parent, 184 List<ISequence> createdSequences) 185 { 186 List<ITaskTreeNode> children = parent.getChildren(); 187 188 if ((children != null) && (children.size() > 0)) { 189 trie.train(children, children.size()); 190 191 for (ITaskTreeNode child : children) { 192 trainTrie(trie, child, createdSequences); 193 } 194 } 195 } 196 197 /** 198 * 199 */ 200 private List<List<ITaskTreeNode>> 201 sortAndRemovePermutationsOfShorterTasks(Collection<List<ITaskTreeNode>> tasks) 202 { 203 // first of all create a sorted list of the tasks with the longest first 204 List<List<ITaskTreeNode>> sortedTasks = new LinkedList<List<ITaskTreeNode>>(); 205 206 boolean added; 207 for (List<ITaskTreeNode> task : tasks) { 208 added = false; 209 for (int i = 0; i < sortedTasks.size(); i++) { 210 if (sortedTasks.get(i).size() < task.size()) { 211 sortedTasks.add(i, task); 212 added = true; 213 break; 214 } 215 } 216 217 if (!added) { 218 sortedTasks.add(task); 219 } 220 } 221 222 // now iterate the sorted list and for each task remove all other tasks that are shorter 223 // (i.e. come later in the sorted list) and that represent a subpart of the task 224 for (int i = 0; i < sortedTasks.size(); i++) { 225 for (int j = i + 1; j < sortedTasks.size();) { 226 if (sortedTasks.get(j).size() < sortedTasks.get(i).size()) { 227 // found a task that is a potential subtask. Check for this and remove the 228 // subtask if needed 229 List<ITaskTreeNode> longTask = sortedTasks.get(i); 230 List<ITaskTreeNode> shortTask = sortedTasks.get(j); 231 232 if (getSubListIndex(longTask, shortTask, 0) > -1) { 233 sortedTasks.remove(j); 234 } 235 else { 236 j++; 237 } 238 } 239 else { 240 j++; 241 } 242 } 243 } 244 245 return sortedTasks; 249 private void trainTrie(ITaskTreeNode parent, RuleApplicationData<ITaskTreeNode> appData) { 250 // prevent training of already replaces sequences as those shall not be replaced anymore 251 if (!appData.getResult().getNewlyCreatedParentNodes().contains(parent)) { 252 List<ITaskTreeNode> children = parent.getChildren(); 253 254 if ((children != null) && (children.size() > 0)) { 255 256 /*System.out.println(); 257 for (int i = 0; i < children.size(); i++) { 258 System.out.println(children.get(i)); 259 }*/ 260 261 appData.getLastTrie().train(children, appData.getTrainedSequenceLength()); 262 263 for (ITaskTreeNode child : children) { 264 trainTrie(child, appData); 265 } 266 } 267 } 246 268 } 247 269 … … 257 279 * @param result 258 280 */ 259 private void createSequenceForTaskOccurrences(String taskId,260 List<ITaskTreeNode> task,261 ITaskTreeNode parent,262 List<ISequence> createdSequences)281 private void createSequenceForTaskOccurrences(String taskId, 282 List<ITaskTreeNode> task, 283 ITaskTreeNode parent, 284 RuleApplicationData<ITaskTreeNode> appData) 263 285 { 264 286 List<ITaskTreeNode> children = parent.getChildren(); … … 270 292 // first traverse the children 271 293 for (ITaskTreeNode child : children) { 272 createSequenceForTaskOccurrences(taskId, task, child, createdSequences);294 createSequenceForTaskOccurrences(taskId, task, child, appData); 273 295 } 274 296 … … 285 307 taskTreeNodeFactory, taskTreeBuilder); 286 308 287 createdSequences.add(sequence); 309 appData.getResult().addNewlyCreatedParentNode(sequence); 310 appData.incrementReplacementCounter(); 288 311 289 312 children = parent.getChildren(); … … 333 356 break; 334 357 } 358 else if (!nodeComparator.equals(subList.get(j), list.get(i + j))){ 359 throw new RuntimeException("comparator not commutative"); 360 } 335 361 } 336 362 … … 344 370 } 345 371 372 /** 373 * <p> 374 * TODO comment 375 * </p> 376 * 377 * @author Patrick Harms 378 */ 379 private class MaxCountAndLongestSequenceFinder implements TrieProcessor<ITaskTreeNode> { 380 381 /** 382 * 383 */ 384 private int maxCount; 385 386 /** 387 * 388 */ 389 private int currentCount; 390 391 /** 392 * 393 */ 394 private List<List<ITaskTreeNode>> foundSequences = new LinkedList<List<ITaskTreeNode>>(); 395 396 /** 397 * <p> 398 * TODO: comment 399 * </p> 400 * 401 * @param maxCount 402 */ 403 public MaxCountAndLongestSequenceFinder(int maxCount) { 404 super(); 405 this.maxCount = maxCount; 406 this.currentCount = 0; 407 } 408 409 /* (non-Javadoc) 410 * @see de.ugoe.cs.autoquest.usageprofiles.TrieProcessor#process(java.util.List, int) 411 */ 412 @Override 413 public TrieProcessor.Result process(List<ITaskTreeNode> sequence, int count) { 414 if (sequence.size() < 2) { 415 // ignore single nodes 416 return TrieProcessor.Result.CONTINUE; 417 } 418 419 if (count < 2) { 420 // ignore singular occurrences 421 return TrieProcessor.Result.SKIP_NODE; 422 } 423 424 if (count > this.maxCount) { 425 // ignore sequences that occur too often 426 return TrieProcessor.Result.CONTINUE; 427 } 428 429 if (this.currentCount > count) { 430 // ignore this children of this node, as they may have only smaller counts than 431 // the already found sequences 432 return TrieProcessor.Result.SKIP_NODE; 433 } 434 435 if (this.currentCount < count) { 436 // the provided sequence occurs more often that all sequences found so far. 437 // clear all found sequences and use the new count as the one searched for 438 foundSequences.clear(); 439 this.currentCount = count; 440 } 441 442 if (this.currentCount == count) { 443 // the sequence is of interest. Sort it into the other found sequences so that 444 // the longest come first 445 boolean added = false; 446 for (int i = 0; i < foundSequences.size(); i++) { 447 if (foundSequences.get(i).size() < sequence.size()) { 448 // defensive copy 449 foundSequences.add(i, new LinkedList<ITaskTreeNode>(sequence)); // defensive copy 450 added = true; 451 break; 452 } 453 } 454 455 if (!added) { 456 foundSequences.add(new LinkedList<ITaskTreeNode>(sequence)); // defensive copy 457 } 458 } 459 460 return TrieProcessor.Result.CONTINUE; 461 } 462 463 /** 464 * <p> 465 * TODO: comment 466 * </p> 467 * 468 * @return 469 */ 470 public Sequences<ITaskTreeNode> getFoundSequences() { 471 removePermutationsOfShorterTasks(); 472 return new Sequences<ITaskTreeNode>(currentCount, foundSequences); 473 } 474 475 /** 476 * <p> 477 * TODO: comment 478 * </p> 479 * 480 */ 481 private void removePermutationsOfShorterTasks() { 482 // now iterate the sorted list and for each task remove all other tasks that are shorter 483 // (i.e. come later in the sorted list) and that represent a subpart of the task 484 for (int i = 0; i < foundSequences.size(); i++) { 485 for (int j = i + 1; j < foundSequences.size();) { 486 if (foundSequences.get(j).size() < foundSequences.get(i).size()) { 487 // found a task that is a potential subtask. Check for this and remove the 488 // subtask if needed 489 List<ITaskTreeNode> longTask = foundSequences.get(i); 490 List<ITaskTreeNode> shortTask = foundSequences.get(j); 491 492 if (getSubListIndex(longTask, shortTask, 0) > -1) { 493 foundSequences.remove(j); 494 } 495 else { 496 j++; 497 } 498 } 499 else { 500 j++; 501 } 502 } 503 } 504 } 505 506 } 507 508 /** 509 * 510 */ 511 private class RuleApplicationData<T> { 512 513 /** 514 * 515 */ 516 private T tree; 517 518 /** 519 * 520 */ 521 private RuleApplicationResult result = new RuleApplicationResult(); 522 523 /** 524 * 525 */ 526 private Sequences<T> lastFoundSequences = new Sequences<T>(Integer.MAX_VALUE, null); 527 528 /** 529 * 530 */ 531 private Trie<T> lastTrie; 532 533 /** 534 * default and minimum trained sequence length is 3 535 */ 536 private int trainedSequenceLength = 3; 537 538 /** 539 * 540 */ 541 private int replacementCounter; 542 543 /** 544 * 545 */ 546 private StopWatch stopWatch = new StopWatch(); 547 548 /** 549 * 550 */ 551 private RuleApplicationData(T tree) { 552 this.tree = tree; 553 } 554 555 /** 556 * @return the lastFoundSequences 557 */ 558 private Sequences<T> getLastFoundSequences() { 559 return lastFoundSequences; 560 } 561 562 /** 563 * @param lastFoundSequences the lastFoundSequences to set 564 */ 565 private void setLastFoundSequences(Sequences<T> lastFoundSequences) { 566 this.lastFoundSequences = lastFoundSequences; 567 } 568 569 /** 570 * @return the lastTrie 571 */ 572 private Trie<T> getLastTrie() { 573 return lastTrie; 574 } 575 576 /** 577 * @return the trainedSequenceLength 578 */ 579 private int getTrainedSequenceLength() { 580 return trainedSequenceLength; 581 } 582 583 /** 584 * @param trainedSequenceLength the trainedSequenceLength to set 585 */ 586 private void setTrainedSequenceLength(int trainedSequenceLength) { 587 this.trainedSequenceLength = trainedSequenceLength; 588 } 589 590 /** 591 * @param lastTrie the lastTrie to set 592 */ 593 private void setLastTrie(Trie<T> lastTrie) { 594 this.lastTrie = lastTrie; 595 } 596 597 /** 598 * @return the tree 599 */ 600 private T getTree() { 601 return tree; 602 } 603 604 /** 605 * @return the result 606 */ 607 private RuleApplicationResult getResult() { 608 return result; 609 } 610 611 /** 612 * @return the stopWatch 613 */ 614 private StopWatch getStopWatch() { 615 return stopWatch; 616 } 617 618 /** 619 * 620 */ 621 private void resetReplacementCounter() { 622 replacementCounter = 0; 623 } 624 625 /** 626 * 627 */ 628 private void incrementReplacementCounter() { 629 replacementCounter++; 630 } 631 632 /** 633 * 634 */ 635 private int getReplacementCounter() { 636 return replacementCounter; 637 } 638 } 639 640 641 /** 642 * <p> 643 * TODO comment 644 * </p> 645 * 646 * @author Patrick Harms 647 */ 648 private class Sequences<T> implements Iterable<List<T>> { 649 650 /** 651 * 652 */ 653 private int occurrenceCount; 654 655 /** 656 * 657 */ 658 private List<List<T>> sequences; 659 660 /** 661 * <p> 662 * TODO: comment 663 * </p> 664 * 665 * @param occurrenceCount 666 * @param sequences 667 */ 668 private Sequences(int occurrenceCount, List<List<T>> sequences) { 669 super(); 670 this.occurrenceCount = occurrenceCount; 671 this.sequences = sequences; 672 } 673 674 /** 675 * <p> 676 * TODO: comment 677 * </p> 678 * 679 * @return 680 */ 681 private int getOccurrenceCount() { 682 return occurrenceCount; 683 } 684 685 /** 686 * <p> 687 * TODO: comment 688 * </p> 689 * 690 * @return 691 */ 692 private int size() { 693 return this.sequences.size(); 694 } 695 696 /** 697 * 698 */ 699 700 /* (non-Javadoc) 701 * @see java.lang.Iterable#iterator() 702 */ 703 @Override 704 public Iterator<List<T>> iterator() { 705 return this.sequences.iterator(); 706 } 707 708 } 709 346 710 }
Note: See TracChangeset
for help on using the changeset viewer.