Changeset 1110
Legend:
- Unmodified
- Added
- Removed
-
trunk/autoquest-core-usageprofiles-test/src/test/java/de/ugoe/cs/autoquest/usageprofiles/TrieTest.java
r1060 r1110 247 247 248 248 @Test 249 public void testGetContextSuffix_5() throws Exception { 250 Trie<String> fixture = new Trie<String>(); 251 fixture.train(sequence, 3); 252 List<String> context = new ArrayList<String>(); 253 context.add("a"); 254 context.add("a"); 255 context.add("b"); 256 List<String> expected = new ArrayList<String>(); 257 expected.add("a"); 258 expected.add("b"); 259 260 List<String> result = fixture.getContextSuffix(context); 261 262 ListAssert.assertEquals(expected, result); 263 } 264 265 @Test 249 266 public void testGetSequencesWithMostOccurrences_1() throws Exception { 250 267 Trie<String> fixture = new Trie<String>(); 251 268 fixture.train(sequence, 3); 252 269 270 // get all sequences with a minimal length of one that occur most often 271 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 0); 272 273 assertEquals(1, result.size()); 274 253 275 List<String> expected = new ArrayList<String>(); 254 276 expected.add("a"); 255 277 256 Collection<List<String>> result = fixture.getSequencesWithMostOccurrences(1); 278 ListAssert.assertEquals(expected, result.iterator().next()); 279 } 280 281 @Test 282 public void testGetSequencesWithMostOccurrences_2() throws Exception { 283 Trie<String> fixture = new Trie<String>(); 284 fixture.train(sequence, 3); 285 286 // get all sequences with a minimal length of one that occur exactly once 287 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 1); 288 289 assertEquals(11, result.size()); 290 291 List<String> expected = new ArrayList<String>(); 292 expected.add("r"); 293 expected.add("a"); 294 expected.add("c"); 295 // rac 296 ListAssert.assertContains((List<List<String>>) result, expected); 297 298 expected.clear(); 299 expected.add("a"); 300 expected.add("c"); 301 // ac 302 ListAssert.assertContains((List<List<String>>) result, expected); 303 304 expected.add("a"); 305 // aca 306 ListAssert.assertContains((List<List<String>>) result, expected); 307 308 expected.clear(); 309 expected.add("c"); 310 // c 311 ListAssert.assertContains((List<List<String>>) result, expected); 312 313 expected.add("a"); 314 // ca 315 ListAssert.assertContains((List<List<String>>) result, expected); 316 317 expected.add("d"); 318 // cad 319 ListAssert.assertContains((List<List<String>>) result, expected); 320 321 expected.clear(); 322 expected.add("a"); 323 expected.add("d"); 324 // ad 325 ListAssert.assertContains((List<List<String>>) result, expected); 326 327 expected.add("a"); 328 // ada 329 ListAssert.assertContains((List<List<String>>) result, expected); 330 331 expected.clear(); 332 expected.add("d"); 333 // d 334 ListAssert.assertContains((List<List<String>>) result, expected); 335 336 expected.add("a"); 337 // da 338 ListAssert.assertContains((List<List<String>>) result, expected); 339 340 expected.add("b"); 341 // dab 342 ListAssert.assertContains((List<List<String>>) result, expected); 343 } 344 345 @Test 346 public void testGetSequencesWithMostOccurrences_3() throws Exception { 347 Trie<String> fixture = new Trie<String>(); 348 fixture.train(sequence, 3); 349 350 // get all sequences with a minimal length of one that occur exactly twice 351 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 2); 352 353 assertEquals(7, result.size()); 354 355 List<String> expected = new ArrayList<String>(); 356 expected.add("a"); 357 expected.add("b"); 358 // ab 359 ListAssert.assertContains((List<List<String>>) result, expected); 360 361 expected.add("r"); 362 // abr 363 ListAssert.assertContains((List<List<String>>) result, expected); 364 365 expected.clear(); 366 expected.add("b"); 367 // b 368 ListAssert.assertContains((List<List<String>>) result, expected); 369 370 expected.add("r"); 371 // br 372 ListAssert.assertContains((List<List<String>>) result, expected); 373 374 expected.add("a"); 375 // bra 376 ListAssert.assertContains((List<List<String>>) result, expected); 377 378 expected.clear(); 379 expected.add("r"); 380 // r 381 ListAssert.assertContains((List<List<String>>) result, expected); 382 383 expected.add("a"); 384 // ra 385 ListAssert.assertContains((List<List<String>>) result, expected); 386 } 387 388 @Test 389 public void testGetSequencesWithMostOccurrences_4() throws Exception { 390 Trie<String> fixture = new Trie<String>(); 391 fixture.train(sequence, 3); 392 393 // get all sequences with a minimal length of one that occur exactly three times 394 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 3); 395 396 assertEquals(0, result.size()); 397 } 398 399 @Test 400 public void testGetSequencesWithMostOccurrences_5() throws Exception { 401 Trie<String> fixture = new Trie<String>(); 402 fixture.train(sequence, 3); 403 404 // get all sequences with a minimal length of one that occur exactly four times 405 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 4); 406 407 assertEquals(0, result.size()); 408 } 409 410 @Test 411 public void testGetSequencesWithMostOccurrences_6() throws Exception { 412 Trie<String> fixture = new Trie<String>(); 413 fixture.train(sequence, 3); 414 415 // get all sequences with a minimal length of one that occur exactly five times 416 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(1, 5); 257 417 258 418 assertEquals(1, result.size()); 259 ListAssert.assertEquals(expected, result.iterator().next()); 260 } 261 262 @Test 263 public void testGetSequencesWithMostOccurrences_2() throws Exception { 264 Trie<String> fixture = new Trie<String>(); 265 fixture.train(sequence, 3); 266 267 Collection<List<String>> result = fixture.getSequencesWithMostOccurrences(2); 419 420 List<String> expected = new ArrayList<String>(); 421 expected.add("a"); 422 ListAssert.assertContains((List<List<String>>) result, expected); 423 } 424 425 @Test 426 public void testGetSequencesWithMostOccurrences_7() throws Exception { 427 Trie<String> fixture = new Trie<String>(); 428 fixture.train(sequence, 3); 429 430 // get all sequences with a minimal length of two that occur most often 431 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(2, 0); 268 432 269 433 assertEquals(5, result.size()); … … 292 456 293 457 @Test 294 public void testGetSequencesWithMostOccurrences_3() throws Exception { 295 Trie<String> fixture = new Trie<String>(); 296 fixture.train(sequence, 3); 297 298 Collection<List<String>> result = fixture.getSequencesWithMostOccurrences(3); 458 public void testGetSequencesWithMostOccurrences_8() throws Exception { 459 Trie<String> fixture = new Trie<String>(); 460 fixture.train(sequence, 3); 461 462 // get all sequences with a minimal length of two that occur exactly once 463 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(2, 1); 464 465 assertEquals(9, result.size()); 466 467 List<String> expected = new ArrayList<String>(); 468 expected.add("r"); 469 expected.add("a"); 470 expected.add("c"); 471 // rac 472 ListAssert.assertContains((List<List<String>>) result, expected); 473 474 expected.clear(); 475 expected.add("a"); 476 expected.add("c"); 477 // ac 478 ListAssert.assertContains((List<List<String>>) result, expected); 479 480 expected.add("a"); 481 // aca 482 ListAssert.assertContains((List<List<String>>) result, expected); 483 484 expected.clear(); 485 expected.add("c"); 486 expected.add("a"); 487 // ca 488 ListAssert.assertContains((List<List<String>>) result, expected); 489 490 expected.add("d"); 491 // cad 492 ListAssert.assertContains((List<List<String>>) result, expected); 493 494 expected.clear(); 495 expected.add("a"); 496 expected.add("d"); 497 // ad 498 ListAssert.assertContains((List<List<String>>) result, expected); 499 500 expected.add("a"); 501 // ada 502 ListAssert.assertContains((List<List<String>>) result, expected); 503 504 expected.clear(); 505 expected.add("d"); 506 expected.add("a"); 507 // da 508 ListAssert.assertContains((List<List<String>>) result, expected); 509 510 expected.add("b"); 511 // dab 512 ListAssert.assertContains((List<List<String>>) result, expected); 513 } 514 515 @Test 516 public void testGetSequencesWithMostOccurrences_9() throws Exception { 517 Trie<String> fixture = new Trie<String>(); 518 fixture.train(sequence, 3); 519 520 // get all sequences with a minimal length of two that occur exactly twice 521 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(2, 2); 522 523 assertEquals(5, result.size()); 524 525 List<String> expected = new ArrayList<String>(); 526 expected.add("a"); 527 expected.add("b"); 528 // ab 529 ListAssert.assertContains((List<List<String>>) result, expected); 530 531 expected.add("r"); 532 // abr 533 ListAssert.assertContains((List<List<String>>) result, expected); 534 535 expected.clear(); 536 expected.add("b"); 537 expected.add("r"); 538 // br 539 ListAssert.assertContains((List<List<String>>) result, expected); 540 541 expected.add("a"); 542 // bra 543 ListAssert.assertContains((List<List<String>>) result, expected); 544 545 expected.clear(); 546 expected.add("r"); 547 expected.add("a"); 548 // ra 549 ListAssert.assertContains((List<List<String>>) result, expected); 550 } 551 552 @Test 553 public void testGetSequencesWithMostOccurrences_10() throws Exception { 554 Trie<String> fixture = new Trie<String>(); 555 fixture.train(sequence, 3); 556 557 // get all sequences with a minimal length of two that occur exactly three times 558 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(2, 3); 559 560 assertEquals(0, result.size()); 561 } 562 563 @Test 564 public void testGetSequencesWithMostOccurrences_11() throws Exception { 565 Trie<String> fixture = new Trie<String>(); 566 fixture.train(sequence, 3); 567 568 // get all sequences with a minimal length of three that occur most often 569 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(3, 0); 299 570 300 571 assertEquals(2, result.size()); … … 314 585 315 586 @Test 316 public void testGetSequencesWithMostOccurrences_4() throws Exception { 317 Trie<String> fixture = new Trie<String>(); 318 fixture.train(sequence, 3); 319 320 Collection<List<String>> result = fixture.getSequencesWithMostOccurrences(4); 321 587 public void testGetSequencesWithMostOccurrences_12() throws Exception { 588 Trie<String> fixture = new Trie<String>(); 589 fixture.train(sequence, 3); 590 591 // get all sequences with a minimal length of three that occur exactly once 592 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(3, 1); 593 594 assertEquals(5, result.size()); 595 596 List<String> expected = new ArrayList<String>(); 597 expected.add("r"); 598 expected.add("a"); 599 expected.add("c"); 600 // rac 601 ListAssert.assertContains((List<List<String>>) result, expected); 602 603 expected.clear(); 604 expected.add("a"); 605 expected.add("c"); 606 expected.add("a"); 607 // aca 608 ListAssert.assertContains((List<List<String>>) result, expected); 609 610 expected.clear(); 611 expected.add("c"); 612 expected.add("a"); 613 expected.add("d"); 614 // cad 615 ListAssert.assertContains((List<List<String>>) result, expected); 616 617 expected.clear(); 618 expected.add("a"); 619 expected.add("d"); 620 expected.add("a"); 621 // ada 622 ListAssert.assertContains((List<List<String>>) result, expected); 623 624 expected.clear(); 625 expected.add("d"); 626 expected.add("a"); 627 expected.add("b"); 628 // dab 629 ListAssert.assertContains((List<List<String>>) result, expected); 630 } 631 632 @Test 633 public void testGetSequencesWithMostOccurrences_13() throws Exception { 634 Trie<String> fixture = new Trie<String>(); 635 fixture.train(sequence, 3); 636 637 // get all sequences with a minimal length of four that occur most often 638 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(4, 0); 639 640 // none of these exist, as the tree is only trained with sequences of length 3 322 641 assertEquals(0, result.size()); 323 642 } 324 643 325 @Test 326 public void testGetContextSuffix_5() throws Exception { 327 Trie<String> fixture = new Trie<String>(); 328 fixture.train(sequence, 3); 329 List<String> context = new ArrayList<String>(); 330 context.add("a"); 331 context.add("a"); 332 context.add("b"); 333 List<String> expected = new ArrayList<String>(); 334 expected.add("a"); 335 expected.add("b"); 336 337 List<String> result = fixture.getContextSuffix(context); 338 339 ListAssert.assertEquals(expected, result); 340 } 644 @Test 645 public void testGetSequencesWithMostOccurrences_14() throws Exception { 646 Trie<String> fixture = new Trie<String>(); 647 fixture.train(sequence, 3); 648 649 // get all sequences with a minimal length of four that occur exactly once 650 Collection<List<String>> result = fixture.getSequencesWithOccurrenceCount(4, 1); 651 652 // none of these exist, as the tree is only trained with sequences of length 3 653 assertEquals(0, result.size()); 654 } 341 655 342 656 @Test -
trunk/autoquest-core-usageprofiles/src/main/java/de/ugoe/cs/autoquest/usageprofiles/Trie.java
r1060 r1110 324 324 /** 325 325 * <p> 326 * returns a list of symbol sequences which have a minimal length and that occurred most often 327 * with the same number of occurrences. The resulting list is empty, if there is no symbol 328 * sequence with the minimal length. 326 * returns a list of symbol sequences which have a minimal length and that occurred as often 327 * as defined by the given occurrence count. If the given occurrence count is smaller 1 then 328 * those sequences are returned, that occur most often. The resulting list is empty, if there 329 * is no symbol sequence with the minimal length or the provided number of occurrences. 329 330 * </p> 330 331 * 331 * @param minimalLength the minimal length of the returned sequences 332 * @param minimalLength the minimal length of the returned sequences 333 * @param occurrenceCount the number of occurrences of the returned sequences 332 334 * 333 335 * @return as described 334 336 */ 335 public Collection<List<T>> getSequencesWithMostOccurrences(int minimalLength) { 337 public Collection<List<T>> getSequencesWithOccurrenceCount(int minimalLength, 338 int occurrenceCount) 339 { 336 340 LinkedList<TrieNode<T>> context = new LinkedList<TrieNode<T>>(); 337 341 Collection<List<TrieNode<T>>> paths = new LinkedList<List<TrieNode<T>>>(); … … 339 343 context.push(rootNode); 340 344 341 // traverse the trie and determine all sequences, which have the maximumnumber of345 // traverse the trie and determine all sequences, which have the provided number of 342 346 // occurrences and a minimal length. 343 347 344 348 // minimalLength + 1 because we denote the depth including the root node 345 determineLongPathsWithMostOccurrences(minimalLength + 1, paths, context);349 determineLongPathsWithMostOccurrences(minimalLength + 1, occurrenceCount, paths, context); 346 350 347 351 Collection<List<T>> resultingPaths = new LinkedList<List<T>>(); … … 368 372 /** 369 373 * <p> 370 * Traverses the trie to collect all sequences with a maximum number of occurrences and with 371 * a minimal length. The length is encoded in the provided recursion depth. 374 * Traverses the trie to collect all sequences with a defined number of occurrences and with 375 * a minimal length. If the given occurrence count is smaller 1 then those sequences are 376 * searched that occur most often. The length of the sequences is encoded in the provided 377 * recursion depth. 372 378 * </p> 373 379 * 374 * @param minimalDepth the minimal recursion depth to be done 375 * @param paths the paths through the trie that all occurred with the same amount and 376 * that have the so far found maximum of occurrences (is updated each 377 * time a further path with the same number of occurrences is found; is 378 * replaced if a path with more occurrences is found) 379 * @param context the path through the trie, that is analyzed by the recursive call 380 * @param minimalDepth the minimal recursion depth to be done 381 * @param occurrenceCount the number of occurrences of the returned sequences 382 * @param paths the paths through the trie that all occurred with the same amount 383 * (if occurrence count is smaller 1, the paths which occurred most 384 * often) and that have the so far found matching number of occurrences 385 * (is updated each time a further path with the same number of 386 * occurrences is found; if occurrence count is smaller 1 387 * it is replaced if a path with more occurrences is found) 388 * @param context the path through the trie, that is analyzed by the recursive call 380 389 */ 381 390 private void determineLongPathsWithMostOccurrences(int minimalDepth, 391 int occurrenceCount, 382 392 Collection<List<TrieNode<T>>> paths, 383 393 LinkedList<TrieNode<T>> context) 384 394 { 385 int maxCount = 0;395 int envisagedCount = occurrenceCount; 386 396 387 397 // only if we already reached the depth to be achieved, we check if the paths have the 388 // maximumnumber of occurrences398 // required number of occurrences 389 399 if (context.size() >= minimalDepth) { 390 400 391 // try to determine the maximum number of occurrences so far, if any 392 if (paths.size() > 0) { 393 List<TrieNode<T>> path = paths.iterator().next(); 394 maxCount = path.get(path.size() - 1).getCount(); 395 } 396 397 // if the current path has a higher number of occurrences than all so far, clear 398 // the paths collected so far and set the new number of occurrences as new maximum 399 if (context.getLast().getCount() > maxCount) { 400 paths.clear(); 401 maxCount = context.getLast().getCount(); 401 if (envisagedCount < 1) { 402 // try to determine the maximum number of occurrences so far, if any 403 if (paths.size() > 0) { 404 List<TrieNode<T>> path = paths.iterator().next(); 405 envisagedCount = path.get(path.size() - 1).getCount(); 406 } 407 408 // if the current path has a higher number of occurrences than all so far, clear 409 // the paths collected so far and set the new number of occurrences as new maximum 410 if (context.getLast().getCount() > envisagedCount) { 411 paths.clear(); 412 envisagedCount = context.getLast().getCount(); 413 } 402 414 } 403 415 404 416 // if the path matches the current maximal number of occurrences, add it to the list 405 417 // of collected paths with these number of occurrences 406 if (context.getLast().getCount() == maxCount) {418 if (context.getLast().getCount() == envisagedCount) { 407 419 paths.add(new LinkedList<TrieNode<T>>(context)); 408 420 } … … 411 423 // perform the trie traversal 412 424 for (TrieNode<T> child : context.getLast().getChildren()) { 413 if (child.getCount() >= maxCount) {425 if (child.getCount() >= envisagedCount) { 414 426 context.add(child); 415 determineLongPathsWithMostOccurrences(minimalDepth, paths, context); 427 determineLongPathsWithMostOccurrences 428 (minimalDepth, occurrenceCount, paths, context); 416 429 context.removeLast(); 417 430 }
Note: See TracChangeset
for help on using the changeset viewer.