Changeset 1620 for branches/ralph/src/main/java/de/ugoe/cs
- Timestamp:
- 07/23/14 18:18:11 (10 years ago)
- Location:
- branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees
- Files:
-
- 3 added
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/AlignmentAlgorithm.java
r1612 r1620 6 6 7 7 public interface AlignmentAlgorithm { 8 9 public abstract void align(int[] input1, int[] input2, SubstitutionMatrix submat,float threshold);10 8 11 9 /** … … 20 18 public abstract void printAlignment(); 21 19 22 public abstract ArrayList<ArrayList<NumberSequence>> getMatches(); 20 public abstract ArrayList<Match> getMatches(); 21 22 void align(NumberSequence input1, NumberSequence input2, 23 SubstitutionMatrix submat, float threshold); 23 24 24 25 } -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NeedlemanWunsch.java
r1616 r1620 260 260 261 261 @Override 262 public ArrayList< ArrayList<NumberSequence>> getMatches() {262 public ArrayList<Match> getMatches() { 263 263 // TODO Auto-generated method stub 264 264 return null; … … 266 266 267 267 @Override 268 public void align( int[] input1, int[]input2, SubstitutionMatrix submat,268 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat, 269 269 float threshold) { 270 this.input1 = input1 ;271 this.input2 = input2 ;272 length1 = input1. length;273 length2 = input2. length;270 this.input1 = input1.getSequence(); 271 this.input2 = input2.getSequence(); 272 length1 = input1.size(); 273 length2 = input2.size(); 274 274 this.submat = submat; 275 275 -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NumberSequence.java
r1619 r1620 6 6 public class NumberSequence { 7 7 private int[] sequence; 8 private int signature; 8 private int id; 9 10 public ArrayList<Match> getMatches() { 11 return matches; 12 } 13 14 15 public void setMatches(ArrayList<Match> matches) { 16 this.matches = matches; 17 } 18 19 public void addMatches(ArrayList<Match> matches) { 20 this.matches.addAll(matches); 21 } 22 23 private ArrayList<Match> matches; 9 24 10 25 public NumberSequence(int size) { … … 12 27 sequence = new int[size]; 13 28 } 14 29 15 30 public int[] getSequence() { 16 31 return sequence; … … 21 36 } 22 37 23 public int getSignature() { 24 return signature; 25 } 26 27 public void setSignature(int signature) { 28 this.signature = signature; 29 } 38 30 39 31 40 public void printSequence() { … … 38 47 public NumberSequence shuffle() { 39 48 NumberSequence result = new NumberSequence(sequence.length); 49 result.setId(getId()); 40 50 result.setSequence(this.sequence); 41 51 Random rgen = new Random(); … … 51 61 } 52 62 53 // TODO: This can be done faster 54 public int containsPattern( ArrayList<NumberSequence>pattern) {63 64 public int containsPattern(Match pattern) { 55 65 int i = 0; 56 66 int count = 0; 57 int[] pat1 = pattern.get (0).getSequence();58 int[] pat2 = pattern.get (1).getSequence();67 int[] pat1 = pattern.getFirstSequence().getSequence(); 68 int[] pat2 = pattern.getSecondSequence().getSequence(); 59 69 notmatched: while (i < sequence.length - pat1.length) { 60 70 if (sequence[i] == pat1[0] || sequence[i] == pat2[0]) { 61 for (int j = 0; j < pat1.length; j++) { 62 if (sequence[i + j] != pat1[j] 63 && sequence[i + j] != pat2[j]) { 71 int ipat1 =0; 72 int ipat2 =0; 73 while(ipat1 < pat1.length && ipat2<pat2.length) { 74 if(pat1[ipat1]==-1) { 75 ipat1++; 76 continue; 77 } 78 if(pat2[ipat2]==-1) { 79 ipat2++; 80 continue; 81 } 82 if (sequence[i + ipat1] != pat1[ipat1] 83 && sequence[i + ipat2] != pat2[ipat2]) { 64 84 i++; 65 85 continue notmatched; 66 86 } 87 ipat1++; 88 ipat2++; 67 89 } 68 90 count++; … … 72 94 return count; 73 95 } 96 97 //Returns the number of times event occurs in this sequence 98 public int eventCount(int event) { 99 int count = 0; 100 for(int i=0;i<sequence.length;i++) { 101 if(sequence[i] == event) { 102 count++; 103 } 104 } 105 return count; 106 } 74 107 75 108 public int size() { 76 109 return sequence.length; 77 110 } 111 112 113 public int getId() { 114 return id; 115 } 116 117 118 public void setId(int id) { 119 this.id = id; 120 } 78 121 } -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWaterman.java
r1612 r1620 265 265 266 266 @Override 267 public ArrayList< ArrayList<NumberSequence>> getMatches() {267 public ArrayList<Match> getMatches() { 268 268 // TODO Auto-generated method stub 269 269 return null; … … 271 271 272 272 @Override 273 public void align( int[] input1, int[]input2, SubstitutionMatrix submat,273 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat, 274 274 float threshold) { 275 this.input1 = input1 ;276 this.input2 = input2 ;277 length1 = input1. length;278 length2 = input2. length;275 this.input1 = input1.getSequence(); 276 this.input2 = input2.getSequence(); 277 length1 = input1.size(); 278 length2 = input2.size(); 279 279 this.submat = submat; 280 280 -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWatermanRepeated.java
r1619 r1620 231 231 ns1.setSequence(reversed1); 232 232 ns2.setSequence(reversed2); 233 234 alignment.add(ns1); 235 alignment.add(ns2); 233 ns1.setId(alignment.get(0).getId()); 234 ns2.setId(alignment.get(1).getId()); 235 236 alignment.set(0, ns1); 237 alignment.set(1, ns2); 236 238 } 237 239 … … 272 274 } 273 275 274 public ArrayList< ArrayList<NumberSequence>> getMatches() {275 ArrayList< ArrayList<NumberSequence>> result = new ArrayList<ArrayList<NumberSequence>>();276 public ArrayList<Match> getMatches() { 277 ArrayList<Match> result = new ArrayList<Match>(); 276 278 277 279 //both alignment sequences should be equally long … … 288 290 count++; 289 291 } 290 //I am really missing memcpy here 292 //I am really missing memcpy here? How does one do this better in java? 291 293 int[] tmp1 = new int[count]; 292 294 int[] tmp2 = new int[count]; … … 299 301 tmpns1.setSequence(tmp1); 300 302 tmpns2.setSequence(tmp2); 301 ArrayList<NumberSequence> tmpal = new ArrayList<NumberSequence>(); 302 tmpal.add(tmpns1); 303 tmpal.add(tmpns2); 303 Match tmpal = new Match(); 304 tmpal.setFirstSequence(tmpns1); 305 tmpal.setSecondSequence(tmpns2); 306 tmpal.addOccurence(new MatchOccurence(start,alignment.get(0).getId())); 307 tmpal.addOccurence(new MatchOccurence(start,alignment.get(1).getId())); 304 308 result.add(tmpal); 305 309 } 306 310 i++; 307 311 } 308 309 310 311 312 313 314 312 return result; 315 316 313 } 317 314 … … 354 351 355 352 @Override 356 public void align( int[] input1, int[]input2, SubstitutionMatrix submat,353 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat, 357 354 float threshold) { 358 this.input1 = input1; 359 this.input2 = input2; 360 length1 = input1.length; 361 length2 = input2.length; 355 356 alignment = new ArrayList<NumberSequence>(); 357 alignment.add(input1); 358 alignment.add(input2); 359 360 this.input1=input1.getSequence(); 361 this.input2=input2.getSequence(); 362 363 length1 = input1.size(); 364 length2 = input2.size(); 362 365 this.submat = submat; 363 366 … … 367 370 368 371 matrix = new MatrixEntry[length1+2][length2+1]; 369 alignment = new ArrayList<NumberSequence>();372 370 373 371 374 for (int i = 0; i <= length1+1; i++) { -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/matrix/ObjectDistanceSubstitionMatrix.java
r1615 r1620 25 25 26 26 public ObjectDistanceSubstitionMatrix( 27 SymbolMap<ITaskInstance, ITask> uniqueTasks,float positiveThreshold ) {27 SymbolMap<ITaskInstance, ITask> uniqueTasks,float positiveThreshold, int gapPenalty) { 28 28 this.uniqueTasks = uniqueTasks; 29 29 this.positiveThreshold = positiveThreshold; 30 30 idmapping = new HashMap<Integer, Integer>(); 31 31 matrix = new TriangleMatrix(uniqueTasks.size()+1); 32 gapPenalty = -3;32 this.gapPenalty = gapPenalty; 33 33 34 34 } -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/matrix/PairwiseAlignmentGenerator.java
r1618 r1620 14 14 public static PairwiseAlignmentStorage generate( 15 15 ArrayList<NumberSequence> numberseqs, 16 ObjectDistanceSubstitionMatrix submat) { 16 ObjectDistanceSubstitionMatrix submat, 17 int threshold) { 17 18 PairwiseAlignmentStorage alignments = new PairwiseAlignmentStorage( 18 19 numberseqs.size(), numberseqs.size()); 20 int smithWatermanThreshold = threshold; 19 21 20 22 for (int i = 0; i < numberseqs.size(); i++) { … … 25 27 if (i != j) { 26 28 Console.traceln(Level.FINEST,"Aligning sequence " + i + " with sequence " + j); 27 int smithWatermanThreshold = 10;29 28 30 AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create(); 29 aa.align(ns1 .getSequence(), ns2.getSequence(), submat,31 aa.align(ns1, ns2, submat, 30 32 smithWatermanThreshold); 31 33 alignments.set(i, j, aa); … … 33 35 AlignmentAlgorithm sameSequence1 = AlignmentAlgorithmFactory 34 36 .create(); 35 sameSequence1.align(ns1 .getSequence(), ns1.getSequence(),37 sameSequence1.align(ns1, ns1, 36 38 submat, smithWatermanThreshold); 37 39 AlignmentAlgorithm sameSequence2 = AlignmentAlgorithmFactory 38 40 .create(); 39 sameSequence2.align(ns2 .getSequence(), ns2.getSequence(),41 sameSequence2.align(ns2, ns2, 40 42 submat, smithWatermanThreshold); 41 43 AlignmentAlgorithm randomSequence = AlignmentAlgorithmFactory 42 44 .create(); 43 randomSequence.align(ns1.shuffle() .getSequence(), ns244 .shuffle() .getSequence(), submat,45 randomSequence.align(ns1.shuffle(), ns2 46 .shuffle(), submat, 45 47 smithWatermanThreshold); 46 48 -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/pal/tree/UPGMAAligningTree.java
r1612 r1620 223 223 if(seqCount1 == 1 && seqCount2 == 1) { 224 224 AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create(); 225 aa.align(node1.getSequence(0) .getSequence(), node2.getSequence(0).getSequence(), submat, 5);225 aa.align(node1.getSequence(0), node2.getSequence(0), submat, 5); 226 226 alignment = aa.getAlignment(); 227 227 … … 236 236 for(int i=0;i<seqCount1;i++){ 237 237 AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create(); 238 aa.align(node1.getSequence(i) .getSequence(), node2.getSequence(0).getSequence() , submat, 5);238 aa.align(node1.getSequence(i), node2.getSequence(0) , submat, 5); 239 239 tempStorage.set(i, 1, aa); 240 240 … … 256 256 for(int i=0;i<seqCount2;i++) { 257 257 AlignmentAlgorithm aa = AlignmentAlgorithmFactory.create(); 258 aa.align(node2.getSequence(i) .getSequence(), node1.getSequence(0).getSequence() , submat, 5);258 aa.align(node2.getSequence(i), node1.getSequence(0) , submat, 5); 259 259 tempStorage.set(1, i, aa); 260 260 if(maxScore < tempStorage.get(1, i).getAlignmentScore()) { … … 278 278 for(int j=0;j<seqCount2;j++) { 279 279 AlignmentAlgorithm aa =AlignmentAlgorithmFactory.create(); 280 aa.align(node1.getSequence(i) .getSequence(), node2.getSequence(j).getSequence(), submat, 5);280 aa.align(node1.getSequence(i), node2.getSequence(j), submat, 5); 281 281 tempStorage1.set(j, 0, aa); 282 282 if(maxScore1 < tempStorage1.get(j, 0).getAlignmentScore()) { … … 291 291 for (int j=0;j<seqCount1;j++) { 292 292 AlignmentAlgorithm aa =AlignmentAlgorithmFactory.create(); 293 aa.align(node2.getSequence(i) .getSequence(),node1.getSequence(j).getSequence(),submat,5);293 aa.align(node2.getSequence(i),node1.getSequence(j),submat,5); 294 294 tempStorage2.set(j, 0, aa); 295 295 if(maxScore2 < tempStorage2.get(j, 0).getAlignmentScore()) { -
branches/ralph/src/main/java/de/ugoe/cs/autoquest/tasktrees/temporalrelation/SequenceForTaskDetectionRuleAlignment.java
r1619 r1620 26 26 27 27 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithmFactory; 28 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Match; 28 29 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.NumberSequence; 29 30 import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.PairwiseAlignmentGenerator; … … 155 156 Console.traceln(Level.INFO,"generating substitution matrix"); 156 157 ObjectDistanceSubstitionMatrix submat = new ObjectDistanceSubstitionMatrix( 157 uniqueTasks, 5);158 uniqueTasks, 6,-3); 158 159 submat.generate(); 159 160 160 161 Console.traceln(Level.INFO,"generating pairwise alignments"); 161 ArrayList< ArrayList<NumberSequence>> matchseqs = new ArrayList<ArrayList<NumberSequence>>();162 PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator.generate(numberseqs,submat );162 ArrayList<Match> matchseqs = new ArrayList<Match>(); 163 PairwiseAlignmentStorage alignments = PairwiseAlignmentGenerator.generate(numberseqs,submat,9); 163 164 164 165 Console.traceln(Level.INFO,"retrieving significant sequence pieces"); … … 167 168 for(int j=0; j< numberseqs.size();j++) { 168 169 if(i != j) { 169 ArrayList<ArrayList<NumberSequence>> tmp = alignments.get(i, j).getMatches(); 170 matchseqs.addAll(tmp); 170 matchseqs.addAll(alignments.get(i, j).getMatches()); 171 171 } 172 172 } … … 178 178 //search this match in every other sequence 179 179 for(int i=0; i < matchseqs.size();i++) { 180 int occurencecount = 0; 180 int sequencecount = 0; 181 int totalcount = 0; 182 Match pattern = matchseqs.get(i); 183 184 185 186 //Skip sequences with more 0 events (scrolls) than other events. Both of the pattern sequences are equally long, so the added 0 counts 187 //just need to be smaller than the length of one sequence 188 if(pattern.getFirstSequence().eventCount(0) + pattern.getSecondSequence().eventCount(0) +1 > pattern.getFirstSequence().size()) 189 continue; 181 190 Console.traceln(Level.FINEST, "searching for patterns occuring most: " + Math.round((float)i/(float)matchseqs.size()*100) + "%"); 182 for(int j=0; j < matchseqs.size();j++) { 183 if(i>j) { 184 if(matchseqs.get(j).get(0).containsPattern(matchseqs.get(i)) > 0 || matchseqs.get(j).get(1).containsPattern(matchseqs.get(i)) > 0) { 185 occurencecount++; 191 for(int j=0; j < numberseqs.size();j++) { 192 int tmpcount = numberseqs.get(j).containsPattern(pattern); 193 if(tmpcount > 0) { 194 sequencecount++; 195 totalcount+=tmpcount; 186 196 } 187 } 188 } 189 if(occurencecount > 1) { 190 System.out.println("Found pattern \n "); 191 matchseqs.get(i).get(0).printSequence(); 192 matchseqs.get(i).get(1).printSequence(); 193 System.out.println(occurencecount + " times"); 197 } 198 199 if(totalcount > 1) { 200 matchseqs.get(i).getFirstSequence().printSequence(); 201 matchseqs.get(i).getFirstSequence().printSequence(); 202 System.out.println("Found pattern in " + sequencecount +"/" + numberseqs.size() + " sequences, total of " + totalcount + " occurences"); 194 203 System.out.println(); 195 204 } 196 205 } 197 198 199 200 206 alignments = null; 201 207 … … 290 296 } 291 297 } 298 templist.setId(numberseqs.size()); 292 299 numberseqs.add(templist); 293 300 comparator.clearBuffers();
Note: See TracChangeset
for help on using the changeset viewer.