Changeset 1733 for branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWaterman.java
- Timestamp:
- 09/05/14 19:33:12 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWaterman.java
r1620 r1733 38 38 private SubstitutionMatrix submat; 39 39 40 @Override 41 public void align(NumberSequence input1, NumberSequence input2, 42 SubstitutionMatrix submat, float threshold) { 43 this.input1 = input1.getSequence(); 44 this.input2 = input2.getSequence(); 45 length1 = input1.size(); 46 length2 = input2.size(); 47 this.submat = submat; 48 49 // System.out.println("Starting SmithWaterman algorithm with a " 50 // + submat.getClass() + " Substitution Matrix: " + 51 // submat.getClass().getCanonicalName()); 52 53 matrix = new MatrixEntry[length1 + 1][length2 + 1]; 54 alignment = new ArrayList<NumberSequence>(); 55 56 for (int i = 0; i < (length1 + 1); i++) { 57 for (int j = 0; j < (length2 + 1); j++) { 58 matrix[i][j] = new MatrixEntry(); 59 } 60 } 61 62 buildMatrix(); 63 traceback(); 64 65 } 66 67 /** 68 * Build the score matrix using dynamic programming. 69 */ 70 private void buildMatrix() { 71 if (submat.getGapPenalty() >= 0) { 72 throw new Error("Indel score must be negative"); 73 } 74 75 // it's a gap 76 matrix[0][0].setScore(0); 77 matrix[0][0].setPrevious(null); // starting point 78 79 // the first column 80 for (int j = 1; j < length2; j++) { 81 matrix[0][j].setScore(0); 82 matrix[0][j].setPrevious(matrix[0][j - 1]); 83 matrix[0][j].setYvalue(input2[j]); 84 matrix[0][j].setXvalue(Constants.GAP_SYMBOL); 85 } 86 // the first row 87 88 for (int j = 1; j < length1; j++) { 89 matrix[j][0].setScore(0); 90 matrix[j][0].setPrevious(matrix[j - 1][0]); 91 matrix[j][0].setXvalue(input1[j]); 92 matrix[j][0].setYvalue(Constants.GAP_SYMBOL); 93 } 94 95 for (int i = 1; i < length1; i++) { 96 97 for (int j = 1; j < length2; j++) { 98 final double diagScore = matrix[i - 1][j - 1].getScore() 99 + similarity(i, j); 100 final double upScore = matrix[i][j - 1].getScore() 101 + submat.getGapPenalty(); 102 final double leftScore = matrix[i - 1][j].getScore() 103 + submat.getGapPenalty(); 104 105 matrix[i][j].setScore(Math.max(diagScore, 106 Math.max(upScore, Math.max(leftScore, 0)))); 107 108 // find the directions that give the maximum scores. 109 // TODO: Multiple directions are ignored, we choose the first 110 // maximum score 111 // True if we had a match 112 if (diagScore == matrix[i][j].getScore()) { 113 matrix[i][j].setPrevious(matrix[i - 1][j - 1]); 114 matrix[i][j].setXvalue(input1[i - 1]); 115 matrix[i][j].setYvalue(input2[j - 1]); 116 } 117 // true if we took an event from sequence x and not from y 118 if (leftScore == matrix[i][j].getScore()) { 119 matrix[i][j].setXvalue(input1[i - 1]); 120 matrix[i][j].setYvalue(Constants.GAP_SYMBOL); 121 matrix[i][j].setPrevious(matrix[i - 1][j]); 122 } 123 // true if we took an event from sequence y and not from x 124 if (upScore == matrix[i][j].getScore()) { 125 matrix[i][j].setXvalue(Constants.GAP_SYMBOL); 126 matrix[i][j].setYvalue(input2[j - 1]); 127 matrix[i][j].setPrevious(matrix[i][j - 1]); 128 } 129 if (0 == matrix[i][j].getScore()) { 130 matrix[i][j].setPrevious(matrix[0][0]); 131 matrix[i][j].setXvalue(-2); 132 matrix[i][j].setYvalue(-2); 133 } 134 } 135 136 // Set the complete score cell 137 138 } 139 } 140 141 /* 142 * (non-Javadoc) 143 * 144 * @see 145 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 146 * #getAlignment() 147 */ 148 @Override 149 public ArrayList<NumberSequence> getAlignment() { 150 return alignment; 151 } 152 153 /* 154 * (non-Javadoc) 155 * 156 * @see 157 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 158 * #getAlignmentScore() 159 */ 160 @Override 161 public double getAlignmentScore() { 162 return getMaxScore(); 163 } 164 165 @Override 166 public ArrayList<Match> getMatches() { 167 // TODO Auto-generated method stub 168 return null; 169 } 170 171 /** 172 * Get the maximum value in the score matrix. 173 */ 174 @Override 175 public double getMaxScore() { 176 double maxScore = 0; 177 178 // skip the first row and column 179 for (int i = 1; i <= length1; i++) { 180 for (int j = 1; j < length2; j++) { 181 if (matrix[i][j].getScore() > maxScore) { 182 maxScore = matrix[i][j].getScore(); 183 } 184 } 185 } 186 187 return maxScore; 188 } 189 190 @Override 191 public void printAlignment() { 192 MatrixEntry tmp = matrix[length1 + 1][0]; 193 String aligned1 = ""; 194 String aligned2 = ""; 195 int count = 0; 196 do { 197 String append1 = ""; 198 String append2 = ""; 199 200 if (tmp.getXvalue() == Constants.GAP_SYMBOL) { 201 append1 = " ___"; 202 } else { 203 append1 = String.format("%5d", tmp.getXvalue()); 204 } 205 206 if (tmp.getYvalue() == Constants.GAP_SYMBOL) { 207 append2 = " ___"; 208 } else { 209 append2 = String.format("%5d", tmp.getYvalue()); 210 } 211 if (count != 0) { 212 aligned1 = append1 + aligned1; 213 aligned2 = append2 + aligned2; 214 } 215 216 tmp = tmp.getPrevious(); 217 count++; 218 219 } while (tmp != null); 220 System.out.println(aligned1); 221 System.out.println(aligned2); 222 } 223 224 /** 225 * print the dynmaic programming matrix 226 */ 227 @Override 228 public void printDPMatrix() { 229 System.out.print(" "); 230 for (int i = 1; i <= length1; i++) { 231 System.out.format("%5d", input1[i - 1]); 232 } 233 System.out.println(); 234 for (int j = 0; j <= length2; j++) { 235 if (j > 0) { 236 System.out.format("%5d ", input2[j - 1]); 237 } else { 238 System.out.print(" "); 239 } 240 for (int i = 0; i <= length1; i++) { 241 System.out.format("%4.1f ", matrix[i][j].getScore()); 242 } 243 System.out.println(); 244 } 245 } 246 247 public void setAlignment(ArrayList<NumberSequence> alignment) { 248 this.alignment = alignment; 249 } 40 250 41 251 /** … … 53 263 } 54 264 55 /**56 * Build the score matrix using dynamic programming.57 */58 private void buildMatrix() {59 if (submat.getGapPenalty() >= 0) {60 throw new Error("Indel score must be negative");61 }62 63 // it's a gap64 matrix[0][0].setScore(0);65 matrix[0][0].setPrevious(null); // starting point66 67 // the first column68 for (int j = 1; j < length2; j++) {69 matrix[0][j].setScore(0);70 matrix[0][j].setPrevious(matrix[0][j - 1]);71 matrix[0][j].setYvalue(input2[j]);72 matrix[0][j].setXvalue(Constants.GAP_SYMBOL);73 }74 // the first row75 76 for (int j = 1; j < length1; j++) {77 matrix[j][0].setScore(0);78 matrix[j][0].setPrevious(matrix[j - 1][0]);79 matrix[j][0].setXvalue(input1[j]);80 matrix[j][0].setYvalue(Constants.GAP_SYMBOL);81 }82 83 for (int i = 1; i < length1; i++) {84 85 for (int j = 1; j < length2; j++) {86 double diagScore = matrix[i - 1][j - 1].getScore()87 + similarity(i, j);88 double upScore = matrix[i][j - 1].getScore()89 + submat.getGapPenalty();90 double leftScore = matrix[i - 1][j].getScore()91 + submat.getGapPenalty();92 93 matrix[i][j].setScore(Math.max(diagScore,94 Math.max(upScore, Math.max(leftScore, 0))));95 96 // find the directions that give the maximum scores.97 // TODO: Multiple directions are ignored, we choose the first98 // maximum score99 // True if we had a match100 if (diagScore == matrix[i][j].getScore()) {101 matrix[i][j].setPrevious(matrix[i - 1][j - 1]);102 matrix[i][j].setXvalue(input1[i - 1]);103 matrix[i][j].setYvalue(input2[j - 1]);104 }105 // true if we took an event from sequence x and not from y106 if (leftScore == matrix[i][j].getScore()) {107 matrix[i][j].setXvalue(input1[i - 1]);108 matrix[i][j].setYvalue(Constants.GAP_SYMBOL);109 matrix[i][j].setPrevious(matrix[i - 1][j]);110 }111 // true if we took an event from sequence y and not from x112 if (upScore == matrix[i][j].getScore()) {113 matrix[i][j].setXvalue(Constants.GAP_SYMBOL);114 matrix[i][j].setYvalue(input2[j - 1]);115 matrix[i][j].setPrevious(matrix[i][j - 1]);116 }117 if (0 == matrix[i][j].getScore()) {118 matrix[i][j].setPrevious(matrix[0][0]);119 matrix[i][j].setXvalue(-2);120 matrix[i][j].setYvalue(-2);121 }122 }123 124 // Set the complete score cell125 126 }127 }128 129 /**130 * Get the maximum value in the score matrix.131 */132 public double getMaxScore() {133 double maxScore = 0;134 135 // skip the first row and column136 for (int i = 1; i <= length1; i++) {137 for (int j = 1; j < length2; j++) {138 if (matrix[i][j].getScore() > maxScore) {139 maxScore = matrix[i][j].getScore();140 }141 }142 }143 144 return maxScore;145 }146 147 /*148 * (non-Javadoc)149 *150 * @see151 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm152 * #getAlignmentScore()153 */154 @Override155 public double getAlignmentScore() {156 return getMaxScore();157 }158 159 265 public void traceback() { 160 266 MatrixEntry tmp = matrix[length1][length2]; 161 int aligned1[] = new int[length1 + length2 + 2];162 int aligned2[] = new int[length1 + length2 + 2];267 final int aligned1[] = new int[length1 + length2 + 2]; 268 final int aligned2[] = new int[length1 + length2 + 2]; 163 269 int count = 0; 164 270 do { 165 if ( length1 + length2 + 2== count) {271 if ((length1 + length2 + 2) == count) { 166 272 Console.traceln(Level.WARNING, 167 273 "Traceback longer than both sequences summed up!"); … … 177 283 count--; 178 284 // reverse order of the alignment 179 int reversed1[] = new int[count];180 int reversed2[] = new int[count];285 final int reversed1[] = new int[count]; 286 final int reversed2[] = new int[count]; 181 287 182 288 for (int i = count - 1; i > 0; i--) { … … 185 291 } 186 292 187 NumberSequence ns1 = new NumberSequence(reversed1.length);188 NumberSequence ns2 = new NumberSequence(reversed2.length);293 final NumberSequence ns1 = new NumberSequence(reversed1.length); 294 final NumberSequence ns2 = new NumberSequence(reversed2.length); 189 295 ns1.setSequence(reversed1); 190 296 ns2.setSequence(reversed2); … … 194 300 } 195 301 196 public void printAlignment() {197 MatrixEntry tmp = matrix[length1 + 1][0];198 String aligned1 = "";199 String aligned2 = "";200 int count = 0;201 do {202 String append1 = "";203 String append2 = "";204 205 if (tmp.getXvalue() == Constants.GAP_SYMBOL) {206 append1 = " ___";207 } else {208 append1 = String.format("%5d", tmp.getXvalue());209 }210 211 if (tmp.getYvalue() == Constants.GAP_SYMBOL) {212 append2 = " ___";213 } else {214 append2 = String.format("%5d", tmp.getYvalue());215 }216 if (count != 0) {217 aligned1 = append1 + aligned1;218 aligned2 = append2 + aligned2;219 }220 221 tmp = tmp.getPrevious();222 count++;223 224 } while (tmp != null);225 System.out.println(aligned1);226 System.out.println(aligned2);227 }228 229 /**230 * print the dynmaic programming matrix231 */232 public void printDPMatrix() {233 System.out.print(" ");234 for (int i = 1; i <= length1; i++)235 System.out.format("%5d", input1[i - 1]);236 System.out.println();237 for (int j = 0; j <= length2; j++) {238 if (j > 0)239 System.out.format("%5d ", input2[j - 1]);240 else {241 System.out.print(" ");242 }243 for (int i = 0; i <= length1; i++) {244 System.out.format("%4.1f ", matrix[i][j].getScore());245 }246 System.out.println();247 }248 }249 250 /*251 * (non-Javadoc)252 *253 * @see254 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm255 * #getAlignment()256 */257 @Override258 public ArrayList<NumberSequence> getAlignment() {259 return alignment;260 }261 262 public void setAlignment(ArrayList<NumberSequence> alignment) {263 this.alignment = alignment;264 }265 266 @Override267 public ArrayList<Match> getMatches() {268 // TODO Auto-generated method stub269 return null;270 }271 272 @Override273 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat,274 float threshold) {275 this.input1 = input1.getSequence();276 this.input2 = input2.getSequence();277 length1 = input1.size();278 length2 = input2.size();279 this.submat = submat;280 281 // System.out.println("Starting SmithWaterman algorithm with a "282 // + submat.getClass() + " Substitution Matrix: " +283 // submat.getClass().getCanonicalName());284 285 matrix = new MatrixEntry[length1 + 1][length2 + 1];286 alignment = new ArrayList<NumberSequence>();287 288 for (int i = 0; i < length1+1; i++) {289 for (int j = 0; j < length2+1; j++) {290 matrix[i][j] = new MatrixEntry();291 }292 }293 294 buildMatrix();295 traceback();296 297 }298 299 302 }
Note: See TracChangeset
for help on using the changeset viewer.