Changeset 1733 for branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NeedlemanWunsch.java
- Timestamp:
- 09/05/14 19:33:12 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NeedlemanWunsch.java
r1620 r1733 8 8 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Constants; 9 9 10 11 10 public class NeedlemanWunsch implements AlignmentAlgorithm { 12 11 … … 39 38 private SubstitutionMatrix submat; 40 39 40 @Override 41 public void align(NumberSequence input1, NumberSequence input2, 42 SubstitutionMatrix submat, float threshold) { 43 this.input1 = input1.getSequence(); 44 this.input2 = input2.getSequence(); 45 length1 = input1.size(); 46 length2 = input2.size(); 47 this.submat = submat; 48 49 // System.out.println("Starting SmithWaterman algorithm with a " 50 // + submat.getClass() + " Substitution Matrix: " + 51 // submat.getClass().getCanonicalName()); 52 53 matrix = new MatrixEntry[length1 + 1][length2 + 1]; 54 alignment = new ArrayList<NumberSequence>(); 55 56 for (int i = 0; i < (length1 + 1); i++) { 57 for (int j = 0; j < (length2 + 1); j++) { 58 matrix[i][j] = new MatrixEntry(); 59 } 60 } 61 62 buildMatrix(); 63 traceback(); 64 65 } 66 67 /** 68 * Build the score matrix using dynamic programming. 69 */ 70 private void buildMatrix() { 71 if (submat.getGapPenalty() >= 0) { 72 throw new Error("Indel score must be negative"); 73 } 74 75 // it's a gap 76 matrix[0][0].setScore(0); 77 matrix[0][0].setPrevious(null); // starting point 78 79 // the first column 80 for (int j = 1; j <= length2; j++) { 81 matrix[0][j].setScore(j * submat.getGapPenalty()); 82 matrix[0][j].setPrevious(matrix[0][j - 1]); 83 matrix[0][j].setYvalue(input2[j - 1]); 84 matrix[0][j].setXvalue(Constants.GAP_SYMBOL); 85 } 86 // the first row 87 88 for (int j = 1; j <= length1; j++) { 89 matrix[j][0].setScore(j * submat.getGapPenalty()); 90 matrix[j][0].setPrevious(matrix[j - 1][0]); 91 matrix[j][0].setXvalue(input1[j - 1]); 92 matrix[j][0].setYvalue(Constants.GAP_SYMBOL); 93 } 94 95 for (int i = 1; i <= length1; i++) { 96 97 for (int j = 1; j <= length2; j++) { 98 final double diagScore = matrix[i - 1][j - 1].getScore() 99 + similarity(i, j); 100 final double upScore = matrix[i][j - 1].getScore() 101 + submat.getGapPenalty(); 102 final double leftScore = matrix[i - 1][j].getScore() 103 + submat.getGapPenalty(); 104 105 matrix[i][j].setScore(Math.max(diagScore, 106 Math.max(upScore, leftScore))); 107 108 // find the directions that give the maximum scores. 109 // TODO: Multiple directions are ignored, we choose the first 110 // maximum score 111 // True if we had a match 112 if (diagScore == matrix[i][j].getScore()) { 113 matrix[i][j].setPrevious(matrix[i - 1][j - 1]); 114 matrix[i][j].setXvalue(input1[i - 1]); 115 matrix[i][j].setYvalue(input2[j - 1]); 116 } 117 // true if we took an event from sequence x and not from y 118 if (leftScore == matrix[i][j].getScore()) { 119 matrix[i][j].setXvalue(input1[i - 1]); 120 matrix[i][j].setYvalue(Constants.GAP_SYMBOL); 121 matrix[i][j].setPrevious(matrix[i - 1][j]); 122 } 123 // true if we took an event from sequence y and not from x 124 if (upScore == matrix[i][j].getScore()) { 125 matrix[i][j].setXvalue(Constants.GAP_SYMBOL); 126 matrix[i][j].setYvalue(input2[j - 1]); 127 matrix[i][j].setPrevious(matrix[i][j - 1]); 128 } 129 } 130 } 131 } 132 133 /* 134 * (non-Javadoc) 135 * 136 * @see 137 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 138 * #getAlignment() 139 */ 140 @Override 141 public ArrayList<NumberSequence> getAlignment() { 142 return alignment; 143 } 144 145 /* 146 * (non-Javadoc) 147 * 148 * @see 149 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 150 * #getAlignmentScore() 151 */ 152 @Override 153 public double getAlignmentScore() { 154 return getMaxScore(); 155 } 156 157 @Override 158 public ArrayList<Match> getMatches() { 159 // TODO Auto-generated method stub 160 return null; 161 } 162 163 /** 164 * Get the maximum value in the score matrix. 165 */ 166 @Override 167 public double getMaxScore() { 168 double maxScore = 0; 169 170 // skip the first row and column 171 for (int i = 1; i <= length1; i++) { 172 for (int j = 1; j <= length2; j++) { 173 if (matrix[i][j].getScore() > maxScore) { 174 maxScore = matrix[i][j].getScore(); 175 } 176 } 177 } 178 179 return maxScore; 180 } 181 182 @Override 183 public void printAlignment() { 184 final int[] tmp1 = alignment.get(0).getSequence(); 185 final int[] tmp2 = alignment.get(1).getSequence(); 186 for (int i = 0; i < tmp1.length; i++) { 187 if (tmp1[i] == Constants.GAP_SYMBOL) { 188 System.out.print(" ___"); 189 } else if (tmp1[i] == Constants.UNMATCHED_SYMBOL) { 190 System.out.print(" ..."); 191 } else { 192 System.out.format("%5d", tmp1[i]); 193 } 194 195 } 196 System.out.println(); 197 for (int i = 0; i < tmp2.length; i++) { 198 if (tmp2[i] == Constants.GAP_SYMBOL) { 199 System.out.print(" ___"); 200 } else if (tmp2[i] == Constants.UNMATCHED_SYMBOL) { 201 System.out.print(" ..."); 202 } else { 203 System.out.format("%5d", tmp2[i]); 204 } 205 206 } 207 System.out.println(); 208 209 } 210 211 /** 212 * print the dynmaic programming matrix 213 */ 214 @Override 215 public void printDPMatrix() { 216 System.out.print(" "); 217 for (int i = 1; i <= length1; i++) { 218 System.out.format("%5d", input1[i - 1]); 219 } 220 System.out.println(); 221 for (int j = 0; j <= length2; j++) { 222 if (j > 0) { 223 System.out.format("%5d ", input2[j - 1]); 224 } else { 225 System.out.print(" "); 226 } 227 for (int i = 0; i <= length1; i++) { 228 System.out.format("%4.1f ", matrix[i][j].getScore()); 229 } 230 System.out.println(); 231 } 232 } 233 234 public void setAlignment(ArrayList<NumberSequence> alignment) { 235 this.alignment = alignment; 236 } 41 237 42 238 /** … … 54 250 } 55 251 56 /**57 * Build the score matrix using dynamic programming.58 */59 private void buildMatrix() {60 if (submat.getGapPenalty() >= 0) {61 throw new Error("Indel score must be negative");62 }63 64 // it's a gap65 matrix[0][0].setScore(0);66 matrix[0][0].setPrevious(null); // starting point67 68 // the first column69 for (int j = 1; j <= length2; j++) {70 matrix[0][j].setScore(j*submat.getGapPenalty());71 matrix[0][j].setPrevious(matrix[0][j - 1]);72 matrix[0][j].setYvalue(input2[j-1]);73 matrix[0][j].setXvalue(Constants.GAP_SYMBOL);74 }75 // the first row76 77 for (int j = 1; j <= length1; j++) {78 matrix[j][0].setScore(j*submat.getGapPenalty());79 matrix[j][0].setPrevious(matrix[j - 1][0]);80 matrix[j][0].setXvalue(input1[j-1]);81 matrix[j][0].setYvalue(Constants.GAP_SYMBOL);82 }83 84 for (int i = 1; i <= length1; i++) {85 86 for (int j = 1; j <= length2; j++) {87 double diagScore = matrix[i - 1][j - 1].getScore()88 + similarity(i, j);89 double upScore = matrix[i][j - 1].getScore()90 + submat.getGapPenalty();91 double leftScore = matrix[i - 1][j].getScore()92 + submat.getGapPenalty();93 94 matrix[i][j].setScore(Math.max(diagScore,95 Math.max(upScore, leftScore)));96 97 // find the directions that give the maximum scores.98 // TODO: Multiple directions are ignored, we choose the first99 // maximum score100 // True if we had a match101 if (diagScore == matrix[i][j].getScore()) {102 matrix[i][j].setPrevious(matrix[i - 1][j - 1]);103 matrix[i][j].setXvalue(input1[i - 1]);104 matrix[i][j].setYvalue(input2[j - 1]);105 }106 // true if we took an event from sequence x and not from y107 if (leftScore == matrix[i][j].getScore()) {108 matrix[i][j].setXvalue(input1[i - 1]);109 matrix[i][j].setYvalue(Constants.GAP_SYMBOL);110 matrix[i][j].setPrevious(matrix[i - 1][j]);111 }112 // true if we took an event from sequence y and not from x113 if (upScore == matrix[i][j].getScore()) {114 matrix[i][j].setXvalue(Constants.GAP_SYMBOL);115 matrix[i][j].setYvalue(input2[j - 1]);116 matrix[i][j].setPrevious(matrix[i][j - 1]);117 }118 }119 }120 }121 122 /**123 * Get the maximum value in the score matrix.124 */125 public double getMaxScore() {126 double maxScore = 0;127 128 // skip the first row and column129 for (int i = 1; i <= length1; i++) {130 for (int j = 1; j <= length2; j++) {131 if (matrix[i][j].getScore() > maxScore) {132 maxScore = matrix[i][j].getScore();133 }134 }135 }136 137 return maxScore;138 }139 140 /*141 * (non-Javadoc)142 *143 * @see144 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm145 * #getAlignmentScore()146 */147 @Override148 public double getAlignmentScore() {149 return getMaxScore();150 }151 152 252 public void traceback() { 153 253 MatrixEntry tmp = matrix[length1][length2]; 154 LinkedList<Integer> aligned1 = new LinkedList<Integer>();155 LinkedList<Integer> aligned2 = new LinkedList<Integer>();254 final LinkedList<Integer> aligned1 = new LinkedList<Integer>(); 255 final LinkedList<Integer> aligned2 = new LinkedList<Integer>(); 156 256 while (tmp.getPrevious() != null) { 157 257 158 258 aligned1.add(new Integer(tmp.getXvalue())); 159 259 aligned2.add(new Integer(tmp.getYvalue())); 160 260 161 261 tmp = tmp.getPrevious(); 162 } 163 262 } 263 164 264 // reverse order of the alignment 165 int reversed1[] = new int[aligned1.size()];166 int reversed2[] = new int[aligned2.size()];265 final int reversed1[] = new int[aligned1.size()]; 266 final int reversed2[] = new int[aligned2.size()]; 167 267 168 268 int count = 0; 169 for ( Iterator<Integer> it = aligned1.iterator(); it.hasNext();) {269 for (final Iterator<Integer> it = aligned1.iterator(); it.hasNext();) { 170 270 count++; 171 271 reversed1[reversed1.length - count] = it.next(); 172 272 173 273 } 174 274 count = 0; 175 for ( Iterator<Integer> it = aligned2.iterator(); it.hasNext();) {275 for (final Iterator<Integer> it = aligned2.iterator(); it.hasNext();) { 176 276 count++; 177 277 reversed2[reversed2.length - count] = it.next(); 178 278 } 179 279 180 NumberSequence ns1 = new NumberSequence(reversed1.length);181 NumberSequence ns2 = new NumberSequence(reversed2.length);280 final NumberSequence ns1 = new NumberSequence(reversed1.length); 281 final NumberSequence ns2 = new NumberSequence(reversed2.length); 182 282 ns1.setSequence(reversed1); 183 283 ns2.setSequence(reversed2); … … 187 287 } 188 288 189 190 /**191 * print the dynmaic programming matrix192 */193 public void printDPMatrix() {194 System.out.print(" ");195 for (int i = 1; i <= length1; i++)196 System.out.format("%5d", input1[i - 1]);197 System.out.println();198 for (int j = 0; j <= length2; j++) {199 if (j > 0)200 System.out.format("%5d ", input2[j - 1]);201 else {202 System.out.print(" ");203 }204 for (int i = 0; i <= length1; i++) {205 System.out.format("%4.1f ", matrix[i][j].getScore());206 }207 System.out.println();208 }209 }210 211 public void printAlignment() {212 int[] tmp1 = alignment.get(0).getSequence();213 int[] tmp2 = alignment.get(1).getSequence();214 for (int i=0; i< tmp1.length;i++) {215 if(tmp1[i] == Constants.GAP_SYMBOL) {216 System.out.print(" ___");217 }218 else if(tmp1[i] == Constants.UNMATCHED_SYMBOL) {219 System.out.print(" ...");220 }221 else {222 System.out.format("%5d", tmp1[i]);223 }224 225 }226 System.out.println();227 for (int i=0; i< tmp2.length;i++) {228 if(tmp2[i] == Constants.GAP_SYMBOL) {229 System.out.print(" ___");230 }231 else if(tmp2[i] == Constants.UNMATCHED_SYMBOL) {232 System.out.print(" ...");233 }234 else {235 System.out.format("%5d", tmp2[i]);236 }237 238 }239 System.out.println();240 241 242 243 }244 245 /*246 * (non-Javadoc)247 *248 * @see249 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm250 * #getAlignment()251 */252 @Override253 public ArrayList<NumberSequence> getAlignment() {254 return alignment;255 }256 257 public void setAlignment(ArrayList<NumberSequence> alignment) {258 this.alignment = alignment;259 }260 261 @Override262 public ArrayList<Match> getMatches() {263 // TODO Auto-generated method stub264 return null;265 }266 267 @Override268 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat,269 float threshold) {270 this.input1 = input1.getSequence();271 this.input2 = input2.getSequence();272 length1 = input1.size();273 length2 = input2.size();274 this.submat = submat;275 276 // System.out.println("Starting SmithWaterman algorithm with a "277 // + submat.getClass() + " Substitution Matrix: " +278 // submat.getClass().getCanonicalName());279 280 matrix = new MatrixEntry[length1 + 1][length2 + 1];281 alignment = new ArrayList<NumberSequence>();282 283 for (int i = 0; i < length1+1; i++) {284 for (int j = 0; j < length2+1; j++) {285 matrix[i][j] = new MatrixEntry();286 }287 }288 289 buildMatrix();290 traceback();291 292 }293 294 295 296 289 }
Note: See TracChangeset
for help on using the changeset viewer.