package de.ugoe.cs.autoquest.tasktrees.alignment.algorithms; import java.util.ArrayList; import java.util.logging.Level; import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.SubstitutionMatrix; import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Constants; import de.ugoe.cs.util.console.Console; public class SmithWatermanRepeated implements AlignmentAlgorithm { /** * The first input */ private int[] input1; /** * The second input String */ private int[] input2; /** * The lengths of the input */ private int length1, length2; /** * The score matrix. The true scores should be divided by the normalization * factor. */ private MatrixEntry[][] matrix; private ArrayList alignment; private float scoreThreshold; /** * Substitution matrix to calculate scores */ private SubstitutionMatrix submat; public SmithWatermanRepeated(int[] input1, int[] input2, SubstitutionMatrix submat,float threshold) { this.input1 = input1; this.input2 = input2; length1 = input1.length; length2 = input2.length; this.submat = submat; //System.out.println("Starting SmithWaterman algorithm with a " // + submat.getClass() + " Substitution Matrix: " + submat.getClass().getCanonicalName()); this.scoreThreshold = threshold; matrix = new MatrixEntry[length1+2][length2+1]; alignment = new ArrayList(); for (int i = 0; i <= length1+1; i++) { for(int j = 0; j< length2; j++) { matrix[i][j] = new MatrixEntry(); } } buildMatrix(); traceback(); } /** * Compute the similarity score of substitution The position of the first * character is 1. A position of 0 represents a gap. * * @param i * Position of the character in str1 * @param j * Position of the character in str2 * @return Cost of substitution of the character in str1 by the one in str2 */ private double similarity(int i, int j) { return submat.getScore(input1[i - 1], input2[j - 1]); } /** * Build the score matrix using dynamic programming. */ private void buildMatrix() { if (submat.getGapPenalty() >= 0) { throw new Error("Indel score must be negative"); } // it's a gap matrix[0][0].setScore(0); matrix[0][0].setPrevious(null); // starting point // the first column for (int j = 1; j < length2; j++) { matrix[0][j].setScore(0); //We don't need to go back to [0][0] if we reached matrix[0][x], so just end here //matrix[0][j].setPrevious(matrix[0][j-1]); matrix[0][j].setPrevious(null); } for (int i = 1; i < length1 + 2; i++) { // Formula for first row: // F(i,0) = max { F(i-1,0), F(i-1,j)-T j=1,...,m double firstRowLeftScore = matrix[i-1][0].getScore(); //for sequences of length 1 double tempMax; int maxRowIndex; if(length2 == 1) { tempMax = matrix[i-1][0].getScore(); maxRowIndex = 0; } else { tempMax = matrix[i-1][1].getScore(); maxRowIndex = 1; //position of the maximal score of the previous row for(int j = 2; j < length2;j++) { if(matrix[i-1][j].getScore() > tempMax) { tempMax = matrix[i-1][j].getScore(); maxRowIndex = j; } } } tempMax -= scoreThreshold; matrix[i][0].setScore(Math.max(firstRowLeftScore, tempMax)); if(tempMax ==matrix[i][0].getScore()){ matrix[i][0].setPrevious(matrix[i-1][maxRowIndex]); } if(firstRowLeftScore == matrix[i][0].getScore()) { matrix[i][0].setPrevious(matrix[i-1][0]); } //The last additional score is not related to a character in the input sequence, it's the total score. Therefore we don't need to save something for it if(i maxScore) { maxScore = matrix[i][j].getScore(); } } } return maxScore; } /* (non-Javadoc) * @see de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm#getAlignmentScore() */ @Override public double getAlignmentScore() { return matrix[length1+1][0].getScore(); } public void traceback() { MatrixEntry tmp = matrix[length1+1][0]; int aligned1[] = new int[length1+1+length2+2]; int aligned2[] = new int[length1+1+length2+2]; int count = 0; do { if(count != 0) { if (length1+1+length2+2 == count) { Console.traceln(Level.WARNING, "Traceback longer than both sequences summed up!"); break; } aligned1[count] = tmp.getXvalue(); aligned2[count] = tmp.getYvalue(); } tmp = tmp.getPrevious(); count++; } while(tmp != null); count--; //reverse order of the alignment int reversed1[] = new int[count]; int reversed2[] = new int[count]; for(int i = count-1; i > 0; i--) { reversed1[reversed1.length-i]= aligned1[i]; reversed2[reversed2.length-i]= aligned2[i]; } NumberSequence ns1 = new NumberSequence(reversed1.length); NumberSequence ns2 = new NumberSequence(reversed2.length); ns1.setSequence(reversed1); ns2.setSequence(reversed2); alignment.add(ns1); alignment.add(ns2); } public void printAlignment() { MatrixEntry tmp = matrix[length1+1][0]; String aligned1 = ""; String aligned2 = ""; int count = 0; do { String append1=""; String append2=""; if(tmp.getXvalue() == Constants.GAP_SYMBOL) { append1 = " ___"; } else if(tmp.getXvalue() == Constants.UNMATCHED_SYMBOL) { append1 = " ..."; } else { append1 = String.format("%5d", tmp.getXvalue()); } if(tmp.getYvalue() == Constants.GAP_SYMBOL) { append2 = " ___"; } else if(tmp.getYvalue() == Constants.UNMATCHED_SYMBOL) { append2 = " ..."; } else { append2 = String.format("%5d", tmp.getYvalue()); } if(count != 0) { aligned1 = append1 + aligned1; aligned2 = append2 + aligned2; } tmp = tmp.getPrevious(); count++; } while(tmp != null); System.out.println(aligned1); System.out.println(aligned2); } /** * print the dynmaic programming matrix */ public void printDPMatrix() { System.out.print(" "); for (int i = 1; i <= length1; i++) System.out.format("%5d", input1[i - 1]); System.out.println(); for (int j = 0; j <= length2; j++) { if (j > 0) System.out.format("%5d ",input2[j - 1]); else{ System.out.print(" "); } for (int i = 0; i <= length1 + 1; i++) { if((i getAlignment() { return alignment; } public void setAlignment(ArrayList alignment) { this.alignment = alignment; } }