package de.ugoe.cs.autoquest.tasktrees.alignment.algorithms; import java.util.ArrayList; import java.util.List; import de.ugoe.cs.autoquest.tasktrees.alignment.substitution.SubstitutionMatrix; public class SmithWatermanRepeated implements Alignment { /** * The first input */ private int[] input1; /** * The second input String */ private int[] input2; /** * The lengths of the input */ private int length1, length2; /** * The score matrix. The true scores should be divided by the normalization * factor. */ private MatrixEntry[][] matrix; private float scoreThreshold; /** * Substitution matrix to calculate scores */ private SubstitutionMatrix submat; public SmithWatermanRepeated(int[] input1, int[] input2, SubstitutionMatrix submat,float threshold) { this.input1 = input1; this.input2 = input2; length1 = input1.length; length2 = input2.length; this.submat = submat; //System.out.println("Starting SmithWaterman algorithm with a " // + submat.getClass() + " Substitution Matrix: " + submat.getClass().getCanonicalName()); this.scoreThreshold = threshold; matrix = new MatrixEntry[length1+2][length2+1]; for (int i = 0; i <= length1; i++) { for(int j = 0; j< length2; j++) { matrix[i][j] = new MatrixEntry(); } } buildMatrix(); } /** * Compute the similarity score of substitution The position of the first * character is 1. A position of 0 represents a gap. * * @param i * Position of the character in str1 * @param j * Position of the character in str2 * @return Cost of substitution of the character in str1 by the one in str2 */ private float similarity(int i, int j) { if (i == 0 || j == 0) { // it's a gap return submat.getGapPenalty(); } // System.out.println("Diag letters: " + input1[i-1] + " " + // input2[j-1]); // return (input1[i - 1] == input2[j - 1]) ? MATCH_SCORE : // MISMATCH_SCORE; return submat.getDistance(input1[i - 1], input2[j - 1]); } /** * Build the score matrix using dynamic programming. Note: The indel scores * must be negative. Otherwise, the part handling the first row and column * has to be modified. */ private void buildMatrix() { if (submat.getGapPenalty() >= 0) { throw new Error("Indel score must be negative"); } // base case matrix[0][0].setScore(0); matrix[0][0].setPrevious(null); // starting point // the first column for (int j = 1; j < length2; j++) { matrix[0][j].setScore(0); matrix[0][j].setPrevious(matrix[0][j-1]); } for (int i = 1; i <= length1; i++) { // Formula for first row: // F(i,0) = max { F(i-1,0), F(i-1,j)-T j=1,...,m float firstRowLeftScore = matrix[i-1][0].getScore(); float tempMax = matrix[i-1][1].getScore(); //position of the maximal score of the previous row int maxRowIndex = 1; for(int j = 2; j < length2;j++) { if(matrix[i-1][j].getScore() > tempMax) { tempMax = matrix[i-1][j].getScore(); maxRowIndex = j; } } tempMax -= scoreThreshold; matrix[i][0].setScore(Math.max(firstRowLeftScore, tempMax)); for (int j = 1; j < length2; j++) { float diagScore = matrix[i - 1][j - 1].getScore() + similarity(i, j); float upScore = matrix[i][j - 1].getScore() + similarity(0, j); float leftScore = matrix[i - 1][j].getScore() + similarity(i, 0); matrix[i][j].setScore(Math.max(diagScore,Math.max(upScore, Math.max(leftScore,matrix[i][0].getScore())))); // find the directions that give the maximum scores. // Multiple directions are ignored TODO if (diagScore == matrix[i][j].getScore()) { matrix[i][j].setPrevious(matrix[i-1][j-1]); } if (leftScore == matrix[i][j].getScore()) { matrix[i][j].setPrevious(matrix[i-1][j]); } if (upScore == matrix[i][j].getScore()) { matrix[i][j].setPrevious(matrix[i][j-1]); } if (matrix[i][0].getScore() == matrix[i][j].getScore()) { matrix[i][j].setPrevious(matrix[i-1][maxRowIndex]); } } } } /** * Get the maximum value in the score matrix. */ public double getMaxScore() { double maxScore = 0; // skip the first row and column for (int i = 1; i <= length1; i++) { for (int j = 1; j < length2; j++) { if (matrix[i][j].getScore() > maxScore) { maxScore = matrix[i][j].getScore(); } } } return maxScore; } /** * Get the alignment score between the two input strings. */ public double getAlignmentScore() { return getMaxScore(); } /** * given the bottom right corner point trace back the top left conrner. at * entry: i, j hold bottom right (end of Aligment coords) at return: hold * top left (start of Alignment coords) */ private int[] traceback(int i, int j) { return null; } /** * print the dynmaic programming matrix */ public void printDPMatrix() { System.out.print(" "); for (int i = 1; i <= length1; i++) System.out.format("%5d", input1[i - 1]); System.out.println(); for (int j = 0; j < length2; j++) { if (j > 0) System.out.format("%5d ",input2[j - 1]); else{ System.out.print(" "); } for (int i = 0; i <= length1; i++) { System.out.format("%4.1f ",matrix[i][j].getScore()); } System.out.println(); } } /** * Return a set of Matches identified in Dynamic programming matrix. A match * is a pair of subsequences whose score is higher than the preset * scoreThreshold **/ public List getMatches() { ArrayList matchList = new ArrayList(); int fA = 0, fB = 0; // skip the first row and column, find the next maxScore after // prevmaxScore for (int i = 1; i <= length1; i++) { for (int j = 1; j <= length2; j++) { if (matrix[i][j].getScore() > scoreThreshold && matrix[i][j].getScore() > matrix[i - 1][j - 1].getScore() && matrix[i][j].getScore() > matrix[i - 1][j].getScore() && matrix[i][j].getScore() > matrix[i][j - 1].getScore()) { if (i == length1 || j == length2 || matrix[i][j].getScore() > matrix[i + 1][j + 1].getScore()) { // should be lesser than prev maxScore fA = i; fB = j; int[] f = traceback(fA, fB); // sets the x, y to // startAlignment // coordinates System.out.println(f[0] + " " + i + " " + f[1] + " " + j + " " + matrix[i][j].getScore()); // TODO Add matches to matchList } } } } return matchList; } }