package de.ugoe.cs.autoquest.tasktrees.alignment.algorithms; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.SubstitutionMatrix; import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Constants; public class NeedlemanWunsch implements AlignmentAlgorithm { /** * The first input */ private int[] input1; /** * The second input String */ private int[] input2; /** * The lengths of the input */ private int length1, length2; /** * The score matrix. The true scores should be divided by the normalization * factor. */ private MatrixEntry[][] matrix; private ArrayList alignment; /** * Substitution matrix to calculate scores */ private SubstitutionMatrix submat; /** * Compute the similarity score of substitution The position of the first * character is 1. A position of 0 represents a gap. * * @param i * Position of the character in str1 * @param j * Position of the character in str2 * @return Cost of substitution of the character in str1 by the one in str2 */ private double similarity(int i, int j) { return submat.getScore(input1[i - 1], input2[j - 1]); } /** * Build the score matrix using dynamic programming. */ private void buildMatrix() { if (submat.getGapPenalty() >= 0) { throw new Error("Indel score must be negative"); } // it's a gap matrix[0][0].setScore(0); matrix[0][0].setPrevious(null); // starting point // the first column for (int j = 1; j <= length2; j++) { matrix[0][j].setScore(j*submat.getGapPenalty()); matrix[0][j].setPrevious(matrix[0][j - 1]); matrix[0][j].setYvalue(input2[j-1]); matrix[0][j].setXvalue(Constants.GAP_SYMBOL); } // the first row for (int j = 1; j <= length1; j++) { matrix[j][0].setScore(j*submat.getGapPenalty()); matrix[j][0].setPrevious(matrix[j - 1][0]); matrix[j][0].setXvalue(input1[j-1]); matrix[j][0].setYvalue(Constants.GAP_SYMBOL); } for (int i = 1; i <= length1; i++) { for (int j = 1; j <= length2; j++) { double diagScore = matrix[i - 1][j - 1].getScore() + similarity(i, j); double upScore = matrix[i][j - 1].getScore() + submat.getGapPenalty(); double leftScore = matrix[i - 1][j].getScore() + submat.getGapPenalty(); matrix[i][j].setScore(Math.max(diagScore, Math.max(upScore, leftScore))); // find the directions that give the maximum scores. // TODO: Multiple directions are ignored, we choose the first // maximum score // True if we had a match if (diagScore == matrix[i][j].getScore()) { matrix[i][j].setPrevious(matrix[i - 1][j - 1]); matrix[i][j].setXvalue(input1[i - 1]); matrix[i][j].setYvalue(input2[j - 1]); } // true if we took an event from sequence x and not from y if (leftScore == matrix[i][j].getScore()) { matrix[i][j].setXvalue(input1[i - 1]); matrix[i][j].setYvalue(Constants.GAP_SYMBOL); matrix[i][j].setPrevious(matrix[i - 1][j]); } // true if we took an event from sequence y and not from x if (upScore == matrix[i][j].getScore()) { matrix[i][j].setXvalue(Constants.GAP_SYMBOL); matrix[i][j].setYvalue(input2[j - 1]); matrix[i][j].setPrevious(matrix[i][j - 1]); } } } } /** * Get the maximum value in the score matrix. */ public double getMaxScore() { double maxScore = 0; // skip the first row and column for (int i = 1; i <= length1; i++) { for (int j = 1; j <= length2; j++) { if (matrix[i][j].getScore() > maxScore) { maxScore = matrix[i][j].getScore(); } } } return maxScore; } /* * (non-Javadoc) * * @see * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm * #getAlignmentScore() */ @Override public double getAlignmentScore() { return getMaxScore(); } public void traceback() { MatrixEntry tmp = matrix[length1][length2]; LinkedList aligned1 = new LinkedList(); LinkedList aligned2 = new LinkedList(); while (tmp.getPrevious() != null) { aligned1.add(new Integer(tmp.getXvalue())); aligned2.add(new Integer(tmp.getYvalue())); tmp = tmp.getPrevious(); } // reverse order of the alignment int reversed1[] = new int[aligned1.size()]; int reversed2[] = new int[aligned2.size()]; int count = 0; for (Iterator it = aligned1.iterator(); it.hasNext();) { count++; reversed1[reversed1.length - count] = it.next(); } count = 0; for (Iterator it = aligned2.iterator(); it.hasNext();) { count++; reversed2[reversed2.length - count] = it.next(); } NumberSequence ns1 = new NumberSequence(reversed1.length); NumberSequence ns2 = new NumberSequence(reversed2.length); ns1.setSequence(reversed1); ns2.setSequence(reversed2); alignment.add(ns1); alignment.add(ns2); } public void printAlignment() { MatrixEntry tmp = matrix[length1][length2]; String aligned1 = ""; String aligned2 = ""; int count = 0; do { System.out.println(tmp); String append1 = ""; String append2 = ""; if (tmp.getXvalue() == Constants.GAP_SYMBOL) { append1 = " ___"; } else { append1 = String.format("%5d", tmp.getXvalue()); } if (tmp.getYvalue() == Constants.GAP_SYMBOL) { append2 = " ___"; } else { append2 = String.format("%5d", tmp.getYvalue()); } if (count != 0) { aligned1 = append1 + aligned1; aligned2 = append2 + aligned2; } tmp = tmp.getPrevious(); count++; } while (tmp != null); System.out.println(aligned1); System.out.println(aligned2); } /** * print the dynmaic programming matrix */ public void printDPMatrix() { System.out.print(" "); for (int i = 1; i <= length1; i++) System.out.format("%5d", input1[i - 1]); System.out.println(); for (int j = 0; j <= length2; j++) { if (j > 0) System.out.format("%5d ", input2[j - 1]); else { System.out.print(" "); } for (int i = 0; i <= length1; i++) { System.out.format("%4.1f ", matrix[i][j].getScore()); } System.out.println(); } } /* * (non-Javadoc) * * @see * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm * #getAlignment() */ @Override public ArrayList getAlignment() { return alignment; } public void setAlignment(ArrayList alignment) { this.alignment = alignment; } @Override public ArrayList> getMatches() { // TODO Auto-generated method stub return null; } @Override public void align(int[] input1, int[] input2, SubstitutionMatrix submat, float threshold) { this.input1 = input1; this.input2 = input2; length1 = input1.length; length2 = input2.length; this.submat = submat; // System.out.println("Starting SmithWaterman algorithm with a " // + submat.getClass() + " Substitution Matrix: " + // submat.getClass().getCanonicalName()); matrix = new MatrixEntry[length1 + 1][length2 + 1]; alignment = new ArrayList(); for (int i = 0; i < length1+1; i++) { for (int j = 0; j < length2+1; j++) { matrix[i][j] = new MatrixEntry(); } } buildMatrix(); traceback(); } }