Changeset 1733 for branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms
- Timestamp:
- 09/05/14 19:33:12 (10 years ago)
- Location:
- branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/Alignment.java
r1593 r1733 5 5 public class Alignment { 6 6 ArrayList<NumberSequence> alingment; 7 8 7 9 8 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/AlignmentAlgorithm.java
r1649 r1733 6 6 7 7 public interface AlignmentAlgorithm { 8 8 9 void align(NumberSequence input1, NumberSequence input2, 10 SubstitutionMatrix submat, float threshold); 11 12 public abstract ArrayList<NumberSequence> getAlignment(); 13 9 14 /** 10 15 * Get the alignment score between the two input strings. … … 12 17 public abstract double getAlignmentScore(); 13 18 14 public abstract ArrayList<NumberSequence> getAlignment(); 19 public abstract ArrayList<Match> getMatches(); 20 21 public double getMaxScore(); 22 23 public abstract void printAlignment(); 15 24 16 25 public abstract void printDPMatrix(); 17 26 18 public abstract void printAlignment();19 20 public abstract ArrayList<Match> getMatches();21 22 public double getMaxScore();23 24 void align(NumberSequence input1, NumberSequence input2,25 SubstitutionMatrix submat, float threshold);26 27 27 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/AlignmentAlgorithmFactory.java
r1616 r1733 1 1 package de.ugoe.cs.autoquest.tasktrees.alignment.algorithms; 2 2 3 public class AlignmentAlgorithmFactory { 3 4 4 public class AlignmentAlgorithmFactory {5 6 public static void setDefaultAlgorithm(String algorithmname) {7 //TODO: check for valid algorihm class names here8 algorithmclass = algorithmname;9 }10 11 private static String algorithmclass = "de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWatermanRepeated";12 13 14 15 5 public static AlignmentAlgorithm create() { 16 6 Class<?> newclass; 17 7 Object object = null; 18 8 try { 19 newclass = Class.forName(algorithmclass); 20 object = newclass.newInstance(); 21 22 } catch (ClassNotFoundException | SecurityException | InstantiationException | IllegalAccessException | IllegalArgumentException e) { 9 newclass = Class.forName(algorithmclass); 10 object = newclass.newInstance(); 11 12 } catch (ClassNotFoundException | SecurityException 13 | InstantiationException | IllegalAccessException 14 | IllegalArgumentException e) { 23 15 e.printStackTrace(); 24 16 } 25 17 return (AlignmentAlgorithm) object; 26 18 } 19 20 public static void setDefaultAlgorithm(String algorithmname) { 21 // TODO: check for valid algorihm class names here 22 algorithmclass = algorithmname; 23 } 24 25 private static String algorithmclass = "de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.SmithWatermanRepeated"; 27 26 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/AlignmentHelpers.java
r1553 r1733 7 7 import de.ugoe.cs.autoquest.eventcore.guimodel.IGUIElement; 8 8 9 public class AlignmentHelpers extends GUIModel { 9 10 10 public class AlignmentHelpers extends GUIModel { 11 11 public static int distanceBetween(IGUIElement first, IGUIElement second) { 12 12 13 14 /** 15 * 16 */ 17 private static final long serialVersionUID = -2593092958275693133L; 13 int hopcount1 = 0; 14 int hopcount2 = 0; 15 final List<IGUIElement> tmp = new ArrayList<IGUIElement>(); 16 tmp.add(first); 17 tmp.add(second); 18 final IGUIElement commonDenominator = getCommonDenominator(tmp); 19 20 while (!(first.equals(commonDenominator))) { 21 first = first.getParent(); 22 hopcount1++; 23 } 24 25 while (!(second.equals(commonDenominator))) { 26 second = second.getParent(); 27 hopcount2++; 28 } 29 30 return hopcount1 + hopcount2; 31 } 18 32 19 33 /** … … 24 38 * </p> 25 39 */ 26 private static IGUIElement getCommonDenominator(List<IGUIElement> guiElements) { 40 private static IGUIElement getCommonDenominator( 41 List<IGUIElement> guiElements) { 27 42 IGUIElement commonDenominator = null; 28 43 29 44 if (guiElements.size() > 0) { 30 List<IGUIElement> commonDenominatorPath = new ArrayList<IGUIElement>();45 final List<IGUIElement> commonDenominatorPath = new ArrayList<IGUIElement>(); 31 46 32 47 // create a reference list using the first GUI element … … 49 64 // well as it subsequent 50 65 // siblings 51 List<IGUIElement> currentPath = new ArrayList<IGUIElement>();66 final List<IGUIElement> currentPath = new ArrayList<IGUIElement>(); 52 67 for (int i = 1; i < guiElements.size(); i++) { 53 68 currentPath.clear(); … … 55 70 while (guiElement != null) { 56 71 // if (guiElementMatchesConsideredTypes(guiElement)) { 57 72 currentPath.add(0, guiElement); 58 73 // } 59 74 guiElement = guiElement.getParent(); … … 84 99 } 85 100 86 public static int distanceBetween(IGUIElement first, IGUIElement second) { 87 88 int hopcount1 = 0; 89 int hopcount2 = 0; 90 List<IGUIElement> tmp = new ArrayList<IGUIElement>(); 91 tmp.add(first); 92 tmp.add(second); 93 IGUIElement commonDenominator = getCommonDenominator(tmp); 94 95 while(!(first.equals(commonDenominator))) { 96 first = first.getParent(); 97 hopcount1++; 98 } 99 100 while(!(second.equals(commonDenominator))) { 101 second = second.getParent(); 102 hopcount2++; 103 } 104 105 return hopcount1 + hopcount2; 106 } 101 /** 102 * 103 */ 104 private static final long serialVersionUID = -2593092958275693133L; 107 105 108 106 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/Constants.java
r1578 r1733 2 2 3 3 public class Constants { 4 4 5 5 public static final int GAP_SYMBOL = -1; 6 6 public static final int UNMATCHED_SYMBOL = -2; -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/Match.java
r1717 r1733 5 5 import java.util.LinkedList; 6 6 7 8 9 public class Match implements Serializable{ 7 public class Match implements Serializable { 10 8 /** 11 9 * … … 13 11 private static final long serialVersionUID = -3206992723755714741L; 14 12 15 private ArrayList<NumberSequence> matchseqs;13 private final ArrayList<NumberSequence> matchseqs; 16 14 17 15 private LinkedList<MatchOccurence> occurences; … … 24 22 } 25 23 26 public NumberSequence getFirstSequence() {27 return matchseqs.get(0);28 }29 30 public NumberSequence getSecondSequence() {31 return matchseqs.get(1);32 }33 34 public void setFirstSequence(NumberSequence seq) {35 matchseqs.set(0, seq);36 }37 38 public void setSecondSequence(NumberSequence seq) {39 matchseqs.set(1, seq);40 }41 42 24 public void addOccurence(MatchOccurence occurence) { 43 25 occurences.add(occurence); 44 26 } 45 27 46 public int occurenceCount() { 47 return occurences.size(); 48 } 49 50 public int size() { 51 //Both sequences should be equally long 52 return matchseqs.get(0).size(); 28 public void addOccurencesOf(Match m) { 29 occurences.addAll(m.getOccurences()); 53 30 } 54 31 … … 63 40 } 64 41 42 public NumberSequence getFirstSequence() { 43 return matchseqs.get(0); 44 } 45 65 46 public LinkedList<MatchOccurence> getOccurences() { 66 47 return occurences; 48 } 49 50 public NumberSequence getSecondSequence() { 51 return matchseqs.get(1); 52 } 53 54 public int occurenceCount() { 55 return occurences.size(); 56 } 57 58 public void setFirstSequence(NumberSequence seq) { 59 matchseqs.set(0, seq); 67 60 } 68 61 … … 71 64 } 72 65 73 74 public void addOccurencesOf(Match m) { 75 occurences.addAll(m.getOccurences()); 66 public void setSecondSequence(NumberSequence seq) { 67 matchseqs.set(1, seq); 76 68 } 77 78 79 69 70 public int size() { 71 // Both sequences should be equally long 72 return matchseqs.get(0).size(); 73 } 80 74 81 75 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/MatchOccurence.java
r1717 r1733 3 3 import java.io.Serializable; 4 4 5 public class MatchOccurence implements Serializable {5 public class MatchOccurence implements Serializable { 6 6 /** 7 7 * … … 11 11 private int endindex; 12 12 private int sequenceId; 13 13 14 14 public MatchOccurence(int startindex, int endindex, int sequenceId) { 15 15 this.startindex = startindex; … … 17 17 this.sequenceId = sequenceId; 18 18 } 19 19 20 20 public int getEndindex() { 21 21 return endindex; 22 } 23 24 public int getSequenceId() { 25 return sequenceId; 26 } 27 28 public int getStartindex() { 29 return startindex; 22 30 } 23 31 … … 26 34 } 27 35 28 public int getStartindex() {29 return startindex;36 public void setSequenceId(int sequenceId) { 37 this.sequenceId = sequenceId; 30 38 } 39 31 40 public void setStartindex(int startindex) { 32 41 this.startindex = startindex; 33 42 } 34 public int getSequenceId() {35 return sequenceId;36 }37 38 public void setSequenceId(int sequenceId) {39 this.sequenceId = sequenceId;40 }41 43 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/MatrixEntry.java
r1568 r1733 6 6 private int xvalue; 7 7 private int yvalue; 8 9 public MatrixEntry(float score, MatrixEntry previous) { 8 9 public MatrixEntry() { 10 } 11 12 public MatrixEntry(float score, MatrixEntry previous) { 10 13 this.score = score; 11 14 this.previous = previous; 12 15 } 13 14 public MatrixEntry() { 16 17 public MatrixEntry getPrevious() { 18 return previous; 15 19 } 16 20 17 21 public double getScore() { 18 22 return score; 19 }20 public void setScore(double score) {21 this.score = score;22 }23 public MatrixEntry getPrevious() {24 return previous;25 }26 public void setPrevious(MatrixEntry previous) {27 this.previous = previous;28 23 } 29 24 … … 32 27 } 33 28 29 public int getYvalue() { 30 return yvalue; 31 } 32 33 public void setPrevious(MatrixEntry previous) { 34 this.previous = previous; 35 } 36 37 public void setScore(double score) { 38 this.score = score; 39 } 40 34 41 public void setXvalue(int xvalue) { 35 42 this.xvalue = xvalue; 36 }37 38 public int getYvalue() {39 return yvalue;40 43 } 41 44 -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NeedlemanWunsch.java
r1620 r1733 8 8 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Constants; 9 9 10 11 10 public class NeedlemanWunsch implements AlignmentAlgorithm { 12 11 … … 39 38 private SubstitutionMatrix submat; 40 39 40 @Override 41 public void align(NumberSequence input1, NumberSequence input2, 42 SubstitutionMatrix submat, float threshold) { 43 this.input1 = input1.getSequence(); 44 this.input2 = input2.getSequence(); 45 length1 = input1.size(); 46 length2 = input2.size(); 47 this.submat = submat; 48 49 // System.out.println("Starting SmithWaterman algorithm with a " 50 // + submat.getClass() + " Substitution Matrix: " + 51 // submat.getClass().getCanonicalName()); 52 53 matrix = new MatrixEntry[length1 + 1][length2 + 1]; 54 alignment = new ArrayList<NumberSequence>(); 55 56 for (int i = 0; i < (length1 + 1); i++) { 57 for (int j = 0; j < (length2 + 1); j++) { 58 matrix[i][j] = new MatrixEntry(); 59 } 60 } 61 62 buildMatrix(); 63 traceback(); 64 65 } 66 67 /** 68 * Build the score matrix using dynamic programming. 69 */ 70 private void buildMatrix() { 71 if (submat.getGapPenalty() >= 0) { 72 throw new Error("Indel score must be negative"); 73 } 74 75 // it's a gap 76 matrix[0][0].setScore(0); 77 matrix[0][0].setPrevious(null); // starting point 78 79 // the first column 80 for (int j = 1; j <= length2; j++) { 81 matrix[0][j].setScore(j * submat.getGapPenalty()); 82 matrix[0][j].setPrevious(matrix[0][j - 1]); 83 matrix[0][j].setYvalue(input2[j - 1]); 84 matrix[0][j].setXvalue(Constants.GAP_SYMBOL); 85 } 86 // the first row 87 88 for (int j = 1; j <= length1; j++) { 89 matrix[j][0].setScore(j * submat.getGapPenalty()); 90 matrix[j][0].setPrevious(matrix[j - 1][0]); 91 matrix[j][0].setXvalue(input1[j - 1]); 92 matrix[j][0].setYvalue(Constants.GAP_SYMBOL); 93 } 94 95 for (int i = 1; i <= length1; i++) { 96 97 for (int j = 1; j <= length2; j++) { 98 final double diagScore = matrix[i - 1][j - 1].getScore() 99 + similarity(i, j); 100 final double upScore = matrix[i][j - 1].getScore() 101 + submat.getGapPenalty(); 102 final double leftScore = matrix[i - 1][j].getScore() 103 + submat.getGapPenalty(); 104 105 matrix[i][j].setScore(Math.max(diagScore, 106 Math.max(upScore, leftScore))); 107 108 // find the directions that give the maximum scores. 109 // TODO: Multiple directions are ignored, we choose the first 110 // maximum score 111 // True if we had a match 112 if (diagScore == matrix[i][j].getScore()) { 113 matrix[i][j].setPrevious(matrix[i - 1][j - 1]); 114 matrix[i][j].setXvalue(input1[i - 1]); 115 matrix[i][j].setYvalue(input2[j - 1]); 116 } 117 // true if we took an event from sequence x and not from y 118 if (leftScore == matrix[i][j].getScore()) { 119 matrix[i][j].setXvalue(input1[i - 1]); 120 matrix[i][j].setYvalue(Constants.GAP_SYMBOL); 121 matrix[i][j].setPrevious(matrix[i - 1][j]); 122 } 123 // true if we took an event from sequence y and not from x 124 if (upScore == matrix[i][j].getScore()) { 125 matrix[i][j].setXvalue(Constants.GAP_SYMBOL); 126 matrix[i][j].setYvalue(input2[j - 1]); 127 matrix[i][j].setPrevious(matrix[i][j - 1]); 128 } 129 } 130 } 131 } 132 133 /* 134 * (non-Javadoc) 135 * 136 * @see 137 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 138 * #getAlignment() 139 */ 140 @Override 141 public ArrayList<NumberSequence> getAlignment() { 142 return alignment; 143 } 144 145 /* 146 * (non-Javadoc) 147 * 148 * @see 149 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 150 * #getAlignmentScore() 151 */ 152 @Override 153 public double getAlignmentScore() { 154 return getMaxScore(); 155 } 156 157 @Override 158 public ArrayList<Match> getMatches() { 159 // TODO Auto-generated method stub 160 return null; 161 } 162 163 /** 164 * Get the maximum value in the score matrix. 165 */ 166 @Override 167 public double getMaxScore() { 168 double maxScore = 0; 169 170 // skip the first row and column 171 for (int i = 1; i <= length1; i++) { 172 for (int j = 1; j <= length2; j++) { 173 if (matrix[i][j].getScore() > maxScore) { 174 maxScore = matrix[i][j].getScore(); 175 } 176 } 177 } 178 179 return maxScore; 180 } 181 182 @Override 183 public void printAlignment() { 184 final int[] tmp1 = alignment.get(0).getSequence(); 185 final int[] tmp2 = alignment.get(1).getSequence(); 186 for (int i = 0; i < tmp1.length; i++) { 187 if (tmp1[i] == Constants.GAP_SYMBOL) { 188 System.out.print(" ___"); 189 } else if (tmp1[i] == Constants.UNMATCHED_SYMBOL) { 190 System.out.print(" ..."); 191 } else { 192 System.out.format("%5d", tmp1[i]); 193 } 194 195 } 196 System.out.println(); 197 for (int i = 0; i < tmp2.length; i++) { 198 if (tmp2[i] == Constants.GAP_SYMBOL) { 199 System.out.print(" ___"); 200 } else if (tmp2[i] == Constants.UNMATCHED_SYMBOL) { 201 System.out.print(" ..."); 202 } else { 203 System.out.format("%5d", tmp2[i]); 204 } 205 206 } 207 System.out.println(); 208 209 } 210 211 /** 212 * print the dynmaic programming matrix 213 */ 214 @Override 215 public void printDPMatrix() { 216 System.out.print(" "); 217 for (int i = 1; i <= length1; i++) { 218 System.out.format("%5d", input1[i - 1]); 219 } 220 System.out.println(); 221 for (int j = 0; j <= length2; j++) { 222 if (j > 0) { 223 System.out.format("%5d ", input2[j - 1]); 224 } else { 225 System.out.print(" "); 226 } 227 for (int i = 0; i <= length1; i++) { 228 System.out.format("%4.1f ", matrix[i][j].getScore()); 229 } 230 System.out.println(); 231 } 232 } 233 234 public void setAlignment(ArrayList<NumberSequence> alignment) { 235 this.alignment = alignment; 236 } 41 237 42 238 /** … … 54 250 } 55 251 56 /**57 * Build the score matrix using dynamic programming.58 */59 private void buildMatrix() {60 if (submat.getGapPenalty() >= 0) {61 throw new Error("Indel score must be negative");62 }63 64 // it's a gap65 matrix[0][0].setScore(0);66 matrix[0][0].setPrevious(null); // starting point67 68 // the first column69 for (int j = 1; j <= length2; j++) {70 matrix[0][j].setScore(j*submat.getGapPenalty());71 matrix[0][j].setPrevious(matrix[0][j - 1]);72 matrix[0][j].setYvalue(input2[j-1]);73 matrix[0][j].setXvalue(Constants.GAP_SYMBOL);74 }75 // the first row76 77 for (int j = 1; j <= length1; j++) {78 matrix[j][0].setScore(j*submat.getGapPenalty());79 matrix[j][0].setPrevious(matrix[j - 1][0]);80 matrix[j][0].setXvalue(input1[j-1]);81 matrix[j][0].setYvalue(Constants.GAP_SYMBOL);82 }83 84 for (int i = 1; i <= length1; i++) {85 86 for (int j = 1; j <= length2; j++) {87 double diagScore = matrix[i - 1][j - 1].getScore()88 + similarity(i, j);89 double upScore = matrix[i][j - 1].getScore()90 + submat.getGapPenalty();91 double leftScore = matrix[i - 1][j].getScore()92 + submat.getGapPenalty();93 94 matrix[i][j].setScore(Math.max(diagScore,95 Math.max(upScore, leftScore)));96 97 // find the directions that give the maximum scores.98 // TODO: Multiple directions are ignored, we choose the first99 // maximum score100 // True if we had a match101 if (diagScore == matrix[i][j].getScore()) {102 matrix[i][j].setPrevious(matrix[i - 1][j - 1]);103 matrix[i][j].setXvalue(input1[i - 1]);104 matrix[i][j].setYvalue(input2[j - 1]);105 }106 // true if we took an event from sequence x and not from y107 if (leftScore == matrix[i][j].getScore()) {108 matrix[i][j].setXvalue(input1[i - 1]);109 matrix[i][j].setYvalue(Constants.GAP_SYMBOL);110 matrix[i][j].setPrevious(matrix[i - 1][j]);111 }112 // true if we took an event from sequence y and not from x113 if (upScore == matrix[i][j].getScore()) {114 matrix[i][j].setXvalue(Constants.GAP_SYMBOL);115 matrix[i][j].setYvalue(input2[j - 1]);116 matrix[i][j].setPrevious(matrix[i][j - 1]);117 }118 }119 }120 }121 122 /**123 * Get the maximum value in the score matrix.124 */125 public double getMaxScore() {126 double maxScore = 0;127 128 // skip the first row and column129 for (int i = 1; i <= length1; i++) {130 for (int j = 1; j <= length2; j++) {131 if (matrix[i][j].getScore() > maxScore) {132 maxScore = matrix[i][j].getScore();133 }134 }135 }136 137 return maxScore;138 }139 140 /*141 * (non-Javadoc)142 *143 * @see144 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm145 * #getAlignmentScore()146 */147 @Override148 public double getAlignmentScore() {149 return getMaxScore();150 }151 152 252 public void traceback() { 153 253 MatrixEntry tmp = matrix[length1][length2]; 154 LinkedList<Integer> aligned1 = new LinkedList<Integer>();155 LinkedList<Integer> aligned2 = new LinkedList<Integer>();254 final LinkedList<Integer> aligned1 = new LinkedList<Integer>(); 255 final LinkedList<Integer> aligned2 = new LinkedList<Integer>(); 156 256 while (tmp.getPrevious() != null) { 157 257 158 258 aligned1.add(new Integer(tmp.getXvalue())); 159 259 aligned2.add(new Integer(tmp.getYvalue())); 160 260 161 261 tmp = tmp.getPrevious(); 162 } 163 262 } 263 164 264 // reverse order of the alignment 165 int reversed1[] = new int[aligned1.size()];166 int reversed2[] = new int[aligned2.size()];265 final int reversed1[] = new int[aligned1.size()]; 266 final int reversed2[] = new int[aligned2.size()]; 167 267 168 268 int count = 0; 169 for ( Iterator<Integer> it = aligned1.iterator(); it.hasNext();) {269 for (final Iterator<Integer> it = aligned1.iterator(); it.hasNext();) { 170 270 count++; 171 271 reversed1[reversed1.length - count] = it.next(); 172 272 173 273 } 174 274 count = 0; 175 for ( Iterator<Integer> it = aligned2.iterator(); it.hasNext();) {275 for (final Iterator<Integer> it = aligned2.iterator(); it.hasNext();) { 176 276 count++; 177 277 reversed2[reversed2.length - count] = it.next(); 178 278 } 179 279 180 NumberSequence ns1 = new NumberSequence(reversed1.length);181 NumberSequence ns2 = new NumberSequence(reversed2.length);280 final NumberSequence ns1 = new NumberSequence(reversed1.length); 281 final NumberSequence ns2 = new NumberSequence(reversed2.length); 182 282 ns1.setSequence(reversed1); 183 283 ns2.setSequence(reversed2); … … 187 287 } 188 288 189 190 /**191 * print the dynmaic programming matrix192 */193 public void printDPMatrix() {194 System.out.print(" ");195 for (int i = 1; i <= length1; i++)196 System.out.format("%5d", input1[i - 1]);197 System.out.println();198 for (int j = 0; j <= length2; j++) {199 if (j > 0)200 System.out.format("%5d ", input2[j - 1]);201 else {202 System.out.print(" ");203 }204 for (int i = 0; i <= length1; i++) {205 System.out.format("%4.1f ", matrix[i][j].getScore());206 }207 System.out.println();208 }209 }210 211 public void printAlignment() {212 int[] tmp1 = alignment.get(0).getSequence();213 int[] tmp2 = alignment.get(1).getSequence();214 for (int i=0; i< tmp1.length;i++) {215 if(tmp1[i] == Constants.GAP_SYMBOL) {216 System.out.print(" ___");217 }218 else if(tmp1[i] == Constants.UNMATCHED_SYMBOL) {219 System.out.print(" ...");220 }221 else {222 System.out.format("%5d", tmp1[i]);223 }224 225 }226 System.out.println();227 for (int i=0; i< tmp2.length;i++) {228 if(tmp2[i] == Constants.GAP_SYMBOL) {229 System.out.print(" ___");230 }231 else if(tmp2[i] == Constants.UNMATCHED_SYMBOL) {232 System.out.print(" ...");233 }234 else {235 System.out.format("%5d", tmp2[i]);236 }237 238 }239 System.out.println();240 241 242 243 }244 245 /*246 * (non-Javadoc)247 *248 * @see249 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm250 * #getAlignment()251 */252 @Override253 public ArrayList<NumberSequence> getAlignment() {254 return alignment;255 }256 257 public void setAlignment(ArrayList<NumberSequence> alignment) {258 this.alignment = alignment;259 }260 261 @Override262 public ArrayList<Match> getMatches() {263 // TODO Auto-generated method stub264 return null;265 }266 267 @Override268 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat,269 float threshold) {270 this.input1 = input1.getSequence();271 this.input2 = input2.getSequence();272 length1 = input1.size();273 length2 = input2.size();274 this.submat = submat;275 276 // System.out.println("Starting SmithWaterman algorithm with a "277 // + submat.getClass() + " Substitution Matrix: " +278 // submat.getClass().getCanonicalName());279 280 matrix = new MatrixEntry[length1 + 1][length2 + 1];281 alignment = new ArrayList<NumberSequence>();282 283 for (int i = 0; i < length1+1; i++) {284 for (int j = 0; j < length2+1; j++) {285 matrix[i][j] = new MatrixEntry();286 }287 }288 289 buildMatrix();290 traceback();291 292 }293 294 295 296 289 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/NumberSequence.java
r1717 r1733 5 5 import java.util.Random; 6 6 7 public class NumberSequence implements Serializable {7 public class NumberSequence implements Serializable { 8 8 /** 9 9 * … … 18 18 } 19 19 20 public int[] getSequence() { 21 return sequence; 22 } 20 // Searching occurrences of pattern 21 public LinkedList<Integer> containsPattern(Match pattern) { 22 final LinkedList<Integer> result = new LinkedList<Integer>(); 23 int i = 0; 24 final int[] pat1 = pattern.getFirstSequence().getSequence(); 25 final int[] pat2 = pattern.getSecondSequence().getSequence(); 23 26 24 public void setSequence(int[] sequence) { 25 this.sequence = sequence; 26 } 27 28 public void printSequence() { 29 for (int i = 0; i < sequence.length; i++) { 30 System.out.format("%5d", sequence[i]); 31 } 32 System.out.println(); 33 } 34 35 public NumberSequence shuffle() { 36 NumberSequence result = new NumberSequence(sequence.length); 37 result.setId(getId()); 38 result.setSequence(this.sequence); 39 Random rgen = new Random(); 40 41 for (int i = 0; i < result.sequence.length; i++) { 42 int randomPosition = rgen.nextInt(result.sequence.length); 43 int temp = result.sequence[i]; 44 result.sequence[i] = result.sequence[randomPosition]; 45 result.sequence[randomPosition] = temp; 46 } 47 return result; 48 49 } 50 51 //Recursive check if sequence contains pattern at position i 52 private boolean matches(int i, 53 int[] p1, 54 int[] p2 , 55 int ip1, 56 int ip2, 57 boolean jumped1, //True if there was a gap in Sequence 1 of the pattern 58 boolean jumped2, //True if there was a gap in Sequence 2 of the pattern 59 boolean hadSelection, //True if the last match was a selection 60 boolean matchseq1, 61 boolean matchseq2) { 62 63 if(p1.length==ip1) { 64 return true; 65 } 66 if(p2.length==ip2) { 67 return true; 68 } 69 if(i==sequence.length) { 70 return false; 71 } 72 73 boolean foundselection=(!(p1[ip1] == p2[ip2])&&!(p1[ip1]==-1||p2[ip2]==-1)); 74 boolean matchInFirstPattern = (p1[ip1]==sequence[i]); 75 boolean matchInSecondPattern = (p2[ip2]==sequence[i]); 76 77 if(foundselection && hadSelection) { 78 if((matchInFirstPattern && matchseq1) || (matchInSecondPattern && matchseq2)){ 79 if(jumped1) { 80 return matches(i+1,p1,p2,ip1+1,ip2+2,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 81 } 82 if(jumped2) { 83 return matches(i+1,p1,p2,ip1+2,ip2+1,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 84 } 85 return matches(i+1,p1,p2,ip1+1,ip2+1,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 86 } 87 else { 88 return false; 89 } 90 } 91 92 if((matchInFirstPattern||matchInSecondPattern) && jumped1) { 93 return matches(i+1,p1,p2,ip1+1,ip2+2,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 94 } 95 if((matchInFirstPattern||matchInSecondPattern) && jumped2) { 96 return matches(i+1,p1,p2,ip1+2,ip2+1,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 97 } 98 if(matchInFirstPattern||matchInSecondPattern) { 99 return matches(i+1,p1,p2,ip1+1,ip2+1,false,false,foundselection,matchInFirstPattern,matchInSecondPattern); 100 } 101 if(p1[ip1]==-1) { 102 return matches(i,p1,p2,ip1+1,ip2,true,false,false,false,false); 103 } 104 if(p2[ip2]==-1) { 105 return matches(i,p1,p2,ip1,ip2+1,false,true,false,false,false); 106 } 107 108 return false; 109 } 110 111 //Searching occurrences of pattern 112 public LinkedList<Integer> containsPattern(Match pattern) { 113 LinkedList<Integer> result = new LinkedList<Integer>(); 114 int i = 0; 115 int[] pat1 = pattern.getFirstSequence().getSequence(); 116 int[] pat2 = pattern.getSecondSequence().getSequence(); 117 118 while (i < sequence.length ) { 119 if(matches(i,pat1,pat2,0,0,false,false,false,false,false)) { 27 while (i < sequence.length) { 28 if (matches(i, pat1, pat2, 0, 0, false, false, false, false, false)) { 120 29 result.add(i); 121 30 } … … 123 32 } 124 33 return result; 34 } 35 36 public boolean equals(NumberSequence n) { 37 final int[] seq = n.getSequence(); 38 if (n.size() != this.size()) { 39 return false; 40 } 41 for (int i = 0; i < n.size(); i++) { 42 if (seq[i] != this.sequence[i]) { 43 return false; 44 } 45 } 46 return true; 125 47 } 126 48 … … 136 58 } 137 59 138 public int size() {139 return sequence.length;60 public int getId() { 61 return id; 140 62 } 141 63 142 public int getId() { 143 return id; 64 public int[] getSequence() { 65 return sequence; 66 } 67 68 // Recursive check if sequence contains pattern at position i 69 private boolean matches(int i, int[] p1, int[] p2, int ip1, int ip2, 70 boolean jumped1, // True if there was a gap in Sequence 1 of the 71 // pattern 72 boolean jumped2, // True if there was a gap in Sequence 2 of the 73 // pattern 74 boolean hadSelection, // True if the last match was a selection 75 boolean matchseq1, boolean matchseq2) { 76 77 if (p1.length == ip1) { 78 return true; 79 } 80 if (p2.length == ip2) { 81 return true; 82 } 83 if (i == sequence.length) { 84 return false; 85 } 86 87 final boolean foundselection = (!(p1[ip1] == p2[ip2]) && !((p1[ip1] == -1) || (p2[ip2] == -1))); 88 final boolean matchInFirstPattern = (p1[ip1] == sequence[i]); 89 final boolean matchInSecondPattern = (p2[ip2] == sequence[i]); 90 91 if (foundselection && hadSelection) { 92 if ((matchInFirstPattern && matchseq1) 93 || (matchInSecondPattern && matchseq2)) { 94 if (jumped1) { 95 return matches(i + 1, p1, p2, ip1 + 1, ip2 + 2, false, 96 false, foundselection, matchInFirstPattern, 97 matchInSecondPattern); 98 } 99 if (jumped2) { 100 return matches(i + 1, p1, p2, ip1 + 2, ip2 + 1, false, 101 false, foundselection, matchInFirstPattern, 102 matchInSecondPattern); 103 } 104 return matches(i + 1, p1, p2, ip1 + 1, ip2 + 1, false, false, 105 foundselection, matchInFirstPattern, 106 matchInSecondPattern); 107 } else { 108 return false; 109 } 110 } 111 112 if ((matchInFirstPattern || matchInSecondPattern) && jumped1) { 113 return matches(i + 1, p1, p2, ip1 + 1, ip2 + 2, false, false, 114 foundselection, matchInFirstPattern, matchInSecondPattern); 115 } 116 if ((matchInFirstPattern || matchInSecondPattern) && jumped2) { 117 return matches(i + 1, p1, p2, ip1 + 2, ip2 + 1, false, false, 118 foundselection, matchInFirstPattern, matchInSecondPattern); 119 } 120 if (matchInFirstPattern || matchInSecondPattern) { 121 return matches(i + 1, p1, p2, ip1 + 1, ip2 + 1, false, false, 122 foundselection, matchInFirstPattern, matchInSecondPattern); 123 } 124 if (p1[ip1] == -1) { 125 return matches(i, p1, p2, ip1 + 1, ip2, true, false, false, false, 126 false); 127 } 128 if (p2[ip2] == -1) { 129 return matches(i, p1, p2, ip1, ip2 + 1, false, true, false, false, 130 false); 131 } 132 133 return false; 134 } 135 136 public void printSequence() { 137 for (int i = 0; i < sequence.length; i++) { 138 System.out.format("%5d", sequence[i]); 139 } 140 System.out.println(); 144 141 } 145 142 … … 148 145 } 149 146 150 public boolean equals(NumberSequence n) { 151 int[] seq = n.getSequence(); 152 if (n.size() != this.size()) { 153 return false; 147 public void setSequence(int[] sequence) { 148 this.sequence = sequence; 149 } 150 151 public NumberSequence shuffle() { 152 final NumberSequence result = new NumberSequence(sequence.length); 153 result.setId(getId()); 154 result.setSequence(this.sequence); 155 final Random rgen = new Random(); 156 157 for (int i = 0; i < result.sequence.length; i++) { 158 final int randomPosition = rgen.nextInt(result.sequence.length); 159 final int temp = result.sequence[i]; 160 result.sequence[i] = result.sequence[randomPosition]; 161 result.sequence[randomPosition] = temp; 154 162 } 155 for (int i = 0; i < n.size(); i++) {156 if (seq[i] != this.sequence[i]) { 157 return false;158 } 159 }160 return true;163 return result; 164 165 } 166 167 public int size() { 168 return sequence.length; 161 169 } 162 170 -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWaterman.java
r1620 r1733 38 38 private SubstitutionMatrix submat; 39 39 40 @Override 41 public void align(NumberSequence input1, NumberSequence input2, 42 SubstitutionMatrix submat, float threshold) { 43 this.input1 = input1.getSequence(); 44 this.input2 = input2.getSequence(); 45 length1 = input1.size(); 46 length2 = input2.size(); 47 this.submat = submat; 48 49 // System.out.println("Starting SmithWaterman algorithm with a " 50 // + submat.getClass() + " Substitution Matrix: " + 51 // submat.getClass().getCanonicalName()); 52 53 matrix = new MatrixEntry[length1 + 1][length2 + 1]; 54 alignment = new ArrayList<NumberSequence>(); 55 56 for (int i = 0; i < (length1 + 1); i++) { 57 for (int j = 0; j < (length2 + 1); j++) { 58 matrix[i][j] = new MatrixEntry(); 59 } 60 } 61 62 buildMatrix(); 63 traceback(); 64 65 } 66 67 /** 68 * Build the score matrix using dynamic programming. 69 */ 70 private void buildMatrix() { 71 if (submat.getGapPenalty() >= 0) { 72 throw new Error("Indel score must be negative"); 73 } 74 75 // it's a gap 76 matrix[0][0].setScore(0); 77 matrix[0][0].setPrevious(null); // starting point 78 79 // the first column 80 for (int j = 1; j < length2; j++) { 81 matrix[0][j].setScore(0); 82 matrix[0][j].setPrevious(matrix[0][j - 1]); 83 matrix[0][j].setYvalue(input2[j]); 84 matrix[0][j].setXvalue(Constants.GAP_SYMBOL); 85 } 86 // the first row 87 88 for (int j = 1; j < length1; j++) { 89 matrix[j][0].setScore(0); 90 matrix[j][0].setPrevious(matrix[j - 1][0]); 91 matrix[j][0].setXvalue(input1[j]); 92 matrix[j][0].setYvalue(Constants.GAP_SYMBOL); 93 } 94 95 for (int i = 1; i < length1; i++) { 96 97 for (int j = 1; j < length2; j++) { 98 final double diagScore = matrix[i - 1][j - 1].getScore() 99 + similarity(i, j); 100 final double upScore = matrix[i][j - 1].getScore() 101 + submat.getGapPenalty(); 102 final double leftScore = matrix[i - 1][j].getScore() 103 + submat.getGapPenalty(); 104 105 matrix[i][j].setScore(Math.max(diagScore, 106 Math.max(upScore, Math.max(leftScore, 0)))); 107 108 // find the directions that give the maximum scores. 109 // TODO: Multiple directions are ignored, we choose the first 110 // maximum score 111 // True if we had a match 112 if (diagScore == matrix[i][j].getScore()) { 113 matrix[i][j].setPrevious(matrix[i - 1][j - 1]); 114 matrix[i][j].setXvalue(input1[i - 1]); 115 matrix[i][j].setYvalue(input2[j - 1]); 116 } 117 // true if we took an event from sequence x and not from y 118 if (leftScore == matrix[i][j].getScore()) { 119 matrix[i][j].setXvalue(input1[i - 1]); 120 matrix[i][j].setYvalue(Constants.GAP_SYMBOL); 121 matrix[i][j].setPrevious(matrix[i - 1][j]); 122 } 123 // true if we took an event from sequence y and not from x 124 if (upScore == matrix[i][j].getScore()) { 125 matrix[i][j].setXvalue(Constants.GAP_SYMBOL); 126 matrix[i][j].setYvalue(input2[j - 1]); 127 matrix[i][j].setPrevious(matrix[i][j - 1]); 128 } 129 if (0 == matrix[i][j].getScore()) { 130 matrix[i][j].setPrevious(matrix[0][0]); 131 matrix[i][j].setXvalue(-2); 132 matrix[i][j].setYvalue(-2); 133 } 134 } 135 136 // Set the complete score cell 137 138 } 139 } 140 141 /* 142 * (non-Javadoc) 143 * 144 * @see 145 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 146 * #getAlignment() 147 */ 148 @Override 149 public ArrayList<NumberSequence> getAlignment() { 150 return alignment; 151 } 152 153 /* 154 * (non-Javadoc) 155 * 156 * @see 157 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 158 * #getAlignmentScore() 159 */ 160 @Override 161 public double getAlignmentScore() { 162 return getMaxScore(); 163 } 164 165 @Override 166 public ArrayList<Match> getMatches() { 167 // TODO Auto-generated method stub 168 return null; 169 } 170 171 /** 172 * Get the maximum value in the score matrix. 173 */ 174 @Override 175 public double getMaxScore() { 176 double maxScore = 0; 177 178 // skip the first row and column 179 for (int i = 1; i <= length1; i++) { 180 for (int j = 1; j < length2; j++) { 181 if (matrix[i][j].getScore() > maxScore) { 182 maxScore = matrix[i][j].getScore(); 183 } 184 } 185 } 186 187 return maxScore; 188 } 189 190 @Override 191 public void printAlignment() { 192 MatrixEntry tmp = matrix[length1 + 1][0]; 193 String aligned1 = ""; 194 String aligned2 = ""; 195 int count = 0; 196 do { 197 String append1 = ""; 198 String append2 = ""; 199 200 if (tmp.getXvalue() == Constants.GAP_SYMBOL) { 201 append1 = " ___"; 202 } else { 203 append1 = String.format("%5d", tmp.getXvalue()); 204 } 205 206 if (tmp.getYvalue() == Constants.GAP_SYMBOL) { 207 append2 = " ___"; 208 } else { 209 append2 = String.format("%5d", tmp.getYvalue()); 210 } 211 if (count != 0) { 212 aligned1 = append1 + aligned1; 213 aligned2 = append2 + aligned2; 214 } 215 216 tmp = tmp.getPrevious(); 217 count++; 218 219 } while (tmp != null); 220 System.out.println(aligned1); 221 System.out.println(aligned2); 222 } 223 224 /** 225 * print the dynmaic programming matrix 226 */ 227 @Override 228 public void printDPMatrix() { 229 System.out.print(" "); 230 for (int i = 1; i <= length1; i++) { 231 System.out.format("%5d", input1[i - 1]); 232 } 233 System.out.println(); 234 for (int j = 0; j <= length2; j++) { 235 if (j > 0) { 236 System.out.format("%5d ", input2[j - 1]); 237 } else { 238 System.out.print(" "); 239 } 240 for (int i = 0; i <= length1; i++) { 241 System.out.format("%4.1f ", matrix[i][j].getScore()); 242 } 243 System.out.println(); 244 } 245 } 246 247 public void setAlignment(ArrayList<NumberSequence> alignment) { 248 this.alignment = alignment; 249 } 40 250 41 251 /** … … 53 263 } 54 264 55 /**56 * Build the score matrix using dynamic programming.57 */58 private void buildMatrix() {59 if (submat.getGapPenalty() >= 0) {60 throw new Error("Indel score must be negative");61 }62 63 // it's a gap64 matrix[0][0].setScore(0);65 matrix[0][0].setPrevious(null); // starting point66 67 // the first column68 for (int j = 1; j < length2; j++) {69 matrix[0][j].setScore(0);70 matrix[0][j].setPrevious(matrix[0][j - 1]);71 matrix[0][j].setYvalue(input2[j]);72 matrix[0][j].setXvalue(Constants.GAP_SYMBOL);73 }74 // the first row75 76 for (int j = 1; j < length1; j++) {77 matrix[j][0].setScore(0);78 matrix[j][0].setPrevious(matrix[j - 1][0]);79 matrix[j][0].setXvalue(input1[j]);80 matrix[j][0].setYvalue(Constants.GAP_SYMBOL);81 }82 83 for (int i = 1; i < length1; i++) {84 85 for (int j = 1; j < length2; j++) {86 double diagScore = matrix[i - 1][j - 1].getScore()87 + similarity(i, j);88 double upScore = matrix[i][j - 1].getScore()89 + submat.getGapPenalty();90 double leftScore = matrix[i - 1][j].getScore()91 + submat.getGapPenalty();92 93 matrix[i][j].setScore(Math.max(diagScore,94 Math.max(upScore, Math.max(leftScore, 0))));95 96 // find the directions that give the maximum scores.97 // TODO: Multiple directions are ignored, we choose the first98 // maximum score99 // True if we had a match100 if (diagScore == matrix[i][j].getScore()) {101 matrix[i][j].setPrevious(matrix[i - 1][j - 1]);102 matrix[i][j].setXvalue(input1[i - 1]);103 matrix[i][j].setYvalue(input2[j - 1]);104 }105 // true if we took an event from sequence x and not from y106 if (leftScore == matrix[i][j].getScore()) {107 matrix[i][j].setXvalue(input1[i - 1]);108 matrix[i][j].setYvalue(Constants.GAP_SYMBOL);109 matrix[i][j].setPrevious(matrix[i - 1][j]);110 }111 // true if we took an event from sequence y and not from x112 if (upScore == matrix[i][j].getScore()) {113 matrix[i][j].setXvalue(Constants.GAP_SYMBOL);114 matrix[i][j].setYvalue(input2[j - 1]);115 matrix[i][j].setPrevious(matrix[i][j - 1]);116 }117 if (0 == matrix[i][j].getScore()) {118 matrix[i][j].setPrevious(matrix[0][0]);119 matrix[i][j].setXvalue(-2);120 matrix[i][j].setYvalue(-2);121 }122 }123 124 // Set the complete score cell125 126 }127 }128 129 /**130 * Get the maximum value in the score matrix.131 */132 public double getMaxScore() {133 double maxScore = 0;134 135 // skip the first row and column136 for (int i = 1; i <= length1; i++) {137 for (int j = 1; j < length2; j++) {138 if (matrix[i][j].getScore() > maxScore) {139 maxScore = matrix[i][j].getScore();140 }141 }142 }143 144 return maxScore;145 }146 147 /*148 * (non-Javadoc)149 *150 * @see151 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm152 * #getAlignmentScore()153 */154 @Override155 public double getAlignmentScore() {156 return getMaxScore();157 }158 159 265 public void traceback() { 160 266 MatrixEntry tmp = matrix[length1][length2]; 161 int aligned1[] = new int[length1 + length2 + 2];162 int aligned2[] = new int[length1 + length2 + 2];267 final int aligned1[] = new int[length1 + length2 + 2]; 268 final int aligned2[] = new int[length1 + length2 + 2]; 163 269 int count = 0; 164 270 do { 165 if ( length1 + length2 + 2== count) {271 if ((length1 + length2 + 2) == count) { 166 272 Console.traceln(Level.WARNING, 167 273 "Traceback longer than both sequences summed up!"); … … 177 283 count--; 178 284 // reverse order of the alignment 179 int reversed1[] = new int[count];180 int reversed2[] = new int[count];285 final int reversed1[] = new int[count]; 286 final int reversed2[] = new int[count]; 181 287 182 288 for (int i = count - 1; i > 0; i--) { … … 185 291 } 186 292 187 NumberSequence ns1 = new NumberSequence(reversed1.length);188 NumberSequence ns2 = new NumberSequence(reversed2.length);293 final NumberSequence ns1 = new NumberSequence(reversed1.length); 294 final NumberSequence ns2 = new NumberSequence(reversed2.length); 189 295 ns1.setSequence(reversed1); 190 296 ns2.setSequence(reversed2); … … 194 300 } 195 301 196 public void printAlignment() {197 MatrixEntry tmp = matrix[length1 + 1][0];198 String aligned1 = "";199 String aligned2 = "";200 int count = 0;201 do {202 String append1 = "";203 String append2 = "";204 205 if (tmp.getXvalue() == Constants.GAP_SYMBOL) {206 append1 = " ___";207 } else {208 append1 = String.format("%5d", tmp.getXvalue());209 }210 211 if (tmp.getYvalue() == Constants.GAP_SYMBOL) {212 append2 = " ___";213 } else {214 append2 = String.format("%5d", tmp.getYvalue());215 }216 if (count != 0) {217 aligned1 = append1 + aligned1;218 aligned2 = append2 + aligned2;219 }220 221 tmp = tmp.getPrevious();222 count++;223 224 } while (tmp != null);225 System.out.println(aligned1);226 System.out.println(aligned2);227 }228 229 /**230 * print the dynmaic programming matrix231 */232 public void printDPMatrix() {233 System.out.print(" ");234 for (int i = 1; i <= length1; i++)235 System.out.format("%5d", input1[i - 1]);236 System.out.println();237 for (int j = 0; j <= length2; j++) {238 if (j > 0)239 System.out.format("%5d ", input2[j - 1]);240 else {241 System.out.print(" ");242 }243 for (int i = 0; i <= length1; i++) {244 System.out.format("%4.1f ", matrix[i][j].getScore());245 }246 System.out.println();247 }248 }249 250 /*251 * (non-Javadoc)252 *253 * @see254 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm255 * #getAlignment()256 */257 @Override258 public ArrayList<NumberSequence> getAlignment() {259 return alignment;260 }261 262 public void setAlignment(ArrayList<NumberSequence> alignment) {263 this.alignment = alignment;264 }265 266 @Override267 public ArrayList<Match> getMatches() {268 // TODO Auto-generated method stub269 return null;270 }271 272 @Override273 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat,274 float threshold) {275 this.input1 = input1.getSequence();276 this.input2 = input2.getSequence();277 length1 = input1.size();278 length2 = input2.size();279 this.submat = submat;280 281 // System.out.println("Starting SmithWaterman algorithm with a "282 // + submat.getClass() + " Substitution Matrix: " +283 // submat.getClass().getCanonicalName());284 285 matrix = new MatrixEntry[length1 + 1][length2 + 1];286 alignment = new ArrayList<NumberSequence>();287 288 for (int i = 0; i < length1+1; i++) {289 for (int j = 0; j < length2+1; j++) {290 matrix[i][j] = new MatrixEntry();291 }292 }293 294 buildMatrix();295 traceback();296 297 }298 299 302 } -
branches/autoquest-core-tasktrees-alignment/src/main/java/de/ugoe/cs/autoquest/tasktrees/alignment/algorithms/SmithWatermanRepeated.java
r1654 r1733 5 5 import java.util.LinkedList; 6 6 7 8 7 import de.ugoe.cs.autoquest.tasktrees.alignment.matrix.SubstitutionMatrix; 9 8 import de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.Constants; 10 9 11 12 10 public class SmithWatermanRepeated implements AlignmentAlgorithm { 13 11 … … 33 31 private MatrixEntry[][] matrix; 34 32 35 36 33 private ArrayList<NumberSequence> alignment; 37 34 38 35 private float scoreThreshold; 39 36 … … 44 41 45 42 public SmithWatermanRepeated() { 46 43 44 } 45 46 @Override 47 public void align(NumberSequence input1, NumberSequence input2, 48 SubstitutionMatrix submat, float threshold) { 49 50 alignment = new ArrayList<NumberSequence>(); 51 alignment.add(input1); 52 alignment.add(input2); 53 54 this.input1 = input1.getSequence(); 55 this.input2 = input2.getSequence(); 56 57 length1 = input1.size(); 58 length2 = input2.size(); 59 this.submat = submat; 60 61 // System.out.println("Starting SmithWaterman algorithm with a " 62 // + submat.getClass() + " Substitution Matrix: " + 63 // submat.getClass().getCanonicalName()); 64 this.scoreThreshold = threshold; 65 66 matrix = new MatrixEntry[length1 + 2][length2 + 1]; 67 68 for (int i = 0; i <= (length1 + 1); i++) { 69 for (int j = 0; j <= length2; j++) { 70 matrix[i][j] = new MatrixEntry(); 71 } 72 } 73 74 // Console.traceln(Level.INFO,"Generating DP Matrix"); 75 buildMatrix(); 76 // Console.traceln(Level.INFO,"Doing traceback"); 77 traceback(); 78 } 79 80 /** 81 * Build the score matrix using dynamic programming. 82 */ 83 private void buildMatrix() { 84 if (submat.getGapPenalty() >= 0) { 85 throw new Error("Indel score must be negative"); 86 } 87 88 // it's a gap 89 matrix[0][0].setScore(0); 90 matrix[0][0].setPrevious(null); // starting point 91 matrix[0][0].setXvalue(Constants.UNMATCHED_SYMBOL); 92 matrix[0][0].setYvalue(Constants.UNMATCHED_SYMBOL); 93 94 // the first column 95 for (int j = 1; j < length2; j++) { 96 matrix[0][j].setScore(0); 97 // We don't need to go back to [0][0] if we reached matrix[0][x], so 98 // just end here 99 // matrix[0][j].setPrevious(matrix[0][j-1]); 100 matrix[0][j].setPrevious(null); 101 } 102 103 for (int i = 1; i < (length1 + 2); i++) { 104 105 // Formula for first row: 106 // F(i,0) = max { F(i-1,0), F(i-1,j)-T j=1,...,m 107 108 final double firstRowLeftScore = matrix[i - 1][0].getScore(); 109 // for sequences of length 1 110 double tempMax; 111 int maxRowIndex; 112 if (length2 == 1) { 113 tempMax = matrix[i - 1][0].getScore(); 114 maxRowIndex = 0; 115 } else { 116 tempMax = matrix[i - 1][1].getScore(); 117 maxRowIndex = 1; 118 // position of the maximal score of the previous row 119 120 for (int j = 2; j <= length2; j++) { 121 if (matrix[i - 1][j].getScore() > tempMax) { 122 tempMax = matrix[i - 1][j].getScore(); 123 maxRowIndex = j; 124 } 125 } 126 127 } 128 129 tempMax -= scoreThreshold; 130 matrix[i][0].setScore(Math.max(firstRowLeftScore, tempMax)); 131 if (tempMax == matrix[i][0].getScore()) { 132 matrix[i][0].setPrevious(matrix[i - 1][maxRowIndex]); 133 } 134 135 if (firstRowLeftScore == matrix[i][0].getScore()) { 136 matrix[i][0].setPrevious(matrix[i - 1][0]); 137 } 138 139 // The last additional score is not related to a character in the 140 // input sequence, it's the total score. Therefore we don't need to 141 // save something for it 142 // and can end here 143 if (i < (length1 + 1)) { 144 matrix[i][0].setXvalue(input1[i - 1]); 145 matrix[i][0].setYvalue(Constants.UNMATCHED_SYMBOL); 146 } else { 147 return; 148 } 149 150 for (int j = 1; j <= length2; j++) { 151 final double diagScore = matrix[i - 1][j - 1].getScore() 152 + similarity(i, j); 153 final double upScore = matrix[i][j - 1].getScore() 154 + submat.getGapPenalty(); 155 final double leftScore = matrix[i - 1][j].getScore() 156 + submat.getGapPenalty(); 157 158 matrix[i][j].setScore(Math.max( 159 diagScore, 160 Math.max(upScore, 161 Math.max(leftScore, matrix[i][0].getScore())))); 162 163 // find the directions that give the maximum scores. 164 // TODO: Multiple directions are ignored, we choose the first 165 // maximum score 166 // True if we had a match 167 if (diagScore == matrix[i][j].getScore()) { 168 matrix[i][j].setPrevious(matrix[i - 1][j - 1]); 169 matrix[i][j].setXvalue(input1[i - 1]); 170 matrix[i][j].setYvalue(input2[j - 1]); 171 } 172 // true if we took an event from sequence x and not from y 173 if (leftScore == matrix[i][j].getScore()) { 174 matrix[i][j].setXvalue(input1[i - 1]); 175 matrix[i][j].setYvalue(Constants.GAP_SYMBOL); 176 matrix[i][j].setPrevious(matrix[i - 1][j]); 177 } 178 // true if we took an event from sequence y and not from x 179 if (upScore == matrix[i][j].getScore()) { 180 matrix[i][j].setXvalue(Constants.GAP_SYMBOL); 181 matrix[i][j].setYvalue(input2[j - 1]); 182 matrix[i][j].setPrevious(matrix[i][j - 1]); 183 } 184 // true if we ended a matching region 185 if (matrix[i][0].getScore() == matrix[i][j].getScore()) { 186 matrix[i][j].setPrevious(matrix[i][0]); 187 matrix[i][j].setXvalue(input1[i - 1]); 188 matrix[i][j].setYvalue(Constants.UNMATCHED_SYMBOL); 189 } 190 } 191 192 // Set the complete score cell 193 194 } 195 } 196 197 /* 198 * (non-Javadoc) 199 * 200 * @see 201 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 202 * #getAlignment() 203 */ 204 @Override 205 public ArrayList<NumberSequence> getAlignment() { 206 return alignment; 207 } 208 209 /* 210 * (non-Javadoc) 211 * 212 * @see 213 * de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm 214 * #getAlignmentScore() 215 */ 216 @Override 217 public double getAlignmentScore() { 218 return matrix[length1 + 1][0].getScore(); 219 } 220 221 @Override 222 public ArrayList<Match> getMatches() { 223 final ArrayList<Match> result = new ArrayList<Match>(); 224 225 // both alignment sequences should be equally long 226 int i = 0; 227 final int[] seq1 = alignment.get(0).getSequence(); 228 final int[] seq2 = alignment.get(1).getSequence(); 229 int start = 0; 230 while (i < seq1.length) { 231 if (seq2[i] != Constants.UNMATCHED_SYMBOL) { 232 start = i; 233 int count = 0; 234 while ((i < seq2.length) 235 && (seq2[i] != Constants.UNMATCHED_SYMBOL)) { 236 i++; 237 count++; 238 } 239 // I am really missing memcpy here? How does one do this better 240 // in java? 241 final int[] tmp1 = new int[count]; 242 final int[] tmp2 = new int[count]; 243 for (int j = 0; j < count; j++) { 244 tmp1[j] = seq1[start + j]; 245 tmp2[j] = seq2[start + j]; 246 } 247 final NumberSequence tmpns1 = new NumberSequence(count); 248 final NumberSequence tmpns2 = new NumberSequence(count); 249 tmpns1.setSequence(tmp1); 250 tmpns2.setSequence(tmp2); 251 final Match tmpal = new Match(); 252 tmpal.setFirstSequence(tmpns1); 253 tmpal.setSecondSequence(tmpns2); 254 // tmpal.addOccurence(new 255 // MatchOccurence(start,alignment.get(0).getId())); 256 // tmpal.addOccurence(new 257 // MatchOccurence(start,alignment.get(1).getId())); 258 result.add(tmpal); 259 } 260 i++; 261 } 262 return result; 263 } 264 265 /** 266 * Get the maximum value in the score matrix. 267 */ 268 @Override 269 public double getMaxScore() { 270 double maxScore = 0; 271 272 // skip the first row and column 273 for (int i = 1; i <= length1; i++) { 274 for (int j = 1; j <= length2; j++) { 275 if (matrix[i][j].getScore() > maxScore) { 276 maxScore = matrix[i][j].getScore(); 277 } 278 } 279 } 280 281 return maxScore; 282 } 283 284 @Override 285 public void printAlignment() { 286 final int[] tmp1 = alignment.get(0).getSequence(); 287 final int[] tmp2 = alignment.get(1).getSequence(); 288 for (int i = 0; i < tmp1.length; i++) { 289 if (tmp1[i] == Constants.GAP_SYMBOL) { 290 System.out.print(" ___"); 291 } else if (tmp1[i] == Constants.UNMATCHED_SYMBOL) { 292 System.out.print(" ..."); 293 } else { 294 System.out.format("%5d", tmp1[i]); 295 } 296 297 } 298 System.out.println(); 299 for (int i = 0; i < tmp2.length; i++) { 300 if (tmp2[i] == Constants.GAP_SYMBOL) { 301 System.out.print(" ___"); 302 } else if (tmp2[i] == Constants.UNMATCHED_SYMBOL) { 303 System.out.print(" ..."); 304 } else { 305 System.out.format("%5d", tmp2[i]); 306 } 307 308 } 309 System.out.println(); 310 311 } 312 313 /** 314 * print the dynmaic programming matrix 315 */ 316 @Override 317 public void printDPMatrix() { 318 System.out.print(" "); 319 for (int i = 1; i <= length1; i++) { 320 System.out.format("%5d", input1[i - 1]); 321 } 322 System.out.println(); 323 for (int j = 0; j <= length2; j++) { 324 if (j > 0) { 325 System.out.format("%5d ", input2[j - 1]); 326 } else { 327 System.out.print(" "); 328 } 329 for (int i = 0; i <= (length1 + 1); i++) { 330 if ((i < (length1 + 1)) || ((i == (length1 + 1)) && (j == 0))) { 331 System.out.format("%4.1f ", matrix[i][j].getScore()); 332 } 333 334 } 335 System.out.println(); 336 } 337 } 338 339 public void setAlignment(ArrayList<NumberSequence> alignment) { 340 this.alignment = alignment; 47 341 } 48 342 … … 57 351 * @return Cost of substitution of the character in str1 by the one in str2 58 352 */ 59 private double similarity(int i, int j) { 353 private double similarity(int i, int j) { 60 354 return submat.getScore(input1[i - 1], input2[j - 1]); 61 355 } 62 356 63 /**64 * Build the score matrix using dynamic programming.65 */66 private void buildMatrix() {67 if (submat.getGapPenalty() >= 0) {68 throw new Error("Indel score must be negative");69 }70 71 // it's a gap72 matrix[0][0].setScore(0);73 matrix[0][0].setPrevious(null); // starting point74 matrix[0][0].setXvalue(Constants.UNMATCHED_SYMBOL);75 matrix[0][0].setYvalue(Constants.UNMATCHED_SYMBOL);76 77 // the first column78 for (int j = 1; j < length2; j++) {79 matrix[0][j].setScore(0);80 //We don't need to go back to [0][0] if we reached matrix[0][x], so just end here81 //matrix[0][j].setPrevious(matrix[0][j-1]);82 matrix[0][j].setPrevious(null);83 }84 85 86 87 for (int i = 1; i < length1 + 2; i++) {88 89 // Formula for first row:90 // F(i,0) = max { F(i-1,0), F(i-1,j)-T j=1,...,m91 92 double firstRowLeftScore = matrix[i-1][0].getScore();93 //for sequences of length 194 double tempMax;95 int maxRowIndex;96 if(length2 == 1) {97 tempMax = matrix[i-1][0].getScore();98 maxRowIndex = 0;99 } else {100 tempMax = matrix[i-1][1].getScore();101 maxRowIndex = 1;102 //position of the maximal score of the previous row103 104 for(int j = 2; j <= length2;j++) {105 if(matrix[i-1][j].getScore() > tempMax) {106 tempMax = matrix[i-1][j].getScore();107 maxRowIndex = j;108 }109 }110 111 }112 113 tempMax -= scoreThreshold;114 matrix[i][0].setScore(Math.max(firstRowLeftScore, tempMax));115 if(tempMax == matrix[i][0].getScore()){116 matrix[i][0].setPrevious(matrix[i-1][maxRowIndex]);117 }118 119 if(firstRowLeftScore == matrix[i][0].getScore()) {120 matrix[i][0].setPrevious(matrix[i-1][0]);121 }122 123 //The last additional score is not related to a character in the input sequence, it's the total score. Therefore we don't need to save something for it124 //and can end here125 if(i<length1+1) {126 matrix[i][0].setXvalue(input1[i-1]);127 matrix[i][0].setYvalue(Constants.UNMATCHED_SYMBOL);128 }129 else {130 return;131 }132 133 134 135 for (int j = 1; j <= length2; j++) {136 double diagScore = matrix[i - 1][j - 1].getScore() + similarity(i, j);137 double upScore = matrix[i][j - 1].getScore() + submat.getGapPenalty();138 double leftScore = matrix[i - 1][j].getScore() + submat.getGapPenalty();139 140 matrix[i][j].setScore(Math.max(diagScore,Math.max(upScore, Math.max(leftScore,matrix[i][0].getScore()))));141 142 // find the directions that give the maximum scores.143 // TODO: Multiple directions are ignored, we choose the first maximum score144 //True if we had a match145 if (diagScore == matrix[i][j].getScore()) {146 matrix[i][j].setPrevious(matrix[i-1][j-1]);147 matrix[i][j].setXvalue(input1[i-1]);148 matrix[i][j].setYvalue(input2[j-1]);149 }150 //true if we took an event from sequence x and not from y151 if (leftScore == matrix[i][j].getScore()) {152 matrix[i][j].setXvalue(input1[i-1]);153 matrix[i][j].setYvalue(Constants.GAP_SYMBOL);154 matrix[i][j].setPrevious(matrix[i-1][j]);155 }156 //true if we took an event from sequence y and not from x157 if (upScore == matrix[i][j].getScore()) {158 matrix[i][j].setXvalue(Constants.GAP_SYMBOL);159 matrix[i][j].setYvalue(input2[j-1]);160 matrix[i][j].setPrevious(matrix[i][j-1]);161 }162 //true if we ended a matching region163 if (matrix[i][0].getScore() == matrix[i][j].getScore()) {164 matrix[i][j].setPrevious(matrix[i][0]);165 matrix[i][j].setXvalue(input1[i-1]);166 matrix[i][j].setYvalue(Constants.UNMATCHED_SYMBOL);167 }168 }169 170 //Set the complete score cell171 172 }173 }174 175 /**176 * Get the maximum value in the score matrix.177 */178 public double getMaxScore() {179 double maxScore = 0;180 181 // skip the first row and column182 for (int i = 1; i <= length1; i++) {183 for (int j = 1; j <= length2; j++) {184 if (matrix[i][j].getScore() > maxScore) {185 maxScore = matrix[i][j].getScore();186 }187 }188 }189 190 return maxScore;191 }192 193 /* (non-Javadoc)194 * @see de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm#getAlignmentScore()195 */196 @Override197 public double getAlignmentScore() {198 return matrix[length1+1][0].getScore();199 }200 201 357 public void traceback() { 202 MatrixEntry tmp = matrix[length1 +1][0].getPrevious();203 LinkedList<Integer> aligned1 = new LinkedList<Integer>();204 LinkedList<Integer> aligned2 = new LinkedList<Integer>();358 MatrixEntry tmp = matrix[length1 + 1][0].getPrevious(); 359 final LinkedList<Integer> aligned1 = new LinkedList<Integer>(); 360 final LinkedList<Integer> aligned2 = new LinkedList<Integer>(); 205 361 while (tmp.getPrevious() != null) { 206 362 207 363 aligned1.add(new Integer(tmp.getXvalue())); 208 364 aligned2.add(new Integer(tmp.getYvalue())); 209 365 210 366 tmp = tmp.getPrevious(); 211 } 212 367 } 368 213 369 // reverse order of the alignment 214 int reversed1[] = new int[aligned1.size()];215 int reversed2[] = new int[aligned2.size()];370 final int reversed1[] = new int[aligned1.size()]; 371 final int reversed2[] = new int[aligned2.size()]; 216 372 217 373 int count = 0; 218 for ( Iterator<Integer> it = aligned1.iterator(); it.hasNext();) {374 for (final Iterator<Integer> it = aligned1.iterator(); it.hasNext();) { 219 375 count++; 220 376 reversed1[reversed1.length - count] = it.next(); 221 377 222 378 } 223 379 count = 0; 224 for ( Iterator<Integer> it = aligned2.iterator(); it.hasNext();) {380 for (final Iterator<Integer> it = aligned2.iterator(); it.hasNext();) { 225 381 count++; 226 382 reversed2[reversed2.length - count] = it.next(); 227 383 } 228 384 229 230 NumberSequence ns1 = new NumberSequence(reversed1.length); 231 NumberSequence ns2 = new NumberSequence(reversed2.length); 385 final NumberSequence ns1 = new NumberSequence(reversed1.length); 386 final NumberSequence ns2 = new NumberSequence(reversed2.length); 232 387 ns1.setSequence(reversed1); 233 388 ns2.setSequence(reversed2); 234 389 ns1.setId(alignment.get(0).getId()); 235 390 ns2.setId(alignment.get(1).getId()); 236 391 237 392 alignment.set(0, ns1); 238 393 alignment.set(1, ns2); 239 394 } 240 241 242 243 public void printAlignment() {244 int[] tmp1 = alignment.get(0).getSequence();245 int[] tmp2 = alignment.get(1).getSequence();246 for (int i=0; i< tmp1.length;i++) {247 if(tmp1[i] == Constants.GAP_SYMBOL) {248 System.out.print(" ___");249 }250 else if(tmp1[i] == Constants.UNMATCHED_SYMBOL) {251 System.out.print(" ...");252 }253 else {254 System.out.format("%5d", tmp1[i]);255 }256 257 }258 System.out.println();259 for (int i=0; i< tmp2.length;i++) {260 if(tmp2[i] == Constants.GAP_SYMBOL) {261 System.out.print(" ___");262 }263 else if(tmp2[i] == Constants.UNMATCHED_SYMBOL) {264 System.out.print(" ...");265 }266 else {267 System.out.format("%5d", tmp2[i]);268 }269 270 }271 System.out.println();272 273 274 275 }276 277 public ArrayList<Match> getMatches() {278 ArrayList<Match> result = new ArrayList<Match>();279 280 //both alignment sequences should be equally long281 int i = 0;282 int[] seq1 = alignment.get(0).getSequence();283 int[] seq2 = alignment.get(1).getSequence();284 int start = 0;285 while (i < seq1.length){286 if(seq2[i] != Constants.UNMATCHED_SYMBOL) {287 start = i;288 int count = 0;289 while(i < seq2.length && seq2[i] != Constants.UNMATCHED_SYMBOL) {290 i++;291 count++;292 }293 //I am really missing memcpy here? How does one do this better in java?294 int[] tmp1 = new int[count];295 int[] tmp2 = new int[count];296 for (int j = 0; j<count;j++) {297 tmp1[j] = seq1[start+j];298 tmp2[j] = seq2[start+j];299 }300 NumberSequence tmpns1 = new NumberSequence(count);301 NumberSequence tmpns2 = new NumberSequence(count);302 tmpns1.setSequence(tmp1);303 tmpns2.setSequence(tmp2);304 Match tmpal = new Match();305 tmpal.setFirstSequence(tmpns1);306 tmpal.setSecondSequence(tmpns2);307 //tmpal.addOccurence(new MatchOccurence(start,alignment.get(0).getId()));308 //tmpal.addOccurence(new MatchOccurence(start,alignment.get(1).getId()));309 result.add(tmpal);310 }311 i++;312 }313 return result;314 }315 316 /**317 * print the dynmaic programming matrix318 */319 public void printDPMatrix() {320 System.out.print(" ");321 for (int i = 1; i <= length1; i++)322 System.out.format("%5d", input1[i - 1]);323 System.out.println();324 for (int j = 0; j <= length2; j++) {325 if (j > 0)326 System.out.format("%5d ",input2[j - 1]);327 else{328 System.out.print(" ");329 }330 for (int i = 0; i <= length1 + 1; i++) {331 if((i<length1+1) || (i==length1+1 && j==0)) {332 System.out.format("%4.1f ",matrix[i][j].getScore());333 }334 335 }336 System.out.println();337 }338 }339 340 341 /* (non-Javadoc)342 * @see de.ugoe.cs.autoquest.tasktrees.alignment.algorithms.AlignmentAlgorithm#getAlignment()343 */344 @Override345 public ArrayList<NumberSequence> getAlignment() {346 return alignment;347 }348 349 public void setAlignment(ArrayList<NumberSequence> alignment) {350 this.alignment = alignment;351 }352 353 @Override354 public void align(NumberSequence input1, NumberSequence input2, SubstitutionMatrix submat,355 float threshold) {356 357 alignment = new ArrayList<NumberSequence>();358 alignment.add(input1);359 alignment.add(input2);360 361 this.input1=input1.getSequence();362 this.input2=input2.getSequence();363 364 length1 = input1.size();365 length2 = input2.size();366 this.submat = submat;367 368 //System.out.println("Starting SmithWaterman algorithm with a "369 // + submat.getClass() + " Substitution Matrix: " + submat.getClass().getCanonicalName());370 this.scoreThreshold = threshold;371 372 matrix = new MatrixEntry[length1+2][length2+1];373 374 375 for (int i = 0; i <= length1+1; i++) {376 for(int j = 0; j<= length2; j++) {377 matrix[i][j] = new MatrixEntry();378 }379 }380 381 //Console.traceln(Level.INFO,"Generating DP Matrix");382 buildMatrix();383 //Console.traceln(Level.INFO,"Doing traceback");384 traceback();385 }386 395 387 396 }
Note: See TracChangeset
for help on using the changeset viewer.