Multiple sequence alignment in Java












6














(The entire project lives here.)



Problem definition



Suppose we are given three genomic strings drawn from the alphabet of 20 amino acids:



ACGH
CFG
EAC


In multiple sequence alignment problem we want to align them optimally in order to access their evolutionary similarity. Such an alignment may look like the following:



-AC-GH
--CFG-
EAC---


Solution



Often, the problem is cast as a pathfinding problem in $k$-dimensional lattice, where $k$ is the number of strings. Here is my program, it compares A* with Dijkstra's algorithms:



MultipleSequenceAlignmentInstance.java



package net.coderodde.bio.msa;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

public final class MultipleSequenceAlignmentInstance {

/**
* The penalty for pairs in which there is one valid character and one gap.
*/
private final int gapPenalty;

/**
* The character pairs cost matrix.
*/
private final CostMatrix<Integer> costMatrix;

/**
* The array of sequences to be aligned.
*/
private final String sequenceArray;

// A small speed optimization:
private final char column;

public MultipleSequenceAlignmentInstance(CostMatrix<Integer> costMatrix,
int gapPenalty,
String... sequenceArray) {
this.costMatrix = Objects.requireNonNull(costMatrix,
"Cost matrix is null");
this.gapPenalty = gapPenalty;
this.sequenceArray = sequenceArray.clone();
this.column = new char[sequenceArray.length];

for (int i = 0; i != sequenceArray.length; ++i) {
this.sequenceArray[i] =
checkIsValidGenomicSequence(sequenceArray[i]);
}
}

public Alignment align() {
long start = System.currentTimeMillis();
HeuristicFunction hf = new HeuristicFunctionComputer()
.computeHeuristicFunction(this);
long end = System.currentTimeMillis();

System.out.println("Computed heuristic function in " + (end - start) +
" milliseconds.");

Queue<LatticeNodeHolder> open = new PriorityQueue<>();
Map<LatticeNode, Integer> distance = new HashMap<>();
Map<LatticeNode, LatticeNode> parents = new HashMap<>();

LatticeNode sourceNode = getSourceNode();
LatticeNode targetNode = getTargetNode();

open.add(new LatticeNodeHolder(sourceNode, 0));
distance.put(sourceNode, 0);
parents.put(sourceNode, null);

Set<LatticeNode> closed = new HashSet<>();

while (true) {
LatticeNode currentNode = open.remove().getNode();

if (currentNode.equals(targetNode)) {
return tracebackPath(parents, distance.get(currentNode));
}

if (closed.contains(currentNode)) {
continue;
}

closed.add(currentNode);

for (LatticeNode childNode : currentNode.getChildren()) {
if (closed.contains(childNode)) {
continue;
}

int tentativeCost = distance.get(currentNode) +
getWeight(currentNode, childNode);
Integer currentCost = distance.get(childNode);

if (currentCost == null || currentCost > tentativeCost) {
open.add(new LatticeNodeHolder(childNode,
tentativeCost +
hf.get(childNode)));
distance.put(childNode, tentativeCost);
parents.put(childNode, currentNode);
}
}
}
}

public Alignment alignBrute() {
Queue<LatticeNodeHolder> open = new PriorityQueue<>();
Map<LatticeNode, Integer> distance = new HashMap<>();
Map<LatticeNode, LatticeNode> parents = new HashMap<>();

LatticeNode sourceNode = getSourceNode();
LatticeNode targetNode = getTargetNode();

open.add(new LatticeNodeHolder(sourceNode, 0));
distance.put(sourceNode, 0);
parents.put(sourceNode, null);

Set<LatticeNode> closed = new HashSet<>();

while (true) {
LatticeNode currentNode = open.remove().getNode();

if (currentNode.equals(targetNode)) {
return tracebackPath(parents, distance.get(currentNode));
}

if (closed.contains(currentNode)) {
continue;
}

closed.add(currentNode);

for (LatticeNode childNode : currentNode.getChildren()) {
if (closed.contains(childNode)) {
continue;
}

int tentativeCost = distance.get(currentNode) +
getWeight(currentNode, childNode);
Integer currentCost = distance.get(childNode);

if (currentCost == null || currentCost > tentativeCost) {
open.add(new LatticeNodeHolder(childNode, tentativeCost));
distance.put(childNode, tentativeCost);
parents.put(childNode, currentNode);
}
}
}
}

/**
* Create the source node for the sequence alignment task, i.e., a lattice
* node with all coordinates set to zero.
*
* @return the source node.
*/
LatticeNode getSourceNode() {
int sourceCoordinates = new int[sequenceArray.length];
return new LatticeNode(this, sourceCoordinates);
}

/**
* Create the target node for the sequence alignment task, i.e., a lattice
* node with all coordinates set to the respective sequence lengths.
*
* @return the target node.
*/
LatticeNode getTargetNode() {
int targetCoordinates = new int[sequenceArray.length];

for (int i = 0; i != sequenceArray.length; ++i) {
targetCoordinates[i] = sequenceArray[i].length();
}

return new LatticeNode(this, targetCoordinates);
}

Integer getWeight(LatticeNode tail,
LatticeNode head,
int dimension1,
int dimension2) {
int tailCoordinates = tail.getCoordinates();
int headCoordinates = head.getCoordinates();

if (tailCoordinates[dimension1] == headCoordinates[dimension1]) {
//System.out.println("1");
return gapPenalty;
} else if (tailCoordinates[dimension2] == headCoordinates[dimension2]) {
//System.out.println("2");
return gapPenalty;
} else {
// System.out.println("3");
char character1 = sequenceArray[dimension1]
.charAt(tailCoordinates[dimension1]);

char character2 = sequenceArray[dimension2]
.charAt(tailCoordinates[dimension2]);
return costMatrix.getCost(character1, character2);
}
}

Integer getWeight(LatticeNode tail, LatticeNode head) {
// Extract the column represented by taking a single hop from 'tail' to
// 'head':
int tailCoordinates = tail.getCoordinates();
int headCoordinates = head.getCoordinates();

for (int i = 0; i < sequenceArray.length; ++i) {
if (tailCoordinates[i] + 1 == headCoordinates[i]) {
column[i] = sequenceArray[i].charAt(tailCoordinates[i]);
} else {
column[i] = AminoAcidAlphabet.GAP_CHARACTER;
}
}

// Compute the hop cost as the sum of pairwise hops in any plane:
int cost = 0;

for (int i = 0; i < column.length; ++i) {
char character1 = column[i];

for (int j = i + 1; j < column.length; ++j) {
char character2 = column[j];

if (character1 != AminoAcidAlphabet.GAP_CHARACTER) {
if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
cost += costMatrix.getCost(character1, character2);
} else {
cost += gapPenalty;
}
} else {
// character1 IS the gap character:
if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
cost += gapPenalty;
} else {
// Do nothing since we have a pair (gap, gap).
}
}
}
}

return cost;
}

String getSequenceArray() {
return sequenceArray;
}

private Alignment tracebackPath(Map<LatticeNode, LatticeNode> parents,
Integer cost) {
List<LatticeNode> path = new ArrayList<>();
LatticeNode node = getTargetNode();

while (node != null) {
path.add(node);
node = parents.get(node);
}

Collections.<LatticeNode>reverse(path);

String strings = new String[getSequenceArray().length];
StringBuilder stringBuilders = new StringBuilder[strings.length];

for (int i = 0; i < stringBuilders.length; ++i) {
stringBuilders[i] = new StringBuilder();
}

for (int i = 1; i < path.size(); ++i) {
LatticeNode tail = path.get(i - 1);
LatticeNode head = path.get(i);

int tailCoordinates = tail.getCoordinates();
int headCoordinates = head.getCoordinates();

for (int j = 0; j < tailCoordinates.length; ++j) {
if (tailCoordinates[j] != headCoordinates[j]) {
stringBuilders[j].
append(sequenceArray[j].charAt(tailCoordinates[j]));
} else {
stringBuilders[j].append(AminoAcidAlphabet.GAP_CHARACTER);
}
}
}

for (int i = 0; i < strings.length; ++i) {
strings[i] = stringBuilders[i].toString();
}

return new Alignment(strings, cost);
}

private String checkIsValidGenomicSequence(String string) {
Set<Character> characterSet =
AminoAcidAlphabet.getAminoAcidAlphabet()
.getCharacterSet();

for (char c : string.toCharArray()) {
if (!characterSet.contains(c)) {
throw new IllegalArgumentException("Unknown amino acid: " + c);
}
}

return string;
}

private static final class LatticeNodeHolder
implements Comparable<LatticeNodeHolder> {

private final Integer fScore;
private final LatticeNode node;

LatticeNodeHolder(LatticeNode node, Integer fScore) {
this.fScore = fScore;
this.node = node;
}

LatticeNode getNode() {
return node;
}

@Override
public int compareTo(LatticeNodeHolder o) {
return Integer.compare(fScore, o.fScore);
}
}
}


HeuristicFunctionComputer.java



package net.coderodde.bio.msa;

import java.awt.Point;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

final class HeuristicFunctionComputer {

HeuristicFunction computeHeuristicFunction(
MultipleSequenceAlignmentInstance instance) {
String sequenceArray = instance.getSequenceArray();
HeuristicFunction heuristicFunction = new HeuristicFunction(instance);

for (int dimension1 = 0;
dimension1 < sequenceArray.length;
dimension1++) {
for (int dimension2 = dimension1 + 1;
dimension2 < sequenceArray.length;
dimension2++) {
loadPartialHeuristicFunction(heuristicFunction,
dimension1,
dimension2,
instance);
}
}

return heuristicFunction;
}

// Basically, this method runs Dijkstra backwards from the target node until
// the entire 2D-grid is explored.
private void loadPartialHeuristicFunction(
HeuristicFunction heuristicFunction,
int dimension1,
int dimension2,
MultipleSequenceAlignmentInstance instance) {
LatticeNode target = instance.getTargetNode();
Queue<LatticeNodeHolder> open = new PriorityQueue<>();
Set<LatticeNode> closed = new HashSet<>();
open.add(new LatticeNodeHolder(target, 0));
Point point = new Point();

while (!open.isEmpty()) {
LatticeNodeHolder currentNodeHolder = open.remove();
LatticeNode currentNode = currentNodeHolder.getLatticeNode();

if (closed.contains(currentNode)) {
continue;
}

closed.add(currentNode);

Integer currentNodeCost = currentNodeHolder.getCost();
extractPoint(point, currentNode, dimension1, dimension2);
heuristicFunction.put(dimension1,
dimension2,
new Point(point),
currentNodeCost);

for (LatticeNode parent : currentNode.getParents()) {
if (closed.contains(parent)) {
continue;
}

int tentativeCost =
currentNodeCost + instance.getWeight(parent,
currentNode,
dimension1,
dimension2);
extractPoint(point, parent, dimension1, dimension2);

if (!heuristicFunction.containsPartial(dimension1,
dimension2,
point)
|| heuristicFunction.getPartial(dimension1,
dimension2,
point)
> tentativeCost) {
open.add(new LatticeNodeHolder(parent, tentativeCost));
}
}
}
}

private static void extractPoint(Point point,
LatticeNode latticeNode,
int i,
int j) {
int coordinates = latticeNode.getCoordinates();
point.x = coordinates[i];
point.y = coordinates[j];
}

private static final class LatticeNodeHolder
implements Comparable<LatticeNodeHolder> {

private final LatticeNode node;
private final Integer cost;

LatticeNodeHolder(LatticeNode node, Integer cost) {
this.node = node;
this.cost = cost;
}

LatticeNode getLatticeNode() {
return node;
}

Integer getCost() {
return cost;
}

@Override
public int compareTo(LatticeNodeHolder o) {
return Integer.compare(cost, o.cost);
}
}
}


LatticeNode.java



package net.coderodde.bio.msa;

import com.sun.corba.se.spi.orb.OperationFactory;
import java.util.Arrays;

final class LatticeNode {

/**
* The problem instance this lattice node contributes to.
*/
private final MultipleSequenceAlignmentInstance instance;

/**
* The coordinates of this node in the entire lattice.
*/
private final int coordinates;

LatticeNode(MultipleSequenceAlignmentInstance instance, int coordinates) {
this.instance = instance;
this.coordinates = coordinates;
}

@Override
public boolean equals(Object o) {
return Arrays.equals(coordinates, ((LatticeNode) o).coordinates);
}

// Generated by NetBeans 8.1:
@Override
public int hashCode() {
int hash = 7;
hash = 41 * hash + Arrays.hashCode(this.coordinates);
return hash;
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder("[");
String separator = "";

for (int coordinate : coordinates) {
sb.append(separator).append(coordinate);
separator = ", ";
}

return sb.append("]").toString();
}

LatticeNode getChildren() {
// Find out in how many dimension we can move forward:
int dimensionsNotReached = 0;
String sequenceArray = instance.getSequenceArray();
boolean inclusionMap = new boolean[coordinates.length];

for (int i = 0; i < sequenceArray.length; ++i) {
if (coordinates[i] < sequenceArray[i].length()) {
dimensionsNotReached++;
// We can make a step forward in the direction of ith dimension:
inclusionMap[i] = true;
}
}

// Create the array of children:
int numberOfChildren = pow2(dimensionsNotReached) - 1;
LatticeNode children = new LatticeNode[numberOfChildren];
loadChildren(children, inclusionMap);

// Convert offsets to actual child coordinates:
for (LatticeNode child : children) {
child.addOffsets(coordinates);
}

return children;
}

LatticeNode getParents() {
// Find out in how many dimensions we can move BACKward:
int dimensionsNotReached = 0;
String sequenceArray = instance.getSequenceArray();
boolean inclusionMap = new boolean[coordinates.length];

for (int i = 0; i < sequenceArray.length; ++i) {
if (coordinates[i] > 0) {
dimensionsNotReached++;
// We can make a step backwards in the direction of ith
// dimension:
inclusionMap[i] = true;
}
}

// Create the array of parents:
int numberOfParents = pow2(dimensionsNotReached) - 1;
LatticeNode parents = new LatticeNode[numberOfParents];
loadParents(parents, inclusionMap);

// Convert the offsets to actual parent coordinates:
for (LatticeNode parent : parents) {
parent.addOffsets(coordinates);
}

return parents;
}

int getCoordinates() {
return coordinates;
}

private void loadChildren(LatticeNode children, boolean inclusionMap) {
int coords = new int[this.coordinates.length];

for (int i = 0; i != children.length; ++i) {
increment(coords, inclusionMap);
children[i] = new LatticeNode(instance, coords.clone());
}
}

private void loadParents(LatticeNode parents, boolean inclusionMap) {
int coords = new int[this.coordinates.length];

for (int i = 0; i != parents.length; ++i) {
decrement(coords, inclusionMap);
parents[i] = new LatticeNode(instance, coords.clone());
}
}

private static void increment(int coordinates, boolean inclusionMap) {
for (int i = 0; i < coordinates.length; ++i) {
if (!inclusionMap[i]) {
continue;
}

if (coordinates[i] == 0) {
coordinates[i] = 1;
return;
}

coordinates[i] = 0;
}
}

private static void decrement(int coordinates, boolean inclusionMap) {
for (int i = 0; i < coordinates.length; ++i) {
if (!inclusionMap[i]) {
continue;
}

if (coordinates[i] == 0) {
coordinates[i] = -1;
return;
}

coordinates[i] = 0;
}
}

private void addOffsets(int offsets) {
for (int i = 0; i < coordinates.length; ++i) {
coordinates[i] += offsets[i];
}
}

private static int pow2(int exponent) {
int ret = 1;

for (int e = 0; e < exponent; ++e) {
ret *= 2;
}

return ret;
}
}


PAM250CostMatrix.java



package net.coderodde.bio.msa;

import java.util.HashMap;
import java.util.Map;
import net.coderodde.bio.msa.AminoAcidAlphabet;
import net.coderodde.bio.msa.CostMatrix;

public final class PAM250CostMatrix implements CostMatrix<Integer> {

private static PAM250CostMatrix instance;

private final Map<Character, Map<Character, Integer>> m = new HashMap<>();

public static PAM250CostMatrix getPAM250CostMatrix() {
if (instance == null) {
instance = new PAM250CostMatrix();
}

return instance;
}

private PAM250CostMatrix() {
AminoAcidAlphabet alphabet = AminoAcidAlphabet.getAminoAcidAlphabet();

alphabet.getCharacterSet().stream().forEach((character) -> {
m.put(character, new HashMap<>());
});

m.get('A').put('A', -2);
m.get('R').put('A', 2);
m.get('A').put('R', 2);
m.get('R').put('R', -6);
m.get('N').put('A', 0);
m.get('A').put('N', 0);
m.get('N').put('R', 0);
m.get('R').put('N', 0);
m.get('N').put('N', -2);
m.get('D').put('A', 0);
m.get('A').put('D', 0);
m.get('D').put('R', 1);
m.get('R').put('D', 1);
m.get('D').put('N', -2);
m.get('N').put('D', -2);
m.get('D').put('D', -4);
m.get('C').put('A', 2);
m.get('A').put('C', 2);
m.get('C').put('R', 4);
m.get('R').put('C', 4);
m.get('C').put('N', 4);
m.get('N').put('C', 4);
m.get('C').put('D', 5);
m.get('D').put('C', 5);
m.get('C').put('C', -4);
m.get('Q').put('A', 0);
m.get('A').put('Q', 0);
m.get('Q').put('R', -1);
m.get('R').put('Q', -1);
m.get('Q').put('N', -1);
m.get('N').put('Q', -1);
m.get('Q').put('D', -2);
m.get('D').put('Q', -2);
m.get('Q').put('C', 5);
m.get('C').put('Q', 5);
m.get('Q').put('Q', -4);
m.get('E').put('A', 0);
m.get('A').put('E', 0);
m.get('E').put('R', 1);
m.get('R').put('E', 1);
m.get('E').put('N', -1);
m.get('N').put('E', -1);
m.get('E').put('D', -3);
m.get('D').put('E', -3);
m.get('E').put('C', 5);
m.get('C').put('E', 5);
m.get('E').put('Q', -2);
m.get('Q').put('E', -2);
m.get('E').put('E', -4);
m.get('G').put('A', -1);
m.get('A').put('G', -1);
m.get('G').put('R', 3);
m.get('R').put('G', 3);
m.get('G').put('N', 0);
m.get('N').put('G', 0);
m.get('G').put('D', -1);
m.get('D').put('G', -1);
m.get('G').put('C', 3);
m.get('C').put('G', 3);
m.get('G').put('Q', 1);
m.get('Q').put('G', 1);
m.get('G').put('E', 0);
m.get('E').put('G', 0);
m.get('G').put('G', -5);
m.get('H').put('A', 1);
m.get('A').put('H', 1);
m.get('H').put('R', -2);
m.get('R').put('H', -2);
m.get('H').put('N', -2);
m.get('N').put('H', -2);
m.get('H').put('D', -1);
m.get('D').put('H', -1);
m.get('H').put('C', 3);
m.get('C').put('H', 3);
m.get('H').put('Q', -3);
m.get('Q').put('H', -3);
m.get('H').put('E', -1);
m.get('E').put('H', -1);
m.get('H').put('G', 2);
m.get('G').put('H', 2);
m.get('H').put('H', -6);
m.get('I').put('A', 1);
m.get('A').put('I', 1);
m.get('I').put('R', 2);
m.get('R').put('I', 2);
m.get('I').put('N', 2);
m.get('N').put('I', 2);
m.get('I').put('D', 2);
m.get('D').put('I', 2);
m.get('I').put('C', 2);
m.get('C').put('I', 2);
m.get('I').put('Q', 2);
m.get('Q').put('I', 2);
m.get('I').put('E', 2);
m.get('E').put('I', 2);
m.get('I').put('G', 3);
m.get('G').put('I', 3);
m.get('I').put('H', 2);
m.get('H').put('I', 2);
m.get('I').put('I', -5);
m.get('L').put('A', 2);
m.get('A').put('L', 2);
m.get('L').put('R', 3);
m.get('R').put('L', 3);
m.get('L').put('N', 3);
m.get('N').put('L', 3);
m.get('L').put('D', 4);
m.get('D').put('L', 4);
m.get('L').put('C', 6);
m.get('C').put('L', 6);
m.get('L').put('Q', 2);
m.get('Q').put('L', 2);
m.get('L').put('E', 3);
m.get('E').put('L', 3);
m.get('L').put('G', 4);
m.get('G').put('L', 4);
m.get('L').put('H', 2);
m.get('H').put('L', 2);
m.get('L').put('I', -2);
m.get('I').put('L', -2);
m.get('L').put('L', -6);
m.get('K').put('A', 1);
m.get('A').put('K', 1);
m.get('K').put('R', -3);
m.get('R').put('K', -3);
m.get('K').put('N', -1);
m.get('N').put('K', -1);
m.get('K').put('D', 0);
m.get('D').put('K', 0);
m.get('K').put('C', 5);
m.get('C').put('K', 5);
m.get('K').put('Q', -1);
m.get('Q').put('K', -1);
m.get('K').put('E', 0);
m.get('E').put('K', 0);
m.get('K').put('G', 2);
m.get('G').put('K', 2);
m.get('K').put('H', 0);
m.get('H').put('K', 0);
m.get('K').put('I', 2);
m.get('I').put('K', 2);
m.get('K').put('L', 3);
m.get('L').put('K', 3);
m.get('K').put('K', -5);
m.get('M').put('A', 1);
m.get('A').put('M', 1);
m.get('M').put('R', 0);
m.get('R').put('M', 0);
m.get('M').put('N', 2);
m.get('N').put('M', 2);
m.get('M').put('D', 3);
m.get('D').put('M', 3);
m.get('M').put('C', 5);
m.get('C').put('M', 5);
m.get('M').put('Q', 1);
m.get('Q').put('M', 1);
m.get('M').put('E', 2);
m.get('E').put('M', 2);
m.get('M').put('G', 3);
m.get('G').put('M', 3);
m.get('M').put('H', 2);
m.get('H').put('M', 2);
m.get('M').put('I', -2);
m.get('I').put('M', -2);
m.get('M').put('L', -4);
m.get('L').put('M', -4);
m.get('M').put('K', 0);
m.get('K').put('M', 0);
m.get('M').put('M', -6);
m.get('F').put('A', 4);
m.get('A').put('F', 4);
m.get('F').put('R', 4);
m.get('R').put('F', 4);
m.get('F').put('N', 4);
m.get('N').put('F', 4);
m.get('F').put('D', 6);
m.get('D').put('F', 6);
m.get('F').put('C', 4);
m.get('C').put('F', 4);
m.get('F').put('Q', 5);
m.get('Q').put('F', 5);
m.get('F').put('E', 5);
m.get('E').put('F', 5);
m.get('F').put('G', 5);
m.get('G').put('F', 5);
m.get('F').put('H', 2);
m.get('H').put('F', 2);
m.get('F').put('I', -1);
m.get('I').put('F', -1);
m.get('F').put('L', -2);
m.get('L').put('F', -2);
m.get('F').put('K', 5);
m.get('K').put('F', 5);
m.get('F').put('M', 0);
m.get('M').put('F', 0);
m.get('F').put('F', -9);
m.get('P').put('A', -1);
m.get('A').put('P', -1);
m.get('P').put('R', 0);
m.get('R').put('P', 0);
m.get('P').put('N', 1);
m.get('N').put('P', 1);
m.get('P').put('D', 1);
m.get('D').put('P', 1);
m.get('P').put('C', 3);
m.get('C').put('P', 3);
m.get('P').put('Q', 0);
m.get('Q').put('P', 0);
m.get('P').put('E', 1);
m.get('E').put('P', 1);
m.get('P').put('G', 1);
m.get('G').put('P', 1);
m.get('P').put('H', 0);
m.get('H').put('P', 0);
m.get('P').put('I', 2);
m.get('I').put('P', 2);
m.get('P').put('L', 3);
m.get('L').put('P', 3);
m.get('P').put('K', 1);
m.get('K').put('P', 1);
m.get('P').put('M', 2);
m.get('M').put('P', 2);
m.get('P').put('F', 5);
m.get('F').put('P', 5);
m.get('P').put('P', -6);
m.get('S').put('A', -1);
m.get('A').put('S', -1);
m.get('S').put('R', 0);
m.get('R').put('S', 0);
m.get('S').put('N', -1);
m.get('N').put('S', -1);
m.get('S').put('D', 0);
m.get('D').put('S', 0);
m.get('S').put('C', 0);
m.get('C').put('S', 0);
m.get('S').put('Q', 1);
m.get('Q').put('S', 1);
m.get('S').put('E', 0);
m.get('E').put('S', 0);
m.get('S').put('G', -1);
m.get('G').put('S', -1);
m.get('S').put('H', 1);
m.get('H').put('S', 1);
m.get('S').put('I', 1);
m.get('I').put('S', 1);
m.get('S').put('L', 3);
m.get('L').put('S', 3);
m.get('S').put('K', 0);
m.get('K').put('S', 0);
m.get('S').put('M', 2);
m.get('M').put('S', 2);
m.get('S').put('F', 3);
m.get('F').put('S', 3);
m.get('S').put('P', -1);
m.get('P').put('S', -1);
m.get('S').put('S', -3);
m.get('T').put('A', -1);
m.get('A').put('T', -1);
m.get('T').put('R', 1);
m.get('R').put('T', 1);
m.get('T').put('N', 0);
m.get('N').put('T', 0);
m.get('T').put('D', 0);
m.get('D').put('T', 0);
m.get('T').put('C', 2);
m.get('C').put('T', 2);
m.get('T').put('Q', 1);
m.get('Q').put('T', 1);
m.get('T').put('E', 0);
m.get('E').put('T', 0);
m.get('T').put('G', 0);
m.get('G').put('T', 0);
m.get('T').put('H', 1);
m.get('H').put('T', 1);
m.get('T').put('I', 0);
m.get('I').put('T', 0);
m.get('T').put('L', 2);
m.get('L').put('T', 2);
m.get('T').put('K', 0);
m.get('K').put('T', 0);
m.get('T').put('M', 1);
m.get('M').put('T', 1);
m.get('T').put('F', 2);
m.get('F').put('T', 2);
m.get('T').put('P', 0);
m.get('P').put('T', 0);
m.get('T').put('S', -1);
m.get('S').put('T', -1);
m.get('T').put('T', -3);
m.get('W').put('A', 6);
m.get('A').put('W', 6);
m.get('W').put('R', -2);
m.get('R').put('W', -2);
m.get('W').put('N', 4);
m.get('N').put('W', 4);
m.get('W').put('D', 7);
m.get('D').put('W', 7);
m.get('W').put('C', 8);
m.get('C').put('W', 8);
m.get('W').put('Q', 5);
m.get('Q').put('W', 5);
m.get('W').put('E', 7);
m.get('E').put('W', 7);
m.get('W').put('G', 7);
m.get('G').put('W', 7);
m.get('W').put('H', 3);
m.get('H').put('W', 3);
m.get('W').put('I', 5);
m.get('I').put('W', 5);
m.get('W').put('L', 2);
m.get('L').put('W', 2);
m.get('W').put('K', 3);
m.get('K').put('W', 3);
m.get('W').put('M', 4);
m.get('M').put('W', 4);
m.get('W').put('F', 0);
m.get('F').put('W', 0);
m.get('W').put('P', 6);
m.get('P').put('W', 6);
m.get('W').put('S', 2);
m.get('S').put('W', 2);
m.get('W').put('T', 5);
m.get('T').put('W', 5);
m.get('W').put('W', -17);
m.get('Y').put('A', 3);
m.get('A').put('Y', 3);
m.get('Y').put('R', 4);
m.get('R').put('Y', 4);
m.get('Y').put('N', 2);
m.get('N').put('Y', 2);
m.get('Y').put('D', 4);
m.get('D').put('Y', 4);
m.get('Y').put('C', 0);
m.get('C').put('Y', 0);
m.get('Y').put('Q', 4);
m.get('Q').put('Y', 4);
m.get('Y').put('E', 4);
m.get('E').put('Y', 4);
m.get('Y').put('G', 5);
m.get('G').put('Y', 5);
m.get('Y').put('H', 0);
m.get('H').put('Y', 0);
m.get('Y').put('I', 1);
m.get('I').put('Y', 1);
m.get('Y').put('L', 1);
m.get('L').put('Y', 1);
m.get('Y').put('K', 4);
m.get('K').put('Y', 4);
m.get('Y').put('M', 2);
m.get('M').put('Y', 2);
m.get('Y').put('F', -7);
m.get('F').put('Y', -7);
m.get('Y').put('P', 5);
m.get('P').put('Y', 5);
m.get('Y').put('S', 3);
m.get('S').put('Y', 3);
m.get('Y').put('T', 3);
m.get('T').put('Y', 3);
m.get('Y').put('W', 0);
m.get('W').put('Y', 0);
m.get('Y').put('Y', -10);
m.get('V').put('A', 0);
m.get('A').put('V', 0);
m.get('V').put('R', 2);
m.get('R').put('V', 2);
m.get('V').put('N', 2);
m.get('N').put('V', 2);
m.get('V').put('D', 2);
m.get('D').put('V', 2);
m.get('V').put('C', 2);
m.get('C').put('V', 2);
m.get('V').put('Q', 2);
m.get('Q').put('V', 2);
m.get('V').put('E', 2);
m.get('E').put('V', 2);
m.get('V').put('G', 1);
m.get('G').put('V', 1);
m.get('V').put('H', 2);
m.get('H').put('V', 2);
m.get('V').put('I', -4);
m.get('I').put('V', -4);
m.get('V').put('L', -2);
m.get('L').put('V', -2);
m.get('V').put('K', 2);
m.get('K').put('V', 2);
m.get('V').put('M', -2);
m.get('M').put('V', -2);
m.get('V').put('F', 1);
m.get('F').put('V', 1);
m.get('V').put('P', 1);
m.get('P').put('V', 1);
m.get('V').put('S', 1);
m.get('S').put('V', 1);
m.get('V').put('T', 0);
m.get('T').put('V', 0);
m.get('V').put('W', 6);
m.get('W').put('V', 6);
m.get('V').put('Y', 2);
m.get('Y').put('V', 2);
m.get('V').put('V', -4);
}

@Override
public Integer getCost(Character aminoAcidChar1, Character aminoAcidChar2) {
try {
return m.get(aminoAcidChar1).get(aminoAcidChar2);
} catch (NullPointerException ex) {
throw new IllegalArgumentException("Bad arguments: (" +
aminoAcidChar1 + ", " + aminoAcidChar2 + ")",
ex);
}
}
}


AminoAcidAlphabet.java



package net.coderodde.bio.msa;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public final class AminoAcidAlphabet {

public static final Character GAP_CHARACTER = '-';
private static AminoAcidAlphabet instance;

private final Set<Character> alphabet = new HashSet<>();

public static AminoAcidAlphabet getAminoAcidAlphabet() {
if (instance == null) {
instance = new AminoAcidAlphabet();
}

return instance;
}

private AminoAcidAlphabet() {
String aminoAcids = "ACDEF" +
"GHIKL" +
"MNPQR" +
"STVWY";

for (char c : aminoAcids.toCharArray()) {
alphabet.add(c);
}
}

public Set<Character> getCharacterSet() {
return Collections.<Character>unmodifiableSet(alphabet);
}
}


Alignment.java



package net.coderodde.bio.msa;

public class Alignment {

private final String alignment;
private final int cost;

Alignment(String alignment, int cost) {
this.alignment = alignment;
this.cost = cost;
}

public String getAlignemnt() {
return alignment;
}

public int getCost() {
return cost;
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
String separator = "";

for (String row : alignment) {
sb.append(separator).append(row);
separator = "n";
}

sb.append("nCost: ").append(cost);
return sb.toString();
}
}


CostMatrix.java



package net.coderodde.bio.msa;

public interface CostMatrix<C> {

public C getCost(Character aminoAcidChar1, Character aminoAcidChar2);
}


HeuristicFunction.java



package net.coderodde.bio.msa;

import java.awt.Point;
import java.util.HashMap;
import java.util.Map;

final class HeuristicFunction {

private final Map<Point, Integer> matrix;

HeuristicFunction(MultipleSequenceAlignmentInstance instance) {
int sequences = instance.getSequenceArray().length;
this.matrix = new Map[sequences];

for (int i = 0; i < sequences; ++i) {
this.matrix[i] = new Map[sequences];

for (int j = i + 1; j < sequences; ++j) {
this.matrix[i][j] = new HashMap<>();
}
}
}

void put(int dimension1, int dimension2, Point point, Integer cost) {
matrix[dimension1][dimension2].put(point, cost);
}

boolean containsPartial(int dimension1, int dimension2, Point point) {
return matrix[dimension1][dimension2].containsKey(point);
}

Integer getPartial(int dimension1, int dimension2, Point point) {
return matrix[dimension1][dimension2].get(point);
}

Integer get(LatticeNode node) {
int cost = 0;
int coordinates = node.getCoordinates();
Point point = new Point();

for (int dimension1 = 0;
dimension1 < coordinates.length;
dimension1++) {
point.x = dimension1;

for (int dimension2 = dimension1 + 1;
dimension2 < coordinates.length;
dimension2++) {
point.y = dimension2;
cost += matrix[dimension1][dimension2].get(point);
}
}

return cost;
}
}


App.java



package net.coderodde.bio.msa;

final class App {

private static final String SEQUENCES = {
"ACGHKGMNPFQEKKFKLMNRW",
"CFGPQWYRTLMEKKFKNR",
"EACLMNRPQWTR",
"TIMWAYHTMGIEKKFK"
};

public static void main(String args) {
MultipleSequenceAlignmentInstance instance =
new MultipleSequenceAlignmentInstance(
PAM250CostMatrix.getPAM250CostMatrix(),
4,
SEQUENCES);

long start = System.currentTimeMillis();
Alignment alignment1 = instance.align();
long end = System.currentTimeMillis();

System.out.println(alignment1);
System.out.println(end - start + " ms.");
System.out.println();

start = System.currentTimeMillis();
Alignment alignment2 = instance.alignBrute();
end = System.currentTimeMillis();

System.out.println(alignment2);
System.out.println(end - start + " ms.");
}
}


Output



The output for the demo is




Computed heuristic function in 1003 milliseconds.
-ACG-HKGMNPF-QEKKFKLMNRW
-CFGPQW-YRTL-MEKKFK--NR-
EAC--LM-NRP--Q-WT-R-----
-TI--MWAYHTMGIEKKFK-----
Cost: 53
1126 ms.

-ACG-HKGMNPF-QEKKFKLMNRW
-CFGPQW-YRTL-MEKKFK--NR-
EAC--LM-NRP--Q-WT-R-----
-TI--MWAYHTMGIEKKFK-----
Cost: 53
60 ms.



Critique request



As always, please tell me anything that comes to mind.










share|improve this question



























    6














    (The entire project lives here.)



    Problem definition



    Suppose we are given three genomic strings drawn from the alphabet of 20 amino acids:



    ACGH
    CFG
    EAC


    In multiple sequence alignment problem we want to align them optimally in order to access their evolutionary similarity. Such an alignment may look like the following:



    -AC-GH
    --CFG-
    EAC---


    Solution



    Often, the problem is cast as a pathfinding problem in $k$-dimensional lattice, where $k$ is the number of strings. Here is my program, it compares A* with Dijkstra's algorithms:



    MultipleSequenceAlignmentInstance.java



    package net.coderodde.bio.msa;

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Objects;
    import java.util.PriorityQueue;
    import java.util.Queue;
    import java.util.Set;

    public final class MultipleSequenceAlignmentInstance {

    /**
    * The penalty for pairs in which there is one valid character and one gap.
    */
    private final int gapPenalty;

    /**
    * The character pairs cost matrix.
    */
    private final CostMatrix<Integer> costMatrix;

    /**
    * The array of sequences to be aligned.
    */
    private final String sequenceArray;

    // A small speed optimization:
    private final char column;

    public MultipleSequenceAlignmentInstance(CostMatrix<Integer> costMatrix,
    int gapPenalty,
    String... sequenceArray) {
    this.costMatrix = Objects.requireNonNull(costMatrix,
    "Cost matrix is null");
    this.gapPenalty = gapPenalty;
    this.sequenceArray = sequenceArray.clone();
    this.column = new char[sequenceArray.length];

    for (int i = 0; i != sequenceArray.length; ++i) {
    this.sequenceArray[i] =
    checkIsValidGenomicSequence(sequenceArray[i]);
    }
    }

    public Alignment align() {
    long start = System.currentTimeMillis();
    HeuristicFunction hf = new HeuristicFunctionComputer()
    .computeHeuristicFunction(this);
    long end = System.currentTimeMillis();

    System.out.println("Computed heuristic function in " + (end - start) +
    " milliseconds.");

    Queue<LatticeNodeHolder> open = new PriorityQueue<>();
    Map<LatticeNode, Integer> distance = new HashMap<>();
    Map<LatticeNode, LatticeNode> parents = new HashMap<>();

    LatticeNode sourceNode = getSourceNode();
    LatticeNode targetNode = getTargetNode();

    open.add(new LatticeNodeHolder(sourceNode, 0));
    distance.put(sourceNode, 0);
    parents.put(sourceNode, null);

    Set<LatticeNode> closed = new HashSet<>();

    while (true) {
    LatticeNode currentNode = open.remove().getNode();

    if (currentNode.equals(targetNode)) {
    return tracebackPath(parents, distance.get(currentNode));
    }

    if (closed.contains(currentNode)) {
    continue;
    }

    closed.add(currentNode);

    for (LatticeNode childNode : currentNode.getChildren()) {
    if (closed.contains(childNode)) {
    continue;
    }

    int tentativeCost = distance.get(currentNode) +
    getWeight(currentNode, childNode);
    Integer currentCost = distance.get(childNode);

    if (currentCost == null || currentCost > tentativeCost) {
    open.add(new LatticeNodeHolder(childNode,
    tentativeCost +
    hf.get(childNode)));
    distance.put(childNode, tentativeCost);
    parents.put(childNode, currentNode);
    }
    }
    }
    }

    public Alignment alignBrute() {
    Queue<LatticeNodeHolder> open = new PriorityQueue<>();
    Map<LatticeNode, Integer> distance = new HashMap<>();
    Map<LatticeNode, LatticeNode> parents = new HashMap<>();

    LatticeNode sourceNode = getSourceNode();
    LatticeNode targetNode = getTargetNode();

    open.add(new LatticeNodeHolder(sourceNode, 0));
    distance.put(sourceNode, 0);
    parents.put(sourceNode, null);

    Set<LatticeNode> closed = new HashSet<>();

    while (true) {
    LatticeNode currentNode = open.remove().getNode();

    if (currentNode.equals(targetNode)) {
    return tracebackPath(parents, distance.get(currentNode));
    }

    if (closed.contains(currentNode)) {
    continue;
    }

    closed.add(currentNode);

    for (LatticeNode childNode : currentNode.getChildren()) {
    if (closed.contains(childNode)) {
    continue;
    }

    int tentativeCost = distance.get(currentNode) +
    getWeight(currentNode, childNode);
    Integer currentCost = distance.get(childNode);

    if (currentCost == null || currentCost > tentativeCost) {
    open.add(new LatticeNodeHolder(childNode, tentativeCost));
    distance.put(childNode, tentativeCost);
    parents.put(childNode, currentNode);
    }
    }
    }
    }

    /**
    * Create the source node for the sequence alignment task, i.e., a lattice
    * node with all coordinates set to zero.
    *
    * @return the source node.
    */
    LatticeNode getSourceNode() {
    int sourceCoordinates = new int[sequenceArray.length];
    return new LatticeNode(this, sourceCoordinates);
    }

    /**
    * Create the target node for the sequence alignment task, i.e., a lattice
    * node with all coordinates set to the respective sequence lengths.
    *
    * @return the target node.
    */
    LatticeNode getTargetNode() {
    int targetCoordinates = new int[sequenceArray.length];

    for (int i = 0; i != sequenceArray.length; ++i) {
    targetCoordinates[i] = sequenceArray[i].length();
    }

    return new LatticeNode(this, targetCoordinates);
    }

    Integer getWeight(LatticeNode tail,
    LatticeNode head,
    int dimension1,
    int dimension2) {
    int tailCoordinates = tail.getCoordinates();
    int headCoordinates = head.getCoordinates();

    if (tailCoordinates[dimension1] == headCoordinates[dimension1]) {
    //System.out.println("1");
    return gapPenalty;
    } else if (tailCoordinates[dimension2] == headCoordinates[dimension2]) {
    //System.out.println("2");
    return gapPenalty;
    } else {
    // System.out.println("3");
    char character1 = sequenceArray[dimension1]
    .charAt(tailCoordinates[dimension1]);

    char character2 = sequenceArray[dimension2]
    .charAt(tailCoordinates[dimension2]);
    return costMatrix.getCost(character1, character2);
    }
    }

    Integer getWeight(LatticeNode tail, LatticeNode head) {
    // Extract the column represented by taking a single hop from 'tail' to
    // 'head':
    int tailCoordinates = tail.getCoordinates();
    int headCoordinates = head.getCoordinates();

    for (int i = 0; i < sequenceArray.length; ++i) {
    if (tailCoordinates[i] + 1 == headCoordinates[i]) {
    column[i] = sequenceArray[i].charAt(tailCoordinates[i]);
    } else {
    column[i] = AminoAcidAlphabet.GAP_CHARACTER;
    }
    }

    // Compute the hop cost as the sum of pairwise hops in any plane:
    int cost = 0;

    for (int i = 0; i < column.length; ++i) {
    char character1 = column[i];

    for (int j = i + 1; j < column.length; ++j) {
    char character2 = column[j];

    if (character1 != AminoAcidAlphabet.GAP_CHARACTER) {
    if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
    cost += costMatrix.getCost(character1, character2);
    } else {
    cost += gapPenalty;
    }
    } else {
    // character1 IS the gap character:
    if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
    cost += gapPenalty;
    } else {
    // Do nothing since we have a pair (gap, gap).
    }
    }
    }
    }

    return cost;
    }

    String getSequenceArray() {
    return sequenceArray;
    }

    private Alignment tracebackPath(Map<LatticeNode, LatticeNode> parents,
    Integer cost) {
    List<LatticeNode> path = new ArrayList<>();
    LatticeNode node = getTargetNode();

    while (node != null) {
    path.add(node);
    node = parents.get(node);
    }

    Collections.<LatticeNode>reverse(path);

    String strings = new String[getSequenceArray().length];
    StringBuilder stringBuilders = new StringBuilder[strings.length];

    for (int i = 0; i < stringBuilders.length; ++i) {
    stringBuilders[i] = new StringBuilder();
    }

    for (int i = 1; i < path.size(); ++i) {
    LatticeNode tail = path.get(i - 1);
    LatticeNode head = path.get(i);

    int tailCoordinates = tail.getCoordinates();
    int headCoordinates = head.getCoordinates();

    for (int j = 0; j < tailCoordinates.length; ++j) {
    if (tailCoordinates[j] != headCoordinates[j]) {
    stringBuilders[j].
    append(sequenceArray[j].charAt(tailCoordinates[j]));
    } else {
    stringBuilders[j].append(AminoAcidAlphabet.GAP_CHARACTER);
    }
    }
    }

    for (int i = 0; i < strings.length; ++i) {
    strings[i] = stringBuilders[i].toString();
    }

    return new Alignment(strings, cost);
    }

    private String checkIsValidGenomicSequence(String string) {
    Set<Character> characterSet =
    AminoAcidAlphabet.getAminoAcidAlphabet()
    .getCharacterSet();

    for (char c : string.toCharArray()) {
    if (!characterSet.contains(c)) {
    throw new IllegalArgumentException("Unknown amino acid: " + c);
    }
    }

    return string;
    }

    private static final class LatticeNodeHolder
    implements Comparable<LatticeNodeHolder> {

    private final Integer fScore;
    private final LatticeNode node;

    LatticeNodeHolder(LatticeNode node, Integer fScore) {
    this.fScore = fScore;
    this.node = node;
    }

    LatticeNode getNode() {
    return node;
    }

    @Override
    public int compareTo(LatticeNodeHolder o) {
    return Integer.compare(fScore, o.fScore);
    }
    }
    }


    HeuristicFunctionComputer.java



    package net.coderodde.bio.msa;

    import java.awt.Point;
    import java.util.HashSet;
    import java.util.PriorityQueue;
    import java.util.Queue;
    import java.util.Set;

    final class HeuristicFunctionComputer {

    HeuristicFunction computeHeuristicFunction(
    MultipleSequenceAlignmentInstance instance) {
    String sequenceArray = instance.getSequenceArray();
    HeuristicFunction heuristicFunction = new HeuristicFunction(instance);

    for (int dimension1 = 0;
    dimension1 < sequenceArray.length;
    dimension1++) {
    for (int dimension2 = dimension1 + 1;
    dimension2 < sequenceArray.length;
    dimension2++) {
    loadPartialHeuristicFunction(heuristicFunction,
    dimension1,
    dimension2,
    instance);
    }
    }

    return heuristicFunction;
    }

    // Basically, this method runs Dijkstra backwards from the target node until
    // the entire 2D-grid is explored.
    private void loadPartialHeuristicFunction(
    HeuristicFunction heuristicFunction,
    int dimension1,
    int dimension2,
    MultipleSequenceAlignmentInstance instance) {
    LatticeNode target = instance.getTargetNode();
    Queue<LatticeNodeHolder> open = new PriorityQueue<>();
    Set<LatticeNode> closed = new HashSet<>();
    open.add(new LatticeNodeHolder(target, 0));
    Point point = new Point();

    while (!open.isEmpty()) {
    LatticeNodeHolder currentNodeHolder = open.remove();
    LatticeNode currentNode = currentNodeHolder.getLatticeNode();

    if (closed.contains(currentNode)) {
    continue;
    }

    closed.add(currentNode);

    Integer currentNodeCost = currentNodeHolder.getCost();
    extractPoint(point, currentNode, dimension1, dimension2);
    heuristicFunction.put(dimension1,
    dimension2,
    new Point(point),
    currentNodeCost);

    for (LatticeNode parent : currentNode.getParents()) {
    if (closed.contains(parent)) {
    continue;
    }

    int tentativeCost =
    currentNodeCost + instance.getWeight(parent,
    currentNode,
    dimension1,
    dimension2);
    extractPoint(point, parent, dimension1, dimension2);

    if (!heuristicFunction.containsPartial(dimension1,
    dimension2,
    point)
    || heuristicFunction.getPartial(dimension1,
    dimension2,
    point)
    > tentativeCost) {
    open.add(new LatticeNodeHolder(parent, tentativeCost));
    }
    }
    }
    }

    private static void extractPoint(Point point,
    LatticeNode latticeNode,
    int i,
    int j) {
    int coordinates = latticeNode.getCoordinates();
    point.x = coordinates[i];
    point.y = coordinates[j];
    }

    private static final class LatticeNodeHolder
    implements Comparable<LatticeNodeHolder> {

    private final LatticeNode node;
    private final Integer cost;

    LatticeNodeHolder(LatticeNode node, Integer cost) {
    this.node = node;
    this.cost = cost;
    }

    LatticeNode getLatticeNode() {
    return node;
    }

    Integer getCost() {
    return cost;
    }

    @Override
    public int compareTo(LatticeNodeHolder o) {
    return Integer.compare(cost, o.cost);
    }
    }
    }


    LatticeNode.java



    package net.coderodde.bio.msa;

    import com.sun.corba.se.spi.orb.OperationFactory;
    import java.util.Arrays;

    final class LatticeNode {

    /**
    * The problem instance this lattice node contributes to.
    */
    private final MultipleSequenceAlignmentInstance instance;

    /**
    * The coordinates of this node in the entire lattice.
    */
    private final int coordinates;

    LatticeNode(MultipleSequenceAlignmentInstance instance, int coordinates) {
    this.instance = instance;
    this.coordinates = coordinates;
    }

    @Override
    public boolean equals(Object o) {
    return Arrays.equals(coordinates, ((LatticeNode) o).coordinates);
    }

    // Generated by NetBeans 8.1:
    @Override
    public int hashCode() {
    int hash = 7;
    hash = 41 * hash + Arrays.hashCode(this.coordinates);
    return hash;
    }

    @Override
    public String toString() {
    StringBuilder sb = new StringBuilder("[");
    String separator = "";

    for (int coordinate : coordinates) {
    sb.append(separator).append(coordinate);
    separator = ", ";
    }

    return sb.append("]").toString();
    }

    LatticeNode getChildren() {
    // Find out in how many dimension we can move forward:
    int dimensionsNotReached = 0;
    String sequenceArray = instance.getSequenceArray();
    boolean inclusionMap = new boolean[coordinates.length];

    for (int i = 0; i < sequenceArray.length; ++i) {
    if (coordinates[i] < sequenceArray[i].length()) {
    dimensionsNotReached++;
    // We can make a step forward in the direction of ith dimension:
    inclusionMap[i] = true;
    }
    }

    // Create the array of children:
    int numberOfChildren = pow2(dimensionsNotReached) - 1;
    LatticeNode children = new LatticeNode[numberOfChildren];
    loadChildren(children, inclusionMap);

    // Convert offsets to actual child coordinates:
    for (LatticeNode child : children) {
    child.addOffsets(coordinates);
    }

    return children;
    }

    LatticeNode getParents() {
    // Find out in how many dimensions we can move BACKward:
    int dimensionsNotReached = 0;
    String sequenceArray = instance.getSequenceArray();
    boolean inclusionMap = new boolean[coordinates.length];

    for (int i = 0; i < sequenceArray.length; ++i) {
    if (coordinates[i] > 0) {
    dimensionsNotReached++;
    // We can make a step backwards in the direction of ith
    // dimension:
    inclusionMap[i] = true;
    }
    }

    // Create the array of parents:
    int numberOfParents = pow2(dimensionsNotReached) - 1;
    LatticeNode parents = new LatticeNode[numberOfParents];
    loadParents(parents, inclusionMap);

    // Convert the offsets to actual parent coordinates:
    for (LatticeNode parent : parents) {
    parent.addOffsets(coordinates);
    }

    return parents;
    }

    int getCoordinates() {
    return coordinates;
    }

    private void loadChildren(LatticeNode children, boolean inclusionMap) {
    int coords = new int[this.coordinates.length];

    for (int i = 0; i != children.length; ++i) {
    increment(coords, inclusionMap);
    children[i] = new LatticeNode(instance, coords.clone());
    }
    }

    private void loadParents(LatticeNode parents, boolean inclusionMap) {
    int coords = new int[this.coordinates.length];

    for (int i = 0; i != parents.length; ++i) {
    decrement(coords, inclusionMap);
    parents[i] = new LatticeNode(instance, coords.clone());
    }
    }

    private static void increment(int coordinates, boolean inclusionMap) {
    for (int i = 0; i < coordinates.length; ++i) {
    if (!inclusionMap[i]) {
    continue;
    }

    if (coordinates[i] == 0) {
    coordinates[i] = 1;
    return;
    }

    coordinates[i] = 0;
    }
    }

    private static void decrement(int coordinates, boolean inclusionMap) {
    for (int i = 0; i < coordinates.length; ++i) {
    if (!inclusionMap[i]) {
    continue;
    }

    if (coordinates[i] == 0) {
    coordinates[i] = -1;
    return;
    }

    coordinates[i] = 0;
    }
    }

    private void addOffsets(int offsets) {
    for (int i = 0; i < coordinates.length; ++i) {
    coordinates[i] += offsets[i];
    }
    }

    private static int pow2(int exponent) {
    int ret = 1;

    for (int e = 0; e < exponent; ++e) {
    ret *= 2;
    }

    return ret;
    }
    }


    PAM250CostMatrix.java



    package net.coderodde.bio.msa;

    import java.util.HashMap;
    import java.util.Map;
    import net.coderodde.bio.msa.AminoAcidAlphabet;
    import net.coderodde.bio.msa.CostMatrix;

    public final class PAM250CostMatrix implements CostMatrix<Integer> {

    private static PAM250CostMatrix instance;

    private final Map<Character, Map<Character, Integer>> m = new HashMap<>();

    public static PAM250CostMatrix getPAM250CostMatrix() {
    if (instance == null) {
    instance = new PAM250CostMatrix();
    }

    return instance;
    }

    private PAM250CostMatrix() {
    AminoAcidAlphabet alphabet = AminoAcidAlphabet.getAminoAcidAlphabet();

    alphabet.getCharacterSet().stream().forEach((character) -> {
    m.put(character, new HashMap<>());
    });

    m.get('A').put('A', -2);
    m.get('R').put('A', 2);
    m.get('A').put('R', 2);
    m.get('R').put('R', -6);
    m.get('N').put('A', 0);
    m.get('A').put('N', 0);
    m.get('N').put('R', 0);
    m.get('R').put('N', 0);
    m.get('N').put('N', -2);
    m.get('D').put('A', 0);
    m.get('A').put('D', 0);
    m.get('D').put('R', 1);
    m.get('R').put('D', 1);
    m.get('D').put('N', -2);
    m.get('N').put('D', -2);
    m.get('D').put('D', -4);
    m.get('C').put('A', 2);
    m.get('A').put('C', 2);
    m.get('C').put('R', 4);
    m.get('R').put('C', 4);
    m.get('C').put('N', 4);
    m.get('N').put('C', 4);
    m.get('C').put('D', 5);
    m.get('D').put('C', 5);
    m.get('C').put('C', -4);
    m.get('Q').put('A', 0);
    m.get('A').put('Q', 0);
    m.get('Q').put('R', -1);
    m.get('R').put('Q', -1);
    m.get('Q').put('N', -1);
    m.get('N').put('Q', -1);
    m.get('Q').put('D', -2);
    m.get('D').put('Q', -2);
    m.get('Q').put('C', 5);
    m.get('C').put('Q', 5);
    m.get('Q').put('Q', -4);
    m.get('E').put('A', 0);
    m.get('A').put('E', 0);
    m.get('E').put('R', 1);
    m.get('R').put('E', 1);
    m.get('E').put('N', -1);
    m.get('N').put('E', -1);
    m.get('E').put('D', -3);
    m.get('D').put('E', -3);
    m.get('E').put('C', 5);
    m.get('C').put('E', 5);
    m.get('E').put('Q', -2);
    m.get('Q').put('E', -2);
    m.get('E').put('E', -4);
    m.get('G').put('A', -1);
    m.get('A').put('G', -1);
    m.get('G').put('R', 3);
    m.get('R').put('G', 3);
    m.get('G').put('N', 0);
    m.get('N').put('G', 0);
    m.get('G').put('D', -1);
    m.get('D').put('G', -1);
    m.get('G').put('C', 3);
    m.get('C').put('G', 3);
    m.get('G').put('Q', 1);
    m.get('Q').put('G', 1);
    m.get('G').put('E', 0);
    m.get('E').put('G', 0);
    m.get('G').put('G', -5);
    m.get('H').put('A', 1);
    m.get('A').put('H', 1);
    m.get('H').put('R', -2);
    m.get('R').put('H', -2);
    m.get('H').put('N', -2);
    m.get('N').put('H', -2);
    m.get('H').put('D', -1);
    m.get('D').put('H', -1);
    m.get('H').put('C', 3);
    m.get('C').put('H', 3);
    m.get('H').put('Q', -3);
    m.get('Q').put('H', -3);
    m.get('H').put('E', -1);
    m.get('E').put('H', -1);
    m.get('H').put('G', 2);
    m.get('G').put('H', 2);
    m.get('H').put('H', -6);
    m.get('I').put('A', 1);
    m.get('A').put('I', 1);
    m.get('I').put('R', 2);
    m.get('R').put('I', 2);
    m.get('I').put('N', 2);
    m.get('N').put('I', 2);
    m.get('I').put('D', 2);
    m.get('D').put('I', 2);
    m.get('I').put('C', 2);
    m.get('C').put('I', 2);
    m.get('I').put('Q', 2);
    m.get('Q').put('I', 2);
    m.get('I').put('E', 2);
    m.get('E').put('I', 2);
    m.get('I').put('G', 3);
    m.get('G').put('I', 3);
    m.get('I').put('H', 2);
    m.get('H').put('I', 2);
    m.get('I').put('I', -5);
    m.get('L').put('A', 2);
    m.get('A').put('L', 2);
    m.get('L').put('R', 3);
    m.get('R').put('L', 3);
    m.get('L').put('N', 3);
    m.get('N').put('L', 3);
    m.get('L').put('D', 4);
    m.get('D').put('L', 4);
    m.get('L').put('C', 6);
    m.get('C').put('L', 6);
    m.get('L').put('Q', 2);
    m.get('Q').put('L', 2);
    m.get('L').put('E', 3);
    m.get('E').put('L', 3);
    m.get('L').put('G', 4);
    m.get('G').put('L', 4);
    m.get('L').put('H', 2);
    m.get('H').put('L', 2);
    m.get('L').put('I', -2);
    m.get('I').put('L', -2);
    m.get('L').put('L', -6);
    m.get('K').put('A', 1);
    m.get('A').put('K', 1);
    m.get('K').put('R', -3);
    m.get('R').put('K', -3);
    m.get('K').put('N', -1);
    m.get('N').put('K', -1);
    m.get('K').put('D', 0);
    m.get('D').put('K', 0);
    m.get('K').put('C', 5);
    m.get('C').put('K', 5);
    m.get('K').put('Q', -1);
    m.get('Q').put('K', -1);
    m.get('K').put('E', 0);
    m.get('E').put('K', 0);
    m.get('K').put('G', 2);
    m.get('G').put('K', 2);
    m.get('K').put('H', 0);
    m.get('H').put('K', 0);
    m.get('K').put('I', 2);
    m.get('I').put('K', 2);
    m.get('K').put('L', 3);
    m.get('L').put('K', 3);
    m.get('K').put('K', -5);
    m.get('M').put('A', 1);
    m.get('A').put('M', 1);
    m.get('M').put('R', 0);
    m.get('R').put('M', 0);
    m.get('M').put('N', 2);
    m.get('N').put('M', 2);
    m.get('M').put('D', 3);
    m.get('D').put('M', 3);
    m.get('M').put('C', 5);
    m.get('C').put('M', 5);
    m.get('M').put('Q', 1);
    m.get('Q').put('M', 1);
    m.get('M').put('E', 2);
    m.get('E').put('M', 2);
    m.get('M').put('G', 3);
    m.get('G').put('M', 3);
    m.get('M').put('H', 2);
    m.get('H').put('M', 2);
    m.get('M').put('I', -2);
    m.get('I').put('M', -2);
    m.get('M').put('L', -4);
    m.get('L').put('M', -4);
    m.get('M').put('K', 0);
    m.get('K').put('M', 0);
    m.get('M').put('M', -6);
    m.get('F').put('A', 4);
    m.get('A').put('F', 4);
    m.get('F').put('R', 4);
    m.get('R').put('F', 4);
    m.get('F').put('N', 4);
    m.get('N').put('F', 4);
    m.get('F').put('D', 6);
    m.get('D').put('F', 6);
    m.get('F').put('C', 4);
    m.get('C').put('F', 4);
    m.get('F').put('Q', 5);
    m.get('Q').put('F', 5);
    m.get('F').put('E', 5);
    m.get('E').put('F', 5);
    m.get('F').put('G', 5);
    m.get('G').put('F', 5);
    m.get('F').put('H', 2);
    m.get('H').put('F', 2);
    m.get('F').put('I', -1);
    m.get('I').put('F', -1);
    m.get('F').put('L', -2);
    m.get('L').put('F', -2);
    m.get('F').put('K', 5);
    m.get('K').put('F', 5);
    m.get('F').put('M', 0);
    m.get('M').put('F', 0);
    m.get('F').put('F', -9);
    m.get('P').put('A', -1);
    m.get('A').put('P', -1);
    m.get('P').put('R', 0);
    m.get('R').put('P', 0);
    m.get('P').put('N', 1);
    m.get('N').put('P', 1);
    m.get('P').put('D', 1);
    m.get('D').put('P', 1);
    m.get('P').put('C', 3);
    m.get('C').put('P', 3);
    m.get('P').put('Q', 0);
    m.get('Q').put('P', 0);
    m.get('P').put('E', 1);
    m.get('E').put('P', 1);
    m.get('P').put('G', 1);
    m.get('G').put('P', 1);
    m.get('P').put('H', 0);
    m.get('H').put('P', 0);
    m.get('P').put('I', 2);
    m.get('I').put('P', 2);
    m.get('P').put('L', 3);
    m.get('L').put('P', 3);
    m.get('P').put('K', 1);
    m.get('K').put('P', 1);
    m.get('P').put('M', 2);
    m.get('M').put('P', 2);
    m.get('P').put('F', 5);
    m.get('F').put('P', 5);
    m.get('P').put('P', -6);
    m.get('S').put('A', -1);
    m.get('A').put('S', -1);
    m.get('S').put('R', 0);
    m.get('R').put('S', 0);
    m.get('S').put('N', -1);
    m.get('N').put('S', -1);
    m.get('S').put('D', 0);
    m.get('D').put('S', 0);
    m.get('S').put('C', 0);
    m.get('C').put('S', 0);
    m.get('S').put('Q', 1);
    m.get('Q').put('S', 1);
    m.get('S').put('E', 0);
    m.get('E').put('S', 0);
    m.get('S').put('G', -1);
    m.get('G').put('S', -1);
    m.get('S').put('H', 1);
    m.get('H').put('S', 1);
    m.get('S').put('I', 1);
    m.get('I').put('S', 1);
    m.get('S').put('L', 3);
    m.get('L').put('S', 3);
    m.get('S').put('K', 0);
    m.get('K').put('S', 0);
    m.get('S').put('M', 2);
    m.get('M').put('S', 2);
    m.get('S').put('F', 3);
    m.get('F').put('S', 3);
    m.get('S').put('P', -1);
    m.get('P').put('S', -1);
    m.get('S').put('S', -3);
    m.get('T').put('A', -1);
    m.get('A').put('T', -1);
    m.get('T').put('R', 1);
    m.get('R').put('T', 1);
    m.get('T').put('N', 0);
    m.get('N').put('T', 0);
    m.get('T').put('D', 0);
    m.get('D').put('T', 0);
    m.get('T').put('C', 2);
    m.get('C').put('T', 2);
    m.get('T').put('Q', 1);
    m.get('Q').put('T', 1);
    m.get('T').put('E', 0);
    m.get('E').put('T', 0);
    m.get('T').put('G', 0);
    m.get('G').put('T', 0);
    m.get('T').put('H', 1);
    m.get('H').put('T', 1);
    m.get('T').put('I', 0);
    m.get('I').put('T', 0);
    m.get('T').put('L', 2);
    m.get('L').put('T', 2);
    m.get('T').put('K', 0);
    m.get('K').put('T', 0);
    m.get('T').put('M', 1);
    m.get('M').put('T', 1);
    m.get('T').put('F', 2);
    m.get('F').put('T', 2);
    m.get('T').put('P', 0);
    m.get('P').put('T', 0);
    m.get('T').put('S', -1);
    m.get('S').put('T', -1);
    m.get('T').put('T', -3);
    m.get('W').put('A', 6);
    m.get('A').put('W', 6);
    m.get('W').put('R', -2);
    m.get('R').put('W', -2);
    m.get('W').put('N', 4);
    m.get('N').put('W', 4);
    m.get('W').put('D', 7);
    m.get('D').put('W', 7);
    m.get('W').put('C', 8);
    m.get('C').put('W', 8);
    m.get('W').put('Q', 5);
    m.get('Q').put('W', 5);
    m.get('W').put('E', 7);
    m.get('E').put('W', 7);
    m.get('W').put('G', 7);
    m.get('G').put('W', 7);
    m.get('W').put('H', 3);
    m.get('H').put('W', 3);
    m.get('W').put('I', 5);
    m.get('I').put('W', 5);
    m.get('W').put('L', 2);
    m.get('L').put('W', 2);
    m.get('W').put('K', 3);
    m.get('K').put('W', 3);
    m.get('W').put('M', 4);
    m.get('M').put('W', 4);
    m.get('W').put('F', 0);
    m.get('F').put('W', 0);
    m.get('W').put('P', 6);
    m.get('P').put('W', 6);
    m.get('W').put('S', 2);
    m.get('S').put('W', 2);
    m.get('W').put('T', 5);
    m.get('T').put('W', 5);
    m.get('W').put('W', -17);
    m.get('Y').put('A', 3);
    m.get('A').put('Y', 3);
    m.get('Y').put('R', 4);
    m.get('R').put('Y', 4);
    m.get('Y').put('N', 2);
    m.get('N').put('Y', 2);
    m.get('Y').put('D', 4);
    m.get('D').put('Y', 4);
    m.get('Y').put('C', 0);
    m.get('C').put('Y', 0);
    m.get('Y').put('Q', 4);
    m.get('Q').put('Y', 4);
    m.get('Y').put('E', 4);
    m.get('E').put('Y', 4);
    m.get('Y').put('G', 5);
    m.get('G').put('Y', 5);
    m.get('Y').put('H', 0);
    m.get('H').put('Y', 0);
    m.get('Y').put('I', 1);
    m.get('I').put('Y', 1);
    m.get('Y').put('L', 1);
    m.get('L').put('Y', 1);
    m.get('Y').put('K', 4);
    m.get('K').put('Y', 4);
    m.get('Y').put('M', 2);
    m.get('M').put('Y', 2);
    m.get('Y').put('F', -7);
    m.get('F').put('Y', -7);
    m.get('Y').put('P', 5);
    m.get('P').put('Y', 5);
    m.get('Y').put('S', 3);
    m.get('S').put('Y', 3);
    m.get('Y').put('T', 3);
    m.get('T').put('Y', 3);
    m.get('Y').put('W', 0);
    m.get('W').put('Y', 0);
    m.get('Y').put('Y', -10);
    m.get('V').put('A', 0);
    m.get('A').put('V', 0);
    m.get('V').put('R', 2);
    m.get('R').put('V', 2);
    m.get('V').put('N', 2);
    m.get('N').put('V', 2);
    m.get('V').put('D', 2);
    m.get('D').put('V', 2);
    m.get('V').put('C', 2);
    m.get('C').put('V', 2);
    m.get('V').put('Q', 2);
    m.get('Q').put('V', 2);
    m.get('V').put('E', 2);
    m.get('E').put('V', 2);
    m.get('V').put('G', 1);
    m.get('G').put('V', 1);
    m.get('V').put('H', 2);
    m.get('H').put('V', 2);
    m.get('V').put('I', -4);
    m.get('I').put('V', -4);
    m.get('V').put('L', -2);
    m.get('L').put('V', -2);
    m.get('V').put('K', 2);
    m.get('K').put('V', 2);
    m.get('V').put('M', -2);
    m.get('M').put('V', -2);
    m.get('V').put('F', 1);
    m.get('F').put('V', 1);
    m.get('V').put('P', 1);
    m.get('P').put('V', 1);
    m.get('V').put('S', 1);
    m.get('S').put('V', 1);
    m.get('V').put('T', 0);
    m.get('T').put('V', 0);
    m.get('V').put('W', 6);
    m.get('W').put('V', 6);
    m.get('V').put('Y', 2);
    m.get('Y').put('V', 2);
    m.get('V').put('V', -4);
    }

    @Override
    public Integer getCost(Character aminoAcidChar1, Character aminoAcidChar2) {
    try {
    return m.get(aminoAcidChar1).get(aminoAcidChar2);
    } catch (NullPointerException ex) {
    throw new IllegalArgumentException("Bad arguments: (" +
    aminoAcidChar1 + ", " + aminoAcidChar2 + ")",
    ex);
    }
    }
    }


    AminoAcidAlphabet.java



    package net.coderodde.bio.msa;

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;

    public final class AminoAcidAlphabet {

    public static final Character GAP_CHARACTER = '-';
    private static AminoAcidAlphabet instance;

    private final Set<Character> alphabet = new HashSet<>();

    public static AminoAcidAlphabet getAminoAcidAlphabet() {
    if (instance == null) {
    instance = new AminoAcidAlphabet();
    }

    return instance;
    }

    private AminoAcidAlphabet() {
    String aminoAcids = "ACDEF" +
    "GHIKL" +
    "MNPQR" +
    "STVWY";

    for (char c : aminoAcids.toCharArray()) {
    alphabet.add(c);
    }
    }

    public Set<Character> getCharacterSet() {
    return Collections.<Character>unmodifiableSet(alphabet);
    }
    }


    Alignment.java



    package net.coderodde.bio.msa;

    public class Alignment {

    private final String alignment;
    private final int cost;

    Alignment(String alignment, int cost) {
    this.alignment = alignment;
    this.cost = cost;
    }

    public String getAlignemnt() {
    return alignment;
    }

    public int getCost() {
    return cost;
    }

    @Override
    public String toString() {
    StringBuilder sb = new StringBuilder();
    String separator = "";

    for (String row : alignment) {
    sb.append(separator).append(row);
    separator = "n";
    }

    sb.append("nCost: ").append(cost);
    return sb.toString();
    }
    }


    CostMatrix.java



    package net.coderodde.bio.msa;

    public interface CostMatrix<C> {

    public C getCost(Character aminoAcidChar1, Character aminoAcidChar2);
    }


    HeuristicFunction.java



    package net.coderodde.bio.msa;

    import java.awt.Point;
    import java.util.HashMap;
    import java.util.Map;

    final class HeuristicFunction {

    private final Map<Point, Integer> matrix;

    HeuristicFunction(MultipleSequenceAlignmentInstance instance) {
    int sequences = instance.getSequenceArray().length;
    this.matrix = new Map[sequences];

    for (int i = 0; i < sequences; ++i) {
    this.matrix[i] = new Map[sequences];

    for (int j = i + 1; j < sequences; ++j) {
    this.matrix[i][j] = new HashMap<>();
    }
    }
    }

    void put(int dimension1, int dimension2, Point point, Integer cost) {
    matrix[dimension1][dimension2].put(point, cost);
    }

    boolean containsPartial(int dimension1, int dimension2, Point point) {
    return matrix[dimension1][dimension2].containsKey(point);
    }

    Integer getPartial(int dimension1, int dimension2, Point point) {
    return matrix[dimension1][dimension2].get(point);
    }

    Integer get(LatticeNode node) {
    int cost = 0;
    int coordinates = node.getCoordinates();
    Point point = new Point();

    for (int dimension1 = 0;
    dimension1 < coordinates.length;
    dimension1++) {
    point.x = dimension1;

    for (int dimension2 = dimension1 + 1;
    dimension2 < coordinates.length;
    dimension2++) {
    point.y = dimension2;
    cost += matrix[dimension1][dimension2].get(point);
    }
    }

    return cost;
    }
    }


    App.java



    package net.coderodde.bio.msa;

    final class App {

    private static final String SEQUENCES = {
    "ACGHKGMNPFQEKKFKLMNRW",
    "CFGPQWYRTLMEKKFKNR",
    "EACLMNRPQWTR",
    "TIMWAYHTMGIEKKFK"
    };

    public static void main(String args) {
    MultipleSequenceAlignmentInstance instance =
    new MultipleSequenceAlignmentInstance(
    PAM250CostMatrix.getPAM250CostMatrix(),
    4,
    SEQUENCES);

    long start = System.currentTimeMillis();
    Alignment alignment1 = instance.align();
    long end = System.currentTimeMillis();

    System.out.println(alignment1);
    System.out.println(end - start + " ms.");
    System.out.println();

    start = System.currentTimeMillis();
    Alignment alignment2 = instance.alignBrute();
    end = System.currentTimeMillis();

    System.out.println(alignment2);
    System.out.println(end - start + " ms.");
    }
    }


    Output



    The output for the demo is




    Computed heuristic function in 1003 milliseconds.
    -ACG-HKGMNPF-QEKKFKLMNRW
    -CFGPQW-YRTL-MEKKFK--NR-
    EAC--LM-NRP--Q-WT-R-----
    -TI--MWAYHTMGIEKKFK-----
    Cost: 53
    1126 ms.

    -ACG-HKGMNPF-QEKKFKLMNRW
    -CFGPQW-YRTL-MEKKFK--NR-
    EAC--LM-NRP--Q-WT-R-----
    -TI--MWAYHTMGIEKKFK-----
    Cost: 53
    60 ms.



    Critique request



    As always, please tell me anything that comes to mind.










    share|improve this question

























      6












      6








      6







      (The entire project lives here.)



      Problem definition



      Suppose we are given three genomic strings drawn from the alphabet of 20 amino acids:



      ACGH
      CFG
      EAC


      In multiple sequence alignment problem we want to align them optimally in order to access their evolutionary similarity. Such an alignment may look like the following:



      -AC-GH
      --CFG-
      EAC---


      Solution



      Often, the problem is cast as a pathfinding problem in $k$-dimensional lattice, where $k$ is the number of strings. Here is my program, it compares A* with Dijkstra's algorithms:



      MultipleSequenceAlignmentInstance.java



      package net.coderodde.bio.msa;

      import java.util.ArrayList;
      import java.util.Collections;
      import java.util.HashMap;
      import java.util.HashSet;
      import java.util.List;
      import java.util.Map;
      import java.util.Objects;
      import java.util.PriorityQueue;
      import java.util.Queue;
      import java.util.Set;

      public final class MultipleSequenceAlignmentInstance {

      /**
      * The penalty for pairs in which there is one valid character and one gap.
      */
      private final int gapPenalty;

      /**
      * The character pairs cost matrix.
      */
      private final CostMatrix<Integer> costMatrix;

      /**
      * The array of sequences to be aligned.
      */
      private final String sequenceArray;

      // A small speed optimization:
      private final char column;

      public MultipleSequenceAlignmentInstance(CostMatrix<Integer> costMatrix,
      int gapPenalty,
      String... sequenceArray) {
      this.costMatrix = Objects.requireNonNull(costMatrix,
      "Cost matrix is null");
      this.gapPenalty = gapPenalty;
      this.sequenceArray = sequenceArray.clone();
      this.column = new char[sequenceArray.length];

      for (int i = 0; i != sequenceArray.length; ++i) {
      this.sequenceArray[i] =
      checkIsValidGenomicSequence(sequenceArray[i]);
      }
      }

      public Alignment align() {
      long start = System.currentTimeMillis();
      HeuristicFunction hf = new HeuristicFunctionComputer()
      .computeHeuristicFunction(this);
      long end = System.currentTimeMillis();

      System.out.println("Computed heuristic function in " + (end - start) +
      " milliseconds.");

      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Map<LatticeNode, Integer> distance = new HashMap<>();
      Map<LatticeNode, LatticeNode> parents = new HashMap<>();

      LatticeNode sourceNode = getSourceNode();
      LatticeNode targetNode = getTargetNode();

      open.add(new LatticeNodeHolder(sourceNode, 0));
      distance.put(sourceNode, 0);
      parents.put(sourceNode, null);

      Set<LatticeNode> closed = new HashSet<>();

      while (true) {
      LatticeNode currentNode = open.remove().getNode();

      if (currentNode.equals(targetNode)) {
      return tracebackPath(parents, distance.get(currentNode));
      }

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      for (LatticeNode childNode : currentNode.getChildren()) {
      if (closed.contains(childNode)) {
      continue;
      }

      int tentativeCost = distance.get(currentNode) +
      getWeight(currentNode, childNode);
      Integer currentCost = distance.get(childNode);

      if (currentCost == null || currentCost > tentativeCost) {
      open.add(new LatticeNodeHolder(childNode,
      tentativeCost +
      hf.get(childNode)));
      distance.put(childNode, tentativeCost);
      parents.put(childNode, currentNode);
      }
      }
      }
      }

      public Alignment alignBrute() {
      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Map<LatticeNode, Integer> distance = new HashMap<>();
      Map<LatticeNode, LatticeNode> parents = new HashMap<>();

      LatticeNode sourceNode = getSourceNode();
      LatticeNode targetNode = getTargetNode();

      open.add(new LatticeNodeHolder(sourceNode, 0));
      distance.put(sourceNode, 0);
      parents.put(sourceNode, null);

      Set<LatticeNode> closed = new HashSet<>();

      while (true) {
      LatticeNode currentNode = open.remove().getNode();

      if (currentNode.equals(targetNode)) {
      return tracebackPath(parents, distance.get(currentNode));
      }

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      for (LatticeNode childNode : currentNode.getChildren()) {
      if (closed.contains(childNode)) {
      continue;
      }

      int tentativeCost = distance.get(currentNode) +
      getWeight(currentNode, childNode);
      Integer currentCost = distance.get(childNode);

      if (currentCost == null || currentCost > tentativeCost) {
      open.add(new LatticeNodeHolder(childNode, tentativeCost));
      distance.put(childNode, tentativeCost);
      parents.put(childNode, currentNode);
      }
      }
      }
      }

      /**
      * Create the source node for the sequence alignment task, i.e., a lattice
      * node with all coordinates set to zero.
      *
      * @return the source node.
      */
      LatticeNode getSourceNode() {
      int sourceCoordinates = new int[sequenceArray.length];
      return new LatticeNode(this, sourceCoordinates);
      }

      /**
      * Create the target node for the sequence alignment task, i.e., a lattice
      * node with all coordinates set to the respective sequence lengths.
      *
      * @return the target node.
      */
      LatticeNode getTargetNode() {
      int targetCoordinates = new int[sequenceArray.length];

      for (int i = 0; i != sequenceArray.length; ++i) {
      targetCoordinates[i] = sequenceArray[i].length();
      }

      return new LatticeNode(this, targetCoordinates);
      }

      Integer getWeight(LatticeNode tail,
      LatticeNode head,
      int dimension1,
      int dimension2) {
      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      if (tailCoordinates[dimension1] == headCoordinates[dimension1]) {
      //System.out.println("1");
      return gapPenalty;
      } else if (tailCoordinates[dimension2] == headCoordinates[dimension2]) {
      //System.out.println("2");
      return gapPenalty;
      } else {
      // System.out.println("3");
      char character1 = sequenceArray[dimension1]
      .charAt(tailCoordinates[dimension1]);

      char character2 = sequenceArray[dimension2]
      .charAt(tailCoordinates[dimension2]);
      return costMatrix.getCost(character1, character2);
      }
      }

      Integer getWeight(LatticeNode tail, LatticeNode head) {
      // Extract the column represented by taking a single hop from 'tail' to
      // 'head':
      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (tailCoordinates[i] + 1 == headCoordinates[i]) {
      column[i] = sequenceArray[i].charAt(tailCoordinates[i]);
      } else {
      column[i] = AminoAcidAlphabet.GAP_CHARACTER;
      }
      }

      // Compute the hop cost as the sum of pairwise hops in any plane:
      int cost = 0;

      for (int i = 0; i < column.length; ++i) {
      char character1 = column[i];

      for (int j = i + 1; j < column.length; ++j) {
      char character2 = column[j];

      if (character1 != AminoAcidAlphabet.GAP_CHARACTER) {
      if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
      cost += costMatrix.getCost(character1, character2);
      } else {
      cost += gapPenalty;
      }
      } else {
      // character1 IS the gap character:
      if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
      cost += gapPenalty;
      } else {
      // Do nothing since we have a pair (gap, gap).
      }
      }
      }
      }

      return cost;
      }

      String getSequenceArray() {
      return sequenceArray;
      }

      private Alignment tracebackPath(Map<LatticeNode, LatticeNode> parents,
      Integer cost) {
      List<LatticeNode> path = new ArrayList<>();
      LatticeNode node = getTargetNode();

      while (node != null) {
      path.add(node);
      node = parents.get(node);
      }

      Collections.<LatticeNode>reverse(path);

      String strings = new String[getSequenceArray().length];
      StringBuilder stringBuilders = new StringBuilder[strings.length];

      for (int i = 0; i < stringBuilders.length; ++i) {
      stringBuilders[i] = new StringBuilder();
      }

      for (int i = 1; i < path.size(); ++i) {
      LatticeNode tail = path.get(i - 1);
      LatticeNode head = path.get(i);

      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      for (int j = 0; j < tailCoordinates.length; ++j) {
      if (tailCoordinates[j] != headCoordinates[j]) {
      stringBuilders[j].
      append(sequenceArray[j].charAt(tailCoordinates[j]));
      } else {
      stringBuilders[j].append(AminoAcidAlphabet.GAP_CHARACTER);
      }
      }
      }

      for (int i = 0; i < strings.length; ++i) {
      strings[i] = stringBuilders[i].toString();
      }

      return new Alignment(strings, cost);
      }

      private String checkIsValidGenomicSequence(String string) {
      Set<Character> characterSet =
      AminoAcidAlphabet.getAminoAcidAlphabet()
      .getCharacterSet();

      for (char c : string.toCharArray()) {
      if (!characterSet.contains(c)) {
      throw new IllegalArgumentException("Unknown amino acid: " + c);
      }
      }

      return string;
      }

      private static final class LatticeNodeHolder
      implements Comparable<LatticeNodeHolder> {

      private final Integer fScore;
      private final LatticeNode node;

      LatticeNodeHolder(LatticeNode node, Integer fScore) {
      this.fScore = fScore;
      this.node = node;
      }

      LatticeNode getNode() {
      return node;
      }

      @Override
      public int compareTo(LatticeNodeHolder o) {
      return Integer.compare(fScore, o.fScore);
      }
      }
      }


      HeuristicFunctionComputer.java



      package net.coderodde.bio.msa;

      import java.awt.Point;
      import java.util.HashSet;
      import java.util.PriorityQueue;
      import java.util.Queue;
      import java.util.Set;

      final class HeuristicFunctionComputer {

      HeuristicFunction computeHeuristicFunction(
      MultipleSequenceAlignmentInstance instance) {
      String sequenceArray = instance.getSequenceArray();
      HeuristicFunction heuristicFunction = new HeuristicFunction(instance);

      for (int dimension1 = 0;
      dimension1 < sequenceArray.length;
      dimension1++) {
      for (int dimension2 = dimension1 + 1;
      dimension2 < sequenceArray.length;
      dimension2++) {
      loadPartialHeuristicFunction(heuristicFunction,
      dimension1,
      dimension2,
      instance);
      }
      }

      return heuristicFunction;
      }

      // Basically, this method runs Dijkstra backwards from the target node until
      // the entire 2D-grid is explored.
      private void loadPartialHeuristicFunction(
      HeuristicFunction heuristicFunction,
      int dimension1,
      int dimension2,
      MultipleSequenceAlignmentInstance instance) {
      LatticeNode target = instance.getTargetNode();
      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Set<LatticeNode> closed = new HashSet<>();
      open.add(new LatticeNodeHolder(target, 0));
      Point point = new Point();

      while (!open.isEmpty()) {
      LatticeNodeHolder currentNodeHolder = open.remove();
      LatticeNode currentNode = currentNodeHolder.getLatticeNode();

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      Integer currentNodeCost = currentNodeHolder.getCost();
      extractPoint(point, currentNode, dimension1, dimension2);
      heuristicFunction.put(dimension1,
      dimension2,
      new Point(point),
      currentNodeCost);

      for (LatticeNode parent : currentNode.getParents()) {
      if (closed.contains(parent)) {
      continue;
      }

      int tentativeCost =
      currentNodeCost + instance.getWeight(parent,
      currentNode,
      dimension1,
      dimension2);
      extractPoint(point, parent, dimension1, dimension2);

      if (!heuristicFunction.containsPartial(dimension1,
      dimension2,
      point)
      || heuristicFunction.getPartial(dimension1,
      dimension2,
      point)
      > tentativeCost) {
      open.add(new LatticeNodeHolder(parent, tentativeCost));
      }
      }
      }
      }

      private static void extractPoint(Point point,
      LatticeNode latticeNode,
      int i,
      int j) {
      int coordinates = latticeNode.getCoordinates();
      point.x = coordinates[i];
      point.y = coordinates[j];
      }

      private static final class LatticeNodeHolder
      implements Comparable<LatticeNodeHolder> {

      private final LatticeNode node;
      private final Integer cost;

      LatticeNodeHolder(LatticeNode node, Integer cost) {
      this.node = node;
      this.cost = cost;
      }

      LatticeNode getLatticeNode() {
      return node;
      }

      Integer getCost() {
      return cost;
      }

      @Override
      public int compareTo(LatticeNodeHolder o) {
      return Integer.compare(cost, o.cost);
      }
      }
      }


      LatticeNode.java



      package net.coderodde.bio.msa;

      import com.sun.corba.se.spi.orb.OperationFactory;
      import java.util.Arrays;

      final class LatticeNode {

      /**
      * The problem instance this lattice node contributes to.
      */
      private final MultipleSequenceAlignmentInstance instance;

      /**
      * The coordinates of this node in the entire lattice.
      */
      private final int coordinates;

      LatticeNode(MultipleSequenceAlignmentInstance instance, int coordinates) {
      this.instance = instance;
      this.coordinates = coordinates;
      }

      @Override
      public boolean equals(Object o) {
      return Arrays.equals(coordinates, ((LatticeNode) o).coordinates);
      }

      // Generated by NetBeans 8.1:
      @Override
      public int hashCode() {
      int hash = 7;
      hash = 41 * hash + Arrays.hashCode(this.coordinates);
      return hash;
      }

      @Override
      public String toString() {
      StringBuilder sb = new StringBuilder("[");
      String separator = "";

      for (int coordinate : coordinates) {
      sb.append(separator).append(coordinate);
      separator = ", ";
      }

      return sb.append("]").toString();
      }

      LatticeNode getChildren() {
      // Find out in how many dimension we can move forward:
      int dimensionsNotReached = 0;
      String sequenceArray = instance.getSequenceArray();
      boolean inclusionMap = new boolean[coordinates.length];

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (coordinates[i] < sequenceArray[i].length()) {
      dimensionsNotReached++;
      // We can make a step forward in the direction of ith dimension:
      inclusionMap[i] = true;
      }
      }

      // Create the array of children:
      int numberOfChildren = pow2(dimensionsNotReached) - 1;
      LatticeNode children = new LatticeNode[numberOfChildren];
      loadChildren(children, inclusionMap);

      // Convert offsets to actual child coordinates:
      for (LatticeNode child : children) {
      child.addOffsets(coordinates);
      }

      return children;
      }

      LatticeNode getParents() {
      // Find out in how many dimensions we can move BACKward:
      int dimensionsNotReached = 0;
      String sequenceArray = instance.getSequenceArray();
      boolean inclusionMap = new boolean[coordinates.length];

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (coordinates[i] > 0) {
      dimensionsNotReached++;
      // We can make a step backwards in the direction of ith
      // dimension:
      inclusionMap[i] = true;
      }
      }

      // Create the array of parents:
      int numberOfParents = pow2(dimensionsNotReached) - 1;
      LatticeNode parents = new LatticeNode[numberOfParents];
      loadParents(parents, inclusionMap);

      // Convert the offsets to actual parent coordinates:
      for (LatticeNode parent : parents) {
      parent.addOffsets(coordinates);
      }

      return parents;
      }

      int getCoordinates() {
      return coordinates;
      }

      private void loadChildren(LatticeNode children, boolean inclusionMap) {
      int coords = new int[this.coordinates.length];

      for (int i = 0; i != children.length; ++i) {
      increment(coords, inclusionMap);
      children[i] = new LatticeNode(instance, coords.clone());
      }
      }

      private void loadParents(LatticeNode parents, boolean inclusionMap) {
      int coords = new int[this.coordinates.length];

      for (int i = 0; i != parents.length; ++i) {
      decrement(coords, inclusionMap);
      parents[i] = new LatticeNode(instance, coords.clone());
      }
      }

      private static void increment(int coordinates, boolean inclusionMap) {
      for (int i = 0; i < coordinates.length; ++i) {
      if (!inclusionMap[i]) {
      continue;
      }

      if (coordinates[i] == 0) {
      coordinates[i] = 1;
      return;
      }

      coordinates[i] = 0;
      }
      }

      private static void decrement(int coordinates, boolean inclusionMap) {
      for (int i = 0; i < coordinates.length; ++i) {
      if (!inclusionMap[i]) {
      continue;
      }

      if (coordinates[i] == 0) {
      coordinates[i] = -1;
      return;
      }

      coordinates[i] = 0;
      }
      }

      private void addOffsets(int offsets) {
      for (int i = 0; i < coordinates.length; ++i) {
      coordinates[i] += offsets[i];
      }
      }

      private static int pow2(int exponent) {
      int ret = 1;

      for (int e = 0; e < exponent; ++e) {
      ret *= 2;
      }

      return ret;
      }
      }


      PAM250CostMatrix.java



      package net.coderodde.bio.msa;

      import java.util.HashMap;
      import java.util.Map;
      import net.coderodde.bio.msa.AminoAcidAlphabet;
      import net.coderodde.bio.msa.CostMatrix;

      public final class PAM250CostMatrix implements CostMatrix<Integer> {

      private static PAM250CostMatrix instance;

      private final Map<Character, Map<Character, Integer>> m = new HashMap<>();

      public static PAM250CostMatrix getPAM250CostMatrix() {
      if (instance == null) {
      instance = new PAM250CostMatrix();
      }

      return instance;
      }

      private PAM250CostMatrix() {
      AminoAcidAlphabet alphabet = AminoAcidAlphabet.getAminoAcidAlphabet();

      alphabet.getCharacterSet().stream().forEach((character) -> {
      m.put(character, new HashMap<>());
      });

      m.get('A').put('A', -2);
      m.get('R').put('A', 2);
      m.get('A').put('R', 2);
      m.get('R').put('R', -6);
      m.get('N').put('A', 0);
      m.get('A').put('N', 0);
      m.get('N').put('R', 0);
      m.get('R').put('N', 0);
      m.get('N').put('N', -2);
      m.get('D').put('A', 0);
      m.get('A').put('D', 0);
      m.get('D').put('R', 1);
      m.get('R').put('D', 1);
      m.get('D').put('N', -2);
      m.get('N').put('D', -2);
      m.get('D').put('D', -4);
      m.get('C').put('A', 2);
      m.get('A').put('C', 2);
      m.get('C').put('R', 4);
      m.get('R').put('C', 4);
      m.get('C').put('N', 4);
      m.get('N').put('C', 4);
      m.get('C').put('D', 5);
      m.get('D').put('C', 5);
      m.get('C').put('C', -4);
      m.get('Q').put('A', 0);
      m.get('A').put('Q', 0);
      m.get('Q').put('R', -1);
      m.get('R').put('Q', -1);
      m.get('Q').put('N', -1);
      m.get('N').put('Q', -1);
      m.get('Q').put('D', -2);
      m.get('D').put('Q', -2);
      m.get('Q').put('C', 5);
      m.get('C').put('Q', 5);
      m.get('Q').put('Q', -4);
      m.get('E').put('A', 0);
      m.get('A').put('E', 0);
      m.get('E').put('R', 1);
      m.get('R').put('E', 1);
      m.get('E').put('N', -1);
      m.get('N').put('E', -1);
      m.get('E').put('D', -3);
      m.get('D').put('E', -3);
      m.get('E').put('C', 5);
      m.get('C').put('E', 5);
      m.get('E').put('Q', -2);
      m.get('Q').put('E', -2);
      m.get('E').put('E', -4);
      m.get('G').put('A', -1);
      m.get('A').put('G', -1);
      m.get('G').put('R', 3);
      m.get('R').put('G', 3);
      m.get('G').put('N', 0);
      m.get('N').put('G', 0);
      m.get('G').put('D', -1);
      m.get('D').put('G', -1);
      m.get('G').put('C', 3);
      m.get('C').put('G', 3);
      m.get('G').put('Q', 1);
      m.get('Q').put('G', 1);
      m.get('G').put('E', 0);
      m.get('E').put('G', 0);
      m.get('G').put('G', -5);
      m.get('H').put('A', 1);
      m.get('A').put('H', 1);
      m.get('H').put('R', -2);
      m.get('R').put('H', -2);
      m.get('H').put('N', -2);
      m.get('N').put('H', -2);
      m.get('H').put('D', -1);
      m.get('D').put('H', -1);
      m.get('H').put('C', 3);
      m.get('C').put('H', 3);
      m.get('H').put('Q', -3);
      m.get('Q').put('H', -3);
      m.get('H').put('E', -1);
      m.get('E').put('H', -1);
      m.get('H').put('G', 2);
      m.get('G').put('H', 2);
      m.get('H').put('H', -6);
      m.get('I').put('A', 1);
      m.get('A').put('I', 1);
      m.get('I').put('R', 2);
      m.get('R').put('I', 2);
      m.get('I').put('N', 2);
      m.get('N').put('I', 2);
      m.get('I').put('D', 2);
      m.get('D').put('I', 2);
      m.get('I').put('C', 2);
      m.get('C').put('I', 2);
      m.get('I').put('Q', 2);
      m.get('Q').put('I', 2);
      m.get('I').put('E', 2);
      m.get('E').put('I', 2);
      m.get('I').put('G', 3);
      m.get('G').put('I', 3);
      m.get('I').put('H', 2);
      m.get('H').put('I', 2);
      m.get('I').put('I', -5);
      m.get('L').put('A', 2);
      m.get('A').put('L', 2);
      m.get('L').put('R', 3);
      m.get('R').put('L', 3);
      m.get('L').put('N', 3);
      m.get('N').put('L', 3);
      m.get('L').put('D', 4);
      m.get('D').put('L', 4);
      m.get('L').put('C', 6);
      m.get('C').put('L', 6);
      m.get('L').put('Q', 2);
      m.get('Q').put('L', 2);
      m.get('L').put('E', 3);
      m.get('E').put('L', 3);
      m.get('L').put('G', 4);
      m.get('G').put('L', 4);
      m.get('L').put('H', 2);
      m.get('H').put('L', 2);
      m.get('L').put('I', -2);
      m.get('I').put('L', -2);
      m.get('L').put('L', -6);
      m.get('K').put('A', 1);
      m.get('A').put('K', 1);
      m.get('K').put('R', -3);
      m.get('R').put('K', -3);
      m.get('K').put('N', -1);
      m.get('N').put('K', -1);
      m.get('K').put('D', 0);
      m.get('D').put('K', 0);
      m.get('K').put('C', 5);
      m.get('C').put('K', 5);
      m.get('K').put('Q', -1);
      m.get('Q').put('K', -1);
      m.get('K').put('E', 0);
      m.get('E').put('K', 0);
      m.get('K').put('G', 2);
      m.get('G').put('K', 2);
      m.get('K').put('H', 0);
      m.get('H').put('K', 0);
      m.get('K').put('I', 2);
      m.get('I').put('K', 2);
      m.get('K').put('L', 3);
      m.get('L').put('K', 3);
      m.get('K').put('K', -5);
      m.get('M').put('A', 1);
      m.get('A').put('M', 1);
      m.get('M').put('R', 0);
      m.get('R').put('M', 0);
      m.get('M').put('N', 2);
      m.get('N').put('M', 2);
      m.get('M').put('D', 3);
      m.get('D').put('M', 3);
      m.get('M').put('C', 5);
      m.get('C').put('M', 5);
      m.get('M').put('Q', 1);
      m.get('Q').put('M', 1);
      m.get('M').put('E', 2);
      m.get('E').put('M', 2);
      m.get('M').put('G', 3);
      m.get('G').put('M', 3);
      m.get('M').put('H', 2);
      m.get('H').put('M', 2);
      m.get('M').put('I', -2);
      m.get('I').put('M', -2);
      m.get('M').put('L', -4);
      m.get('L').put('M', -4);
      m.get('M').put('K', 0);
      m.get('K').put('M', 0);
      m.get('M').put('M', -6);
      m.get('F').put('A', 4);
      m.get('A').put('F', 4);
      m.get('F').put('R', 4);
      m.get('R').put('F', 4);
      m.get('F').put('N', 4);
      m.get('N').put('F', 4);
      m.get('F').put('D', 6);
      m.get('D').put('F', 6);
      m.get('F').put('C', 4);
      m.get('C').put('F', 4);
      m.get('F').put('Q', 5);
      m.get('Q').put('F', 5);
      m.get('F').put('E', 5);
      m.get('E').put('F', 5);
      m.get('F').put('G', 5);
      m.get('G').put('F', 5);
      m.get('F').put('H', 2);
      m.get('H').put('F', 2);
      m.get('F').put('I', -1);
      m.get('I').put('F', -1);
      m.get('F').put('L', -2);
      m.get('L').put('F', -2);
      m.get('F').put('K', 5);
      m.get('K').put('F', 5);
      m.get('F').put('M', 0);
      m.get('M').put('F', 0);
      m.get('F').put('F', -9);
      m.get('P').put('A', -1);
      m.get('A').put('P', -1);
      m.get('P').put('R', 0);
      m.get('R').put('P', 0);
      m.get('P').put('N', 1);
      m.get('N').put('P', 1);
      m.get('P').put('D', 1);
      m.get('D').put('P', 1);
      m.get('P').put('C', 3);
      m.get('C').put('P', 3);
      m.get('P').put('Q', 0);
      m.get('Q').put('P', 0);
      m.get('P').put('E', 1);
      m.get('E').put('P', 1);
      m.get('P').put('G', 1);
      m.get('G').put('P', 1);
      m.get('P').put('H', 0);
      m.get('H').put('P', 0);
      m.get('P').put('I', 2);
      m.get('I').put('P', 2);
      m.get('P').put('L', 3);
      m.get('L').put('P', 3);
      m.get('P').put('K', 1);
      m.get('K').put('P', 1);
      m.get('P').put('M', 2);
      m.get('M').put('P', 2);
      m.get('P').put('F', 5);
      m.get('F').put('P', 5);
      m.get('P').put('P', -6);
      m.get('S').put('A', -1);
      m.get('A').put('S', -1);
      m.get('S').put('R', 0);
      m.get('R').put('S', 0);
      m.get('S').put('N', -1);
      m.get('N').put('S', -1);
      m.get('S').put('D', 0);
      m.get('D').put('S', 0);
      m.get('S').put('C', 0);
      m.get('C').put('S', 0);
      m.get('S').put('Q', 1);
      m.get('Q').put('S', 1);
      m.get('S').put('E', 0);
      m.get('E').put('S', 0);
      m.get('S').put('G', -1);
      m.get('G').put('S', -1);
      m.get('S').put('H', 1);
      m.get('H').put('S', 1);
      m.get('S').put('I', 1);
      m.get('I').put('S', 1);
      m.get('S').put('L', 3);
      m.get('L').put('S', 3);
      m.get('S').put('K', 0);
      m.get('K').put('S', 0);
      m.get('S').put('M', 2);
      m.get('M').put('S', 2);
      m.get('S').put('F', 3);
      m.get('F').put('S', 3);
      m.get('S').put('P', -1);
      m.get('P').put('S', -1);
      m.get('S').put('S', -3);
      m.get('T').put('A', -1);
      m.get('A').put('T', -1);
      m.get('T').put('R', 1);
      m.get('R').put('T', 1);
      m.get('T').put('N', 0);
      m.get('N').put('T', 0);
      m.get('T').put('D', 0);
      m.get('D').put('T', 0);
      m.get('T').put('C', 2);
      m.get('C').put('T', 2);
      m.get('T').put('Q', 1);
      m.get('Q').put('T', 1);
      m.get('T').put('E', 0);
      m.get('E').put('T', 0);
      m.get('T').put('G', 0);
      m.get('G').put('T', 0);
      m.get('T').put('H', 1);
      m.get('H').put('T', 1);
      m.get('T').put('I', 0);
      m.get('I').put('T', 0);
      m.get('T').put('L', 2);
      m.get('L').put('T', 2);
      m.get('T').put('K', 0);
      m.get('K').put('T', 0);
      m.get('T').put('M', 1);
      m.get('M').put('T', 1);
      m.get('T').put('F', 2);
      m.get('F').put('T', 2);
      m.get('T').put('P', 0);
      m.get('P').put('T', 0);
      m.get('T').put('S', -1);
      m.get('S').put('T', -1);
      m.get('T').put('T', -3);
      m.get('W').put('A', 6);
      m.get('A').put('W', 6);
      m.get('W').put('R', -2);
      m.get('R').put('W', -2);
      m.get('W').put('N', 4);
      m.get('N').put('W', 4);
      m.get('W').put('D', 7);
      m.get('D').put('W', 7);
      m.get('W').put('C', 8);
      m.get('C').put('W', 8);
      m.get('W').put('Q', 5);
      m.get('Q').put('W', 5);
      m.get('W').put('E', 7);
      m.get('E').put('W', 7);
      m.get('W').put('G', 7);
      m.get('G').put('W', 7);
      m.get('W').put('H', 3);
      m.get('H').put('W', 3);
      m.get('W').put('I', 5);
      m.get('I').put('W', 5);
      m.get('W').put('L', 2);
      m.get('L').put('W', 2);
      m.get('W').put('K', 3);
      m.get('K').put('W', 3);
      m.get('W').put('M', 4);
      m.get('M').put('W', 4);
      m.get('W').put('F', 0);
      m.get('F').put('W', 0);
      m.get('W').put('P', 6);
      m.get('P').put('W', 6);
      m.get('W').put('S', 2);
      m.get('S').put('W', 2);
      m.get('W').put('T', 5);
      m.get('T').put('W', 5);
      m.get('W').put('W', -17);
      m.get('Y').put('A', 3);
      m.get('A').put('Y', 3);
      m.get('Y').put('R', 4);
      m.get('R').put('Y', 4);
      m.get('Y').put('N', 2);
      m.get('N').put('Y', 2);
      m.get('Y').put('D', 4);
      m.get('D').put('Y', 4);
      m.get('Y').put('C', 0);
      m.get('C').put('Y', 0);
      m.get('Y').put('Q', 4);
      m.get('Q').put('Y', 4);
      m.get('Y').put('E', 4);
      m.get('E').put('Y', 4);
      m.get('Y').put('G', 5);
      m.get('G').put('Y', 5);
      m.get('Y').put('H', 0);
      m.get('H').put('Y', 0);
      m.get('Y').put('I', 1);
      m.get('I').put('Y', 1);
      m.get('Y').put('L', 1);
      m.get('L').put('Y', 1);
      m.get('Y').put('K', 4);
      m.get('K').put('Y', 4);
      m.get('Y').put('M', 2);
      m.get('M').put('Y', 2);
      m.get('Y').put('F', -7);
      m.get('F').put('Y', -7);
      m.get('Y').put('P', 5);
      m.get('P').put('Y', 5);
      m.get('Y').put('S', 3);
      m.get('S').put('Y', 3);
      m.get('Y').put('T', 3);
      m.get('T').put('Y', 3);
      m.get('Y').put('W', 0);
      m.get('W').put('Y', 0);
      m.get('Y').put('Y', -10);
      m.get('V').put('A', 0);
      m.get('A').put('V', 0);
      m.get('V').put('R', 2);
      m.get('R').put('V', 2);
      m.get('V').put('N', 2);
      m.get('N').put('V', 2);
      m.get('V').put('D', 2);
      m.get('D').put('V', 2);
      m.get('V').put('C', 2);
      m.get('C').put('V', 2);
      m.get('V').put('Q', 2);
      m.get('Q').put('V', 2);
      m.get('V').put('E', 2);
      m.get('E').put('V', 2);
      m.get('V').put('G', 1);
      m.get('G').put('V', 1);
      m.get('V').put('H', 2);
      m.get('H').put('V', 2);
      m.get('V').put('I', -4);
      m.get('I').put('V', -4);
      m.get('V').put('L', -2);
      m.get('L').put('V', -2);
      m.get('V').put('K', 2);
      m.get('K').put('V', 2);
      m.get('V').put('M', -2);
      m.get('M').put('V', -2);
      m.get('V').put('F', 1);
      m.get('F').put('V', 1);
      m.get('V').put('P', 1);
      m.get('P').put('V', 1);
      m.get('V').put('S', 1);
      m.get('S').put('V', 1);
      m.get('V').put('T', 0);
      m.get('T').put('V', 0);
      m.get('V').put('W', 6);
      m.get('W').put('V', 6);
      m.get('V').put('Y', 2);
      m.get('Y').put('V', 2);
      m.get('V').put('V', -4);
      }

      @Override
      public Integer getCost(Character aminoAcidChar1, Character aminoAcidChar2) {
      try {
      return m.get(aminoAcidChar1).get(aminoAcidChar2);
      } catch (NullPointerException ex) {
      throw new IllegalArgumentException("Bad arguments: (" +
      aminoAcidChar1 + ", " + aminoAcidChar2 + ")",
      ex);
      }
      }
      }


      AminoAcidAlphabet.java



      package net.coderodde.bio.msa;

      import java.util.Collections;
      import java.util.HashSet;
      import java.util.Set;

      public final class AminoAcidAlphabet {

      public static final Character GAP_CHARACTER = '-';
      private static AminoAcidAlphabet instance;

      private final Set<Character> alphabet = new HashSet<>();

      public static AminoAcidAlphabet getAminoAcidAlphabet() {
      if (instance == null) {
      instance = new AminoAcidAlphabet();
      }

      return instance;
      }

      private AminoAcidAlphabet() {
      String aminoAcids = "ACDEF" +
      "GHIKL" +
      "MNPQR" +
      "STVWY";

      for (char c : aminoAcids.toCharArray()) {
      alphabet.add(c);
      }
      }

      public Set<Character> getCharacterSet() {
      return Collections.<Character>unmodifiableSet(alphabet);
      }
      }


      Alignment.java



      package net.coderodde.bio.msa;

      public class Alignment {

      private final String alignment;
      private final int cost;

      Alignment(String alignment, int cost) {
      this.alignment = alignment;
      this.cost = cost;
      }

      public String getAlignemnt() {
      return alignment;
      }

      public int getCost() {
      return cost;
      }

      @Override
      public String toString() {
      StringBuilder sb = new StringBuilder();
      String separator = "";

      for (String row : alignment) {
      sb.append(separator).append(row);
      separator = "n";
      }

      sb.append("nCost: ").append(cost);
      return sb.toString();
      }
      }


      CostMatrix.java



      package net.coderodde.bio.msa;

      public interface CostMatrix<C> {

      public C getCost(Character aminoAcidChar1, Character aminoAcidChar2);
      }


      HeuristicFunction.java



      package net.coderodde.bio.msa;

      import java.awt.Point;
      import java.util.HashMap;
      import java.util.Map;

      final class HeuristicFunction {

      private final Map<Point, Integer> matrix;

      HeuristicFunction(MultipleSequenceAlignmentInstance instance) {
      int sequences = instance.getSequenceArray().length;
      this.matrix = new Map[sequences];

      for (int i = 0; i < sequences; ++i) {
      this.matrix[i] = new Map[sequences];

      for (int j = i + 1; j < sequences; ++j) {
      this.matrix[i][j] = new HashMap<>();
      }
      }
      }

      void put(int dimension1, int dimension2, Point point, Integer cost) {
      matrix[dimension1][dimension2].put(point, cost);
      }

      boolean containsPartial(int dimension1, int dimension2, Point point) {
      return matrix[dimension1][dimension2].containsKey(point);
      }

      Integer getPartial(int dimension1, int dimension2, Point point) {
      return matrix[dimension1][dimension2].get(point);
      }

      Integer get(LatticeNode node) {
      int cost = 0;
      int coordinates = node.getCoordinates();
      Point point = new Point();

      for (int dimension1 = 0;
      dimension1 < coordinates.length;
      dimension1++) {
      point.x = dimension1;

      for (int dimension2 = dimension1 + 1;
      dimension2 < coordinates.length;
      dimension2++) {
      point.y = dimension2;
      cost += matrix[dimension1][dimension2].get(point);
      }
      }

      return cost;
      }
      }


      App.java



      package net.coderodde.bio.msa;

      final class App {

      private static final String SEQUENCES = {
      "ACGHKGMNPFQEKKFKLMNRW",
      "CFGPQWYRTLMEKKFKNR",
      "EACLMNRPQWTR",
      "TIMWAYHTMGIEKKFK"
      };

      public static void main(String args) {
      MultipleSequenceAlignmentInstance instance =
      new MultipleSequenceAlignmentInstance(
      PAM250CostMatrix.getPAM250CostMatrix(),
      4,
      SEQUENCES);

      long start = System.currentTimeMillis();
      Alignment alignment1 = instance.align();
      long end = System.currentTimeMillis();

      System.out.println(alignment1);
      System.out.println(end - start + " ms.");
      System.out.println();

      start = System.currentTimeMillis();
      Alignment alignment2 = instance.alignBrute();
      end = System.currentTimeMillis();

      System.out.println(alignment2);
      System.out.println(end - start + " ms.");
      }
      }


      Output



      The output for the demo is




      Computed heuristic function in 1003 milliseconds.
      -ACG-HKGMNPF-QEKKFKLMNRW
      -CFGPQW-YRTL-MEKKFK--NR-
      EAC--LM-NRP--Q-WT-R-----
      -TI--MWAYHTMGIEKKFK-----
      Cost: 53
      1126 ms.

      -ACG-HKGMNPF-QEKKFKLMNRW
      -CFGPQW-YRTL-MEKKFK--NR-
      EAC--LM-NRP--Q-WT-R-----
      -TI--MWAYHTMGIEKKFK-----
      Cost: 53
      60 ms.



      Critique request



      As always, please tell me anything that comes to mind.










      share|improve this question













      (The entire project lives here.)



      Problem definition



      Suppose we are given three genomic strings drawn from the alphabet of 20 amino acids:



      ACGH
      CFG
      EAC


      In multiple sequence alignment problem we want to align them optimally in order to access their evolutionary similarity. Such an alignment may look like the following:



      -AC-GH
      --CFG-
      EAC---


      Solution



      Often, the problem is cast as a pathfinding problem in $k$-dimensional lattice, where $k$ is the number of strings. Here is my program, it compares A* with Dijkstra's algorithms:



      MultipleSequenceAlignmentInstance.java



      package net.coderodde.bio.msa;

      import java.util.ArrayList;
      import java.util.Collections;
      import java.util.HashMap;
      import java.util.HashSet;
      import java.util.List;
      import java.util.Map;
      import java.util.Objects;
      import java.util.PriorityQueue;
      import java.util.Queue;
      import java.util.Set;

      public final class MultipleSequenceAlignmentInstance {

      /**
      * The penalty for pairs in which there is one valid character and one gap.
      */
      private final int gapPenalty;

      /**
      * The character pairs cost matrix.
      */
      private final CostMatrix<Integer> costMatrix;

      /**
      * The array of sequences to be aligned.
      */
      private final String sequenceArray;

      // A small speed optimization:
      private final char column;

      public MultipleSequenceAlignmentInstance(CostMatrix<Integer> costMatrix,
      int gapPenalty,
      String... sequenceArray) {
      this.costMatrix = Objects.requireNonNull(costMatrix,
      "Cost matrix is null");
      this.gapPenalty = gapPenalty;
      this.sequenceArray = sequenceArray.clone();
      this.column = new char[sequenceArray.length];

      for (int i = 0; i != sequenceArray.length; ++i) {
      this.sequenceArray[i] =
      checkIsValidGenomicSequence(sequenceArray[i]);
      }
      }

      public Alignment align() {
      long start = System.currentTimeMillis();
      HeuristicFunction hf = new HeuristicFunctionComputer()
      .computeHeuristicFunction(this);
      long end = System.currentTimeMillis();

      System.out.println("Computed heuristic function in " + (end - start) +
      " milliseconds.");

      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Map<LatticeNode, Integer> distance = new HashMap<>();
      Map<LatticeNode, LatticeNode> parents = new HashMap<>();

      LatticeNode sourceNode = getSourceNode();
      LatticeNode targetNode = getTargetNode();

      open.add(new LatticeNodeHolder(sourceNode, 0));
      distance.put(sourceNode, 0);
      parents.put(sourceNode, null);

      Set<LatticeNode> closed = new HashSet<>();

      while (true) {
      LatticeNode currentNode = open.remove().getNode();

      if (currentNode.equals(targetNode)) {
      return tracebackPath(parents, distance.get(currentNode));
      }

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      for (LatticeNode childNode : currentNode.getChildren()) {
      if (closed.contains(childNode)) {
      continue;
      }

      int tentativeCost = distance.get(currentNode) +
      getWeight(currentNode, childNode);
      Integer currentCost = distance.get(childNode);

      if (currentCost == null || currentCost > tentativeCost) {
      open.add(new LatticeNodeHolder(childNode,
      tentativeCost +
      hf.get(childNode)));
      distance.put(childNode, tentativeCost);
      parents.put(childNode, currentNode);
      }
      }
      }
      }

      public Alignment alignBrute() {
      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Map<LatticeNode, Integer> distance = new HashMap<>();
      Map<LatticeNode, LatticeNode> parents = new HashMap<>();

      LatticeNode sourceNode = getSourceNode();
      LatticeNode targetNode = getTargetNode();

      open.add(new LatticeNodeHolder(sourceNode, 0));
      distance.put(sourceNode, 0);
      parents.put(sourceNode, null);

      Set<LatticeNode> closed = new HashSet<>();

      while (true) {
      LatticeNode currentNode = open.remove().getNode();

      if (currentNode.equals(targetNode)) {
      return tracebackPath(parents, distance.get(currentNode));
      }

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      for (LatticeNode childNode : currentNode.getChildren()) {
      if (closed.contains(childNode)) {
      continue;
      }

      int tentativeCost = distance.get(currentNode) +
      getWeight(currentNode, childNode);
      Integer currentCost = distance.get(childNode);

      if (currentCost == null || currentCost > tentativeCost) {
      open.add(new LatticeNodeHolder(childNode, tentativeCost));
      distance.put(childNode, tentativeCost);
      parents.put(childNode, currentNode);
      }
      }
      }
      }

      /**
      * Create the source node for the sequence alignment task, i.e., a lattice
      * node with all coordinates set to zero.
      *
      * @return the source node.
      */
      LatticeNode getSourceNode() {
      int sourceCoordinates = new int[sequenceArray.length];
      return new LatticeNode(this, sourceCoordinates);
      }

      /**
      * Create the target node for the sequence alignment task, i.e., a lattice
      * node with all coordinates set to the respective sequence lengths.
      *
      * @return the target node.
      */
      LatticeNode getTargetNode() {
      int targetCoordinates = new int[sequenceArray.length];

      for (int i = 0; i != sequenceArray.length; ++i) {
      targetCoordinates[i] = sequenceArray[i].length();
      }

      return new LatticeNode(this, targetCoordinates);
      }

      Integer getWeight(LatticeNode tail,
      LatticeNode head,
      int dimension1,
      int dimension2) {
      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      if (tailCoordinates[dimension1] == headCoordinates[dimension1]) {
      //System.out.println("1");
      return gapPenalty;
      } else if (tailCoordinates[dimension2] == headCoordinates[dimension2]) {
      //System.out.println("2");
      return gapPenalty;
      } else {
      // System.out.println("3");
      char character1 = sequenceArray[dimension1]
      .charAt(tailCoordinates[dimension1]);

      char character2 = sequenceArray[dimension2]
      .charAt(tailCoordinates[dimension2]);
      return costMatrix.getCost(character1, character2);
      }
      }

      Integer getWeight(LatticeNode tail, LatticeNode head) {
      // Extract the column represented by taking a single hop from 'tail' to
      // 'head':
      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (tailCoordinates[i] + 1 == headCoordinates[i]) {
      column[i] = sequenceArray[i].charAt(tailCoordinates[i]);
      } else {
      column[i] = AminoAcidAlphabet.GAP_CHARACTER;
      }
      }

      // Compute the hop cost as the sum of pairwise hops in any plane:
      int cost = 0;

      for (int i = 0; i < column.length; ++i) {
      char character1 = column[i];

      for (int j = i + 1; j < column.length; ++j) {
      char character2 = column[j];

      if (character1 != AminoAcidAlphabet.GAP_CHARACTER) {
      if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
      cost += costMatrix.getCost(character1, character2);
      } else {
      cost += gapPenalty;
      }
      } else {
      // character1 IS the gap character:
      if (character2 != AminoAcidAlphabet.GAP_CHARACTER) {
      cost += gapPenalty;
      } else {
      // Do nothing since we have a pair (gap, gap).
      }
      }
      }
      }

      return cost;
      }

      String getSequenceArray() {
      return sequenceArray;
      }

      private Alignment tracebackPath(Map<LatticeNode, LatticeNode> parents,
      Integer cost) {
      List<LatticeNode> path = new ArrayList<>();
      LatticeNode node = getTargetNode();

      while (node != null) {
      path.add(node);
      node = parents.get(node);
      }

      Collections.<LatticeNode>reverse(path);

      String strings = new String[getSequenceArray().length];
      StringBuilder stringBuilders = new StringBuilder[strings.length];

      for (int i = 0; i < stringBuilders.length; ++i) {
      stringBuilders[i] = new StringBuilder();
      }

      for (int i = 1; i < path.size(); ++i) {
      LatticeNode tail = path.get(i - 1);
      LatticeNode head = path.get(i);

      int tailCoordinates = tail.getCoordinates();
      int headCoordinates = head.getCoordinates();

      for (int j = 0; j < tailCoordinates.length; ++j) {
      if (tailCoordinates[j] != headCoordinates[j]) {
      stringBuilders[j].
      append(sequenceArray[j].charAt(tailCoordinates[j]));
      } else {
      stringBuilders[j].append(AminoAcidAlphabet.GAP_CHARACTER);
      }
      }
      }

      for (int i = 0; i < strings.length; ++i) {
      strings[i] = stringBuilders[i].toString();
      }

      return new Alignment(strings, cost);
      }

      private String checkIsValidGenomicSequence(String string) {
      Set<Character> characterSet =
      AminoAcidAlphabet.getAminoAcidAlphabet()
      .getCharacterSet();

      for (char c : string.toCharArray()) {
      if (!characterSet.contains(c)) {
      throw new IllegalArgumentException("Unknown amino acid: " + c);
      }
      }

      return string;
      }

      private static final class LatticeNodeHolder
      implements Comparable<LatticeNodeHolder> {

      private final Integer fScore;
      private final LatticeNode node;

      LatticeNodeHolder(LatticeNode node, Integer fScore) {
      this.fScore = fScore;
      this.node = node;
      }

      LatticeNode getNode() {
      return node;
      }

      @Override
      public int compareTo(LatticeNodeHolder o) {
      return Integer.compare(fScore, o.fScore);
      }
      }
      }


      HeuristicFunctionComputer.java



      package net.coderodde.bio.msa;

      import java.awt.Point;
      import java.util.HashSet;
      import java.util.PriorityQueue;
      import java.util.Queue;
      import java.util.Set;

      final class HeuristicFunctionComputer {

      HeuristicFunction computeHeuristicFunction(
      MultipleSequenceAlignmentInstance instance) {
      String sequenceArray = instance.getSequenceArray();
      HeuristicFunction heuristicFunction = new HeuristicFunction(instance);

      for (int dimension1 = 0;
      dimension1 < sequenceArray.length;
      dimension1++) {
      for (int dimension2 = dimension1 + 1;
      dimension2 < sequenceArray.length;
      dimension2++) {
      loadPartialHeuristicFunction(heuristicFunction,
      dimension1,
      dimension2,
      instance);
      }
      }

      return heuristicFunction;
      }

      // Basically, this method runs Dijkstra backwards from the target node until
      // the entire 2D-grid is explored.
      private void loadPartialHeuristicFunction(
      HeuristicFunction heuristicFunction,
      int dimension1,
      int dimension2,
      MultipleSequenceAlignmentInstance instance) {
      LatticeNode target = instance.getTargetNode();
      Queue<LatticeNodeHolder> open = new PriorityQueue<>();
      Set<LatticeNode> closed = new HashSet<>();
      open.add(new LatticeNodeHolder(target, 0));
      Point point = new Point();

      while (!open.isEmpty()) {
      LatticeNodeHolder currentNodeHolder = open.remove();
      LatticeNode currentNode = currentNodeHolder.getLatticeNode();

      if (closed.contains(currentNode)) {
      continue;
      }

      closed.add(currentNode);

      Integer currentNodeCost = currentNodeHolder.getCost();
      extractPoint(point, currentNode, dimension1, dimension2);
      heuristicFunction.put(dimension1,
      dimension2,
      new Point(point),
      currentNodeCost);

      for (LatticeNode parent : currentNode.getParents()) {
      if (closed.contains(parent)) {
      continue;
      }

      int tentativeCost =
      currentNodeCost + instance.getWeight(parent,
      currentNode,
      dimension1,
      dimension2);
      extractPoint(point, parent, dimension1, dimension2);

      if (!heuristicFunction.containsPartial(dimension1,
      dimension2,
      point)
      || heuristicFunction.getPartial(dimension1,
      dimension2,
      point)
      > tentativeCost) {
      open.add(new LatticeNodeHolder(parent, tentativeCost));
      }
      }
      }
      }

      private static void extractPoint(Point point,
      LatticeNode latticeNode,
      int i,
      int j) {
      int coordinates = latticeNode.getCoordinates();
      point.x = coordinates[i];
      point.y = coordinates[j];
      }

      private static final class LatticeNodeHolder
      implements Comparable<LatticeNodeHolder> {

      private final LatticeNode node;
      private final Integer cost;

      LatticeNodeHolder(LatticeNode node, Integer cost) {
      this.node = node;
      this.cost = cost;
      }

      LatticeNode getLatticeNode() {
      return node;
      }

      Integer getCost() {
      return cost;
      }

      @Override
      public int compareTo(LatticeNodeHolder o) {
      return Integer.compare(cost, o.cost);
      }
      }
      }


      LatticeNode.java



      package net.coderodde.bio.msa;

      import com.sun.corba.se.spi.orb.OperationFactory;
      import java.util.Arrays;

      final class LatticeNode {

      /**
      * The problem instance this lattice node contributes to.
      */
      private final MultipleSequenceAlignmentInstance instance;

      /**
      * The coordinates of this node in the entire lattice.
      */
      private final int coordinates;

      LatticeNode(MultipleSequenceAlignmentInstance instance, int coordinates) {
      this.instance = instance;
      this.coordinates = coordinates;
      }

      @Override
      public boolean equals(Object o) {
      return Arrays.equals(coordinates, ((LatticeNode) o).coordinates);
      }

      // Generated by NetBeans 8.1:
      @Override
      public int hashCode() {
      int hash = 7;
      hash = 41 * hash + Arrays.hashCode(this.coordinates);
      return hash;
      }

      @Override
      public String toString() {
      StringBuilder sb = new StringBuilder("[");
      String separator = "";

      for (int coordinate : coordinates) {
      sb.append(separator).append(coordinate);
      separator = ", ";
      }

      return sb.append("]").toString();
      }

      LatticeNode getChildren() {
      // Find out in how many dimension we can move forward:
      int dimensionsNotReached = 0;
      String sequenceArray = instance.getSequenceArray();
      boolean inclusionMap = new boolean[coordinates.length];

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (coordinates[i] < sequenceArray[i].length()) {
      dimensionsNotReached++;
      // We can make a step forward in the direction of ith dimension:
      inclusionMap[i] = true;
      }
      }

      // Create the array of children:
      int numberOfChildren = pow2(dimensionsNotReached) - 1;
      LatticeNode children = new LatticeNode[numberOfChildren];
      loadChildren(children, inclusionMap);

      // Convert offsets to actual child coordinates:
      for (LatticeNode child : children) {
      child.addOffsets(coordinates);
      }

      return children;
      }

      LatticeNode getParents() {
      // Find out in how many dimensions we can move BACKward:
      int dimensionsNotReached = 0;
      String sequenceArray = instance.getSequenceArray();
      boolean inclusionMap = new boolean[coordinates.length];

      for (int i = 0; i < sequenceArray.length; ++i) {
      if (coordinates[i] > 0) {
      dimensionsNotReached++;
      // We can make a step backwards in the direction of ith
      // dimension:
      inclusionMap[i] = true;
      }
      }

      // Create the array of parents:
      int numberOfParents = pow2(dimensionsNotReached) - 1;
      LatticeNode parents = new LatticeNode[numberOfParents];
      loadParents(parents, inclusionMap);

      // Convert the offsets to actual parent coordinates:
      for (LatticeNode parent : parents) {
      parent.addOffsets(coordinates);
      }

      return parents;
      }

      int getCoordinates() {
      return coordinates;
      }

      private void loadChildren(LatticeNode children, boolean inclusionMap) {
      int coords = new int[this.coordinates.length];

      for (int i = 0; i != children.length; ++i) {
      increment(coords, inclusionMap);
      children[i] = new LatticeNode(instance, coords.clone());
      }
      }

      private void loadParents(LatticeNode parents, boolean inclusionMap) {
      int coords = new int[this.coordinates.length];

      for (int i = 0; i != parents.length; ++i) {
      decrement(coords, inclusionMap);
      parents[i] = new LatticeNode(instance, coords.clone());
      }
      }

      private static void increment(int coordinates, boolean inclusionMap) {
      for (int i = 0; i < coordinates.length; ++i) {
      if (!inclusionMap[i]) {
      continue;
      }

      if (coordinates[i] == 0) {
      coordinates[i] = 1;
      return;
      }

      coordinates[i] = 0;
      }
      }

      private static void decrement(int coordinates, boolean inclusionMap) {
      for (int i = 0; i < coordinates.length; ++i) {
      if (!inclusionMap[i]) {
      continue;
      }

      if (coordinates[i] == 0) {
      coordinates[i] = -1;
      return;
      }

      coordinates[i] = 0;
      }
      }

      private void addOffsets(int offsets) {
      for (int i = 0; i < coordinates.length; ++i) {
      coordinates[i] += offsets[i];
      }
      }

      private static int pow2(int exponent) {
      int ret = 1;

      for (int e = 0; e < exponent; ++e) {
      ret *= 2;
      }

      return ret;
      }
      }


      PAM250CostMatrix.java



      package net.coderodde.bio.msa;

      import java.util.HashMap;
      import java.util.Map;
      import net.coderodde.bio.msa.AminoAcidAlphabet;
      import net.coderodde.bio.msa.CostMatrix;

      public final class PAM250CostMatrix implements CostMatrix<Integer> {

      private static PAM250CostMatrix instance;

      private final Map<Character, Map<Character, Integer>> m = new HashMap<>();

      public static PAM250CostMatrix getPAM250CostMatrix() {
      if (instance == null) {
      instance = new PAM250CostMatrix();
      }

      return instance;
      }

      private PAM250CostMatrix() {
      AminoAcidAlphabet alphabet = AminoAcidAlphabet.getAminoAcidAlphabet();

      alphabet.getCharacterSet().stream().forEach((character) -> {
      m.put(character, new HashMap<>());
      });

      m.get('A').put('A', -2);
      m.get('R').put('A', 2);
      m.get('A').put('R', 2);
      m.get('R').put('R', -6);
      m.get('N').put('A', 0);
      m.get('A').put('N', 0);
      m.get('N').put('R', 0);
      m.get('R').put('N', 0);
      m.get('N').put('N', -2);
      m.get('D').put('A', 0);
      m.get('A').put('D', 0);
      m.get('D').put('R', 1);
      m.get('R').put('D', 1);
      m.get('D').put('N', -2);
      m.get('N').put('D', -2);
      m.get('D').put('D', -4);
      m.get('C').put('A', 2);
      m.get('A').put('C', 2);
      m.get('C').put('R', 4);
      m.get('R').put('C', 4);
      m.get('C').put('N', 4);
      m.get('N').put('C', 4);
      m.get('C').put('D', 5);
      m.get('D').put('C', 5);
      m.get('C').put('C', -4);
      m.get('Q').put('A', 0);
      m.get('A').put('Q', 0);
      m.get('Q').put('R', -1);
      m.get('R').put('Q', -1);
      m.get('Q').put('N', -1);
      m.get('N').put('Q', -1);
      m.get('Q').put('D', -2);
      m.get('D').put('Q', -2);
      m.get('Q').put('C', 5);
      m.get('C').put('Q', 5);
      m.get('Q').put('Q', -4);
      m.get('E').put('A', 0);
      m.get('A').put('E', 0);
      m.get('E').put('R', 1);
      m.get('R').put('E', 1);
      m.get('E').put('N', -1);
      m.get('N').put('E', -1);
      m.get('E').put('D', -3);
      m.get('D').put('E', -3);
      m.get('E').put('C', 5);
      m.get('C').put('E', 5);
      m.get('E').put('Q', -2);
      m.get('Q').put('E', -2);
      m.get('E').put('E', -4);
      m.get('G').put('A', -1);
      m.get('A').put('G', -1);
      m.get('G').put('R', 3);
      m.get('R').put('G', 3);
      m.get('G').put('N', 0);
      m.get('N').put('G', 0);
      m.get('G').put('D', -1);
      m.get('D').put('G', -1);
      m.get('G').put('C', 3);
      m.get('C').put('G', 3);
      m.get('G').put('Q', 1);
      m.get('Q').put('G', 1);
      m.get('G').put('E', 0);
      m.get('E').put('G', 0);
      m.get('G').put('G', -5);
      m.get('H').put('A', 1);
      m.get('A').put('H', 1);
      m.get('H').put('R', -2);
      m.get('R').put('H', -2);
      m.get('H').put('N', -2);
      m.get('N').put('H', -2);
      m.get('H').put('D', -1);
      m.get('D').put('H', -1);
      m.get('H').put('C', 3);
      m.get('C').put('H', 3);
      m.get('H').put('Q', -3);
      m.get('Q').put('H', -3);
      m.get('H').put('E', -1);
      m.get('E').put('H', -1);
      m.get('H').put('G', 2);
      m.get('G').put('H', 2);
      m.get('H').put('H', -6);
      m.get('I').put('A', 1);
      m.get('A').put('I', 1);
      m.get('I').put('R', 2);
      m.get('R').put('I', 2);
      m.get('I').put('N', 2);
      m.get('N').put('I', 2);
      m.get('I').put('D', 2);
      m.get('D').put('I', 2);
      m.get('I').put('C', 2);
      m.get('C').put('I', 2);
      m.get('I').put('Q', 2);
      m.get('Q').put('I', 2);
      m.get('I').put('E', 2);
      m.get('E').put('I', 2);
      m.get('I').put('G', 3);
      m.get('G').put('I', 3);
      m.get('I').put('H', 2);
      m.get('H').put('I', 2);
      m.get('I').put('I', -5);
      m.get('L').put('A', 2);
      m.get('A').put('L', 2);
      m.get('L').put('R', 3);
      m.get('R').put('L', 3);
      m.get('L').put('N', 3);
      m.get('N').put('L', 3);
      m.get('L').put('D', 4);
      m.get('D').put('L', 4);
      m.get('L').put('C', 6);
      m.get('C').put('L', 6);
      m.get('L').put('Q', 2);
      m.get('Q').put('L', 2);
      m.get('L').put('E', 3);
      m.get('E').put('L', 3);
      m.get('L').put('G', 4);
      m.get('G').put('L', 4);
      m.get('L').put('H', 2);
      m.get('H').put('L', 2);
      m.get('L').put('I', -2);
      m.get('I').put('L', -2);
      m.get('L').put('L', -6);
      m.get('K').put('A', 1);
      m.get('A').put('K', 1);
      m.get('K').put('R', -3);
      m.get('R').put('K', -3);
      m.get('K').put('N', -1);
      m.get('N').put('K', -1);
      m.get('K').put('D', 0);
      m.get('D').put('K', 0);
      m.get('K').put('C', 5);
      m.get('C').put('K', 5);
      m.get('K').put('Q', -1);
      m.get('Q').put('K', -1);
      m.get('K').put('E', 0);
      m.get('E').put('K', 0);
      m.get('K').put('G', 2);
      m.get('G').put('K', 2);
      m.get('K').put('H', 0);
      m.get('H').put('K', 0);
      m.get('K').put('I', 2);
      m.get('I').put('K', 2);
      m.get('K').put('L', 3);
      m.get('L').put('K', 3);
      m.get('K').put('K', -5);
      m.get('M').put('A', 1);
      m.get('A').put('M', 1);
      m.get('M').put('R', 0);
      m.get('R').put('M', 0);
      m.get('M').put('N', 2);
      m.get('N').put('M', 2);
      m.get('M').put('D', 3);
      m.get('D').put('M', 3);
      m.get('M').put('C', 5);
      m.get('C').put('M', 5);
      m.get('M').put('Q', 1);
      m.get('Q').put('M', 1);
      m.get('M').put('E', 2);
      m.get('E').put('M', 2);
      m.get('M').put('G', 3);
      m.get('G').put('M', 3);
      m.get('M').put('H', 2);
      m.get('H').put('M', 2);
      m.get('M').put('I', -2);
      m.get('I').put('M', -2);
      m.get('M').put('L', -4);
      m.get('L').put('M', -4);
      m.get('M').put('K', 0);
      m.get('K').put('M', 0);
      m.get('M').put('M', -6);
      m.get('F').put('A', 4);
      m.get('A').put('F', 4);
      m.get('F').put('R', 4);
      m.get('R').put('F', 4);
      m.get('F').put('N', 4);
      m.get('N').put('F', 4);
      m.get('F').put('D', 6);
      m.get('D').put('F', 6);
      m.get('F').put('C', 4);
      m.get('C').put('F', 4);
      m.get('F').put('Q', 5);
      m.get('Q').put('F', 5);
      m.get('F').put('E', 5);
      m.get('E').put('F', 5);
      m.get('F').put('G', 5);
      m.get('G').put('F', 5);
      m.get('F').put('H', 2);
      m.get('H').put('F', 2);
      m.get('F').put('I', -1);
      m.get('I').put('F', -1);
      m.get('F').put('L', -2);
      m.get('L').put('F', -2);
      m.get('F').put('K', 5);
      m.get('K').put('F', 5);
      m.get('F').put('M', 0);
      m.get('M').put('F', 0);
      m.get('F').put('F', -9);
      m.get('P').put('A', -1);
      m.get('A').put('P', -1);
      m.get('P').put('R', 0);
      m.get('R').put('P', 0);
      m.get('P').put('N', 1);
      m.get('N').put('P', 1);
      m.get('P').put('D', 1);
      m.get('D').put('P', 1);
      m.get('P').put('C', 3);
      m.get('C').put('P', 3);
      m.get('P').put('Q', 0);
      m.get('Q').put('P', 0);
      m.get('P').put('E', 1);
      m.get('E').put('P', 1);
      m.get('P').put('G', 1);
      m.get('G').put('P', 1);
      m.get('P').put('H', 0);
      m.get('H').put('P', 0);
      m.get('P').put('I', 2);
      m.get('I').put('P', 2);
      m.get('P').put('L', 3);
      m.get('L').put('P', 3);
      m.get('P').put('K', 1);
      m.get('K').put('P', 1);
      m.get('P').put('M', 2);
      m.get('M').put('P', 2);
      m.get('P').put('F', 5);
      m.get('F').put('P', 5);
      m.get('P').put('P', -6);
      m.get('S').put('A', -1);
      m.get('A').put('S', -1);
      m.get('S').put('R', 0);
      m.get('R').put('S', 0);
      m.get('S').put('N', -1);
      m.get('N').put('S', -1);
      m.get('S').put('D', 0);
      m.get('D').put('S', 0);
      m.get('S').put('C', 0);
      m.get('C').put('S', 0);
      m.get('S').put('Q', 1);
      m.get('Q').put('S', 1);
      m.get('S').put('E', 0);
      m.get('E').put('S', 0);
      m.get('S').put('G', -1);
      m.get('G').put('S', -1);
      m.get('S').put('H', 1);
      m.get('H').put('S', 1);
      m.get('S').put('I', 1);
      m.get('I').put('S', 1);
      m.get('S').put('L', 3);
      m.get('L').put('S', 3);
      m.get('S').put('K', 0);
      m.get('K').put('S', 0);
      m.get('S').put('M', 2);
      m.get('M').put('S', 2);
      m.get('S').put('F', 3);
      m.get('F').put('S', 3);
      m.get('S').put('P', -1);
      m.get('P').put('S', -1);
      m.get('S').put('S', -3);
      m.get('T').put('A', -1);
      m.get('A').put('T', -1);
      m.get('T').put('R', 1);
      m.get('R').put('T', 1);
      m.get('T').put('N', 0);
      m.get('N').put('T', 0);
      m.get('T').put('D', 0);
      m.get('D').put('T', 0);
      m.get('T').put('C', 2);
      m.get('C').put('T', 2);
      m.get('T').put('Q', 1);
      m.get('Q').put('T', 1);
      m.get('T').put('E', 0);
      m.get('E').put('T', 0);
      m.get('T').put('G', 0);
      m.get('G').put('T', 0);
      m.get('T').put('H', 1);
      m.get('H').put('T', 1);
      m.get('T').put('I', 0);
      m.get('I').put('T', 0);
      m.get('T').put('L', 2);
      m.get('L').put('T', 2);
      m.get('T').put('K', 0);
      m.get('K').put('T', 0);
      m.get('T').put('M', 1);
      m.get('M').put('T', 1);
      m.get('T').put('F', 2);
      m.get('F').put('T', 2);
      m.get('T').put('P', 0);
      m.get('P').put('T', 0);
      m.get('T').put('S', -1);
      m.get('S').put('T', -1);
      m.get('T').put('T', -3);
      m.get('W').put('A', 6);
      m.get('A').put('W', 6);
      m.get('W').put('R', -2);
      m.get('R').put('W', -2);
      m.get('W').put('N', 4);
      m.get('N').put('W', 4);
      m.get('W').put('D', 7);
      m.get('D').put('W', 7);
      m.get('W').put('C', 8);
      m.get('C').put('W', 8);
      m.get('W').put('Q', 5);
      m.get('Q').put('W', 5);
      m.get('W').put('E', 7);
      m.get('E').put('W', 7);
      m.get('W').put('G', 7);
      m.get('G').put('W', 7);
      m.get('W').put('H', 3);
      m.get('H').put('W', 3);
      m.get('W').put('I', 5);
      m.get('I').put('W', 5);
      m.get('W').put('L', 2);
      m.get('L').put('W', 2);
      m.get('W').put('K', 3);
      m.get('K').put('W', 3);
      m.get('W').put('M', 4);
      m.get('M').put('W', 4);
      m.get('W').put('F', 0);
      m.get('F').put('W', 0);
      m.get('W').put('P', 6);
      m.get('P').put('W', 6);
      m.get('W').put('S', 2);
      m.get('S').put('W', 2);
      m.get('W').put('T', 5);
      m.get('T').put('W', 5);
      m.get('W').put('W', -17);
      m.get('Y').put('A', 3);
      m.get('A').put('Y', 3);
      m.get('Y').put('R', 4);
      m.get('R').put('Y', 4);
      m.get('Y').put('N', 2);
      m.get('N').put('Y', 2);
      m.get('Y').put('D', 4);
      m.get('D').put('Y', 4);
      m.get('Y').put('C', 0);
      m.get('C').put('Y', 0);
      m.get('Y').put('Q', 4);
      m.get('Q').put('Y', 4);
      m.get('Y').put('E', 4);
      m.get('E').put('Y', 4);
      m.get('Y').put('G', 5);
      m.get('G').put('Y', 5);
      m.get('Y').put('H', 0);
      m.get('H').put('Y', 0);
      m.get('Y').put('I', 1);
      m.get('I').put('Y', 1);
      m.get('Y').put('L', 1);
      m.get('L').put('Y', 1);
      m.get('Y').put('K', 4);
      m.get('K').put('Y', 4);
      m.get('Y').put('M', 2);
      m.get('M').put('Y', 2);
      m.get('Y').put('F', -7);
      m.get('F').put('Y', -7);
      m.get('Y').put('P', 5);
      m.get('P').put('Y', 5);
      m.get('Y').put('S', 3);
      m.get('S').put('Y', 3);
      m.get('Y').put('T', 3);
      m.get('T').put('Y', 3);
      m.get('Y').put('W', 0);
      m.get('W').put('Y', 0);
      m.get('Y').put('Y', -10);
      m.get('V').put('A', 0);
      m.get('A').put('V', 0);
      m.get('V').put('R', 2);
      m.get('R').put('V', 2);
      m.get('V').put('N', 2);
      m.get('N').put('V', 2);
      m.get('V').put('D', 2);
      m.get('D').put('V', 2);
      m.get('V').put('C', 2);
      m.get('C').put('V', 2);
      m.get('V').put('Q', 2);
      m.get('Q').put('V', 2);
      m.get('V').put('E', 2);
      m.get('E').put('V', 2);
      m.get('V').put('G', 1);
      m.get('G').put('V', 1);
      m.get('V').put('H', 2);
      m.get('H').put('V', 2);
      m.get('V').put('I', -4);
      m.get('I').put('V', -4);
      m.get('V').put('L', -2);
      m.get('L').put('V', -2);
      m.get('V').put('K', 2);
      m.get('K').put('V', 2);
      m.get('V').put('M', -2);
      m.get('M').put('V', -2);
      m.get('V').put('F', 1);
      m.get('F').put('V', 1);
      m.get('V').put('P', 1);
      m.get('P').put('V', 1);
      m.get('V').put('S', 1);
      m.get('S').put('V', 1);
      m.get('V').put('T', 0);
      m.get('T').put('V', 0);
      m.get('V').put('W', 6);
      m.get('W').put('V', 6);
      m.get('V').put('Y', 2);
      m.get('Y').put('V', 2);
      m.get('V').put('V', -4);
      }

      @Override
      public Integer getCost(Character aminoAcidChar1, Character aminoAcidChar2) {
      try {
      return m.get(aminoAcidChar1).get(aminoAcidChar2);
      } catch (NullPointerException ex) {
      throw new IllegalArgumentException("Bad arguments: (" +
      aminoAcidChar1 + ", " + aminoAcidChar2 + ")",
      ex);
      }
      }
      }


      AminoAcidAlphabet.java



      package net.coderodde.bio.msa;

      import java.util.Collections;
      import java.util.HashSet;
      import java.util.Set;

      public final class AminoAcidAlphabet {

      public static final Character GAP_CHARACTER = '-';
      private static AminoAcidAlphabet instance;

      private final Set<Character> alphabet = new HashSet<>();

      public static AminoAcidAlphabet getAminoAcidAlphabet() {
      if (instance == null) {
      instance = new AminoAcidAlphabet();
      }

      return instance;
      }

      private AminoAcidAlphabet() {
      String aminoAcids = "ACDEF" +
      "GHIKL" +
      "MNPQR" +
      "STVWY";

      for (char c : aminoAcids.toCharArray()) {
      alphabet.add(c);
      }
      }

      public Set<Character> getCharacterSet() {
      return Collections.<Character>unmodifiableSet(alphabet);
      }
      }


      Alignment.java



      package net.coderodde.bio.msa;

      public class Alignment {

      private final String alignment;
      private final int cost;

      Alignment(String alignment, int cost) {
      this.alignment = alignment;
      this.cost = cost;
      }

      public String getAlignemnt() {
      return alignment;
      }

      public int getCost() {
      return cost;
      }

      @Override
      public String toString() {
      StringBuilder sb = new StringBuilder();
      String separator = "";

      for (String row : alignment) {
      sb.append(separator).append(row);
      separator = "n";
      }

      sb.append("nCost: ").append(cost);
      return sb.toString();
      }
      }


      CostMatrix.java



      package net.coderodde.bio.msa;

      public interface CostMatrix<C> {

      public C getCost(Character aminoAcidChar1, Character aminoAcidChar2);
      }


      HeuristicFunction.java



      package net.coderodde.bio.msa;

      import java.awt.Point;
      import java.util.HashMap;
      import java.util.Map;

      final class HeuristicFunction {

      private final Map<Point, Integer> matrix;

      HeuristicFunction(MultipleSequenceAlignmentInstance instance) {
      int sequences = instance.getSequenceArray().length;
      this.matrix = new Map[sequences];

      for (int i = 0; i < sequences; ++i) {
      this.matrix[i] = new Map[sequences];

      for (int j = i + 1; j < sequences; ++j) {
      this.matrix[i][j] = new HashMap<>();
      }
      }
      }

      void put(int dimension1, int dimension2, Point point, Integer cost) {
      matrix[dimension1][dimension2].put(point, cost);
      }

      boolean containsPartial(int dimension1, int dimension2, Point point) {
      return matrix[dimension1][dimension2].containsKey(point);
      }

      Integer getPartial(int dimension1, int dimension2, Point point) {
      return matrix[dimension1][dimension2].get(point);
      }

      Integer get(LatticeNode node) {
      int cost = 0;
      int coordinates = node.getCoordinates();
      Point point = new Point();

      for (int dimension1 = 0;
      dimension1 < coordinates.length;
      dimension1++) {
      point.x = dimension1;

      for (int dimension2 = dimension1 + 1;
      dimension2 < coordinates.length;
      dimension2++) {
      point.y = dimension2;
      cost += matrix[dimension1][dimension2].get(point);
      }
      }

      return cost;
      }
      }


      App.java



      package net.coderodde.bio.msa;

      final class App {

      private static final String SEQUENCES = {
      "ACGHKGMNPFQEKKFKLMNRW",
      "CFGPQWYRTLMEKKFKNR",
      "EACLMNRPQWTR",
      "TIMWAYHTMGIEKKFK"
      };

      public static void main(String args) {
      MultipleSequenceAlignmentInstance instance =
      new MultipleSequenceAlignmentInstance(
      PAM250CostMatrix.getPAM250CostMatrix(),
      4,
      SEQUENCES);

      long start = System.currentTimeMillis();
      Alignment alignment1 = instance.align();
      long end = System.currentTimeMillis();

      System.out.println(alignment1);
      System.out.println(end - start + " ms.");
      System.out.println();

      start = System.currentTimeMillis();
      Alignment alignment2 = instance.alignBrute();
      end = System.currentTimeMillis();

      System.out.println(alignment2);
      System.out.println(end - start + " ms.");
      }
      }


      Output



      The output for the demo is




      Computed heuristic function in 1003 milliseconds.
      -ACG-HKGMNPF-QEKKFKLMNRW
      -CFGPQW-YRTL-MEKKFK--NR-
      EAC--LM-NRP--Q-WT-R-----
      -TI--MWAYHTMGIEKKFK-----
      Cost: 53
      1126 ms.

      -ACG-HKGMNPF-QEKKFKLMNRW
      -CFGPQW-YRTL-MEKKFK--NR-
      EAC--LM-NRP--Q-WT-R-----
      -TI--MWAYHTMGIEKKFK-----
      Cost: 53
      60 ms.



      Critique request



      As always, please tell me anything that comes to mind.







      java algorithm bioinformatics






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Apr 16 '17 at 15:05









      coderoddecoderodde

      15.7k537125




      15.7k537125






















          1 Answer
          1






          active

          oldest

          votes


















          1














          In AminoAcidAlphabet:





          • getCharacterSet always return same set, but you always create new instance.


          In PAM250CostMatrix




          • in constructor you can extract method to write add('A', 'A', -2) instead of m.get('A').put('A', -2)


          In LatticeNode




          • you can make hashCode simply do return Arrays.hashCode(this.coordinates);






          share|improve this answer





















            Your Answer





            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            });
            });
            }, "mathjax-editing");

            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "196"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f160915%2fmultiple-sequence-alignment-in-java%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            1














            In AminoAcidAlphabet:





            • getCharacterSet always return same set, but you always create new instance.


            In PAM250CostMatrix




            • in constructor you can extract method to write add('A', 'A', -2) instead of m.get('A').put('A', -2)


            In LatticeNode




            • you can make hashCode simply do return Arrays.hashCode(this.coordinates);






            share|improve this answer


























              1














              In AminoAcidAlphabet:





              • getCharacterSet always return same set, but you always create new instance.


              In PAM250CostMatrix




              • in constructor you can extract method to write add('A', 'A', -2) instead of m.get('A').put('A', -2)


              In LatticeNode




              • you can make hashCode simply do return Arrays.hashCode(this.coordinates);






              share|improve this answer
























                1












                1








                1






                In AminoAcidAlphabet:





                • getCharacterSet always return same set, but you always create new instance.


                In PAM250CostMatrix




                • in constructor you can extract method to write add('A', 'A', -2) instead of m.get('A').put('A', -2)


                In LatticeNode




                • you can make hashCode simply do return Arrays.hashCode(this.coordinates);






                share|improve this answer












                In AminoAcidAlphabet:





                • getCharacterSet always return same set, but you always create new instance.


                In PAM250CostMatrix




                • in constructor you can extract method to write add('A', 'A', -2) instead of m.get('A').put('A', -2)


                In LatticeNode




                • you can make hashCode simply do return Arrays.hashCode(this.coordinates);







                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered 17 hours ago









                talextalex

                1764




                1764






























                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Code Review Stack Exchange!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    Use MathJax to format equations. MathJax reference.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f160915%2fmultiple-sequence-alignment-in-java%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    How to make a Squid Proxy server?

                    Is this a new Fibonacci Identity?

                    Touch on Surface Book