Pages

Search This Blog

November 06, 2013

Edit distance between two strings

Given two strings s and d, how will you find edit distance between them? Edit distance is the number of substitutions, insertions and deletions required to match the first string to second.

Following is a dynamic programming approach to this problem where the intermediate solutions are built at step up fashion by explicitly specifying the order of evaluation.

Reference: The Algorithm Design Manual - Steven Skiena

#include <iostream>
#include <conio.h>

int m[1000][1000];

int match(char x, char y)
{
    //std::cout << "match: x: " << x << " y: " << y;
    if(x == y) return 0;
    return 1;
}
int indel(char x)
{
    return 1;
}

int findEdits(char* s, char* d) {
    int i, j, k;
    int opt[3];

    for(i=0; i<1000; i++)
    {
        m[0][i] = i;
        m[i][0] = i;
    }
    
    for(i=1; i<=strlen(s); i++)
    {
        for(j=1; j<=strlen(d); j++)
        {
            opt[0] = m[i-1][j-1] + match(s[i-1], d[j-1]);
            opt[1] = m[i][j-1] + indel(d[j]);
            opt[2] = m[i-1][j] + indel(s[i]);
            
            //printf(" 0: %d 1: %d 2: %d", opt[0], opt[1], opt[2]);
            
            m[i][j] = opt[0];
            for(k=1; k<3; k++)
            {
                if(opt[k] < m[i][j])
                    m[i][j] = opt[k];
            }
            //printf(" after: m[i][j]: %d\\n", m[i][j]);
        }
    }
    
    return m[strlen(s)][strlen(d)];
}

int main()
{
    char *s = "This is not correctasda";
    char *d = "This is absolutely correct!";
    int x = findEdits(s, d);
    std::cout << x << std::endl;
    getch();
}

No comments:

Post a Comment