We usually want to talk about Levenshtein distance when we want to compare two different strings. Pratically, the Levenshtein distance (as its creator’s name, Vladimir Levenshtein) is a measure of the similarity between two strings. If you want to look at the algorithm in detail, then you should take a look at this page available on wikipedia.
The Levenshtein distance is nothing but else than the number of steps needed to transform a string into another one. For example, if we want to go from “cat” to “cats”, then we have a Levenshtein distance equal to 1 (since we added only a ‘s’, so one step).
Here is the code I done (tested with gcc, after a strip --strip-unneeded is only 4.5kb large):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_STR_LEN 100
typedef unsigned short int Matrix;
static int levenshtein(const char *, const char *);
static int min(const int, const int, const int);
static char *strlower(char *);
int
main(int argc, char **argv)
{
unsigned short int i, ss, c_sens = 1;
char str[1][MAX_STR_LEN];
if (argc < 3) {
printf("Usage: %s [-i] <string1> <string2>n", argv[0]);
return EXIT_FAILURE;
}
ss = 0;
for (i = 1; i < argc; i++)
{
if (!strcmp(argv[i], "-i")) {
c_sens = 0;
} else {
strcpy(str[ss], argv[i]);
ss++;
}
}
if (!c_sens) {
strlower(str[0]);
strlower(str[1]);
}
printf("[%s]/[%s]:%dn", str[0], str[1], levenshtein(str[0], str[1]));
return EXIT_SUCCESS;
}
static int
levenshtein(const char *str1, const char *str2)
{
unsigned short int n, m, i, j, cost;
Matrix matrix[MAX_STR_LEN][MAX_STR_LEN];
n = strlen(str1);
m = strlen(str2);
if (n == 0)
return m;
if (m == 0)
return n;
for (i = 0; i <= n; i++)
matrix[i][0] = i;
for (i = 0; i <= m; i++)
matrix[0][i] = i;
for (i = 1; i <= n; i++)
{
for (j = 1; j <= m; j++)
{
cost = ((str1[i - 1] == str2[j - 1]) ? 0 : 1);
matrix[i][j] = min( matrix[i - 1][j] + 1,
matrix[i][j - 1] + 1,
matrix[i - 1][j - 1] + cost);
}
}
return matrix[n][m];
}
static int
min(const int a, const int b, const int c)
{
unsigned short int min = a;
if (b < min)
min = b;
if (c < min)
min = c;
return min;
}
static char *
strlower(char *a)
{
unsigned short int i, len = strlen(a);
for (i = 0; i < len; i++)
*(a + i) = tolower(*(a + i));
return a;
}
If you want a case insensitive computation, use the -i switch on the command line.
This version could be improved by using pointers when threating the matrix, but maybe I would do it for a newer release.