/** * Levenshtein string comparison function * * Web site: http://rumkin.com/reference/algorithms/fuzzy_strings/ * License: http://rumkin.com/license/ */ #include #include // This is only good for strings with a difference less than 64k int levenshtein(char *str1, char *str2) { unsigned int i, j, diagonal, cost, s1len, s2len; unsigned int *arr; s1len = strlen(str1); s2len = strlen(str2); if (s1len * s2len == 0) return s1len + s2len; j = s1len + 1; arr = (unsigned int *) malloc(sizeof(unsigned int) * j); for (i = 0; i < j; i ++) arr[i] = i + 1; for (i = 0; i < s2len; i ++) { diagonal = arr[0] - 1; arr[0] = i + 1; j = 0; while (j < s1len) { cost = diagonal; if (str1[j] != str2[i]) cost ++; if (cost > arr[j]) cost = arr[j]; j++; if (cost > arr[j]) cost = arr[j]; diagonal = arr[j] - 1; arr[j] = cost + 1; } } cost = arr[j] - 1; free(arr); return cost; } int main(void) { char a[] = "YelloDoggie"; char b[] = "FollowHoggie"; printf("result: %d\n", levenshtein(a, b)); // To get a confidence level from 0.0 (completely different) to 1.0 (same) // just use this formula: // confidence = (strlen(a) + strlen(b) - result) / (strlen(a) + strlen(b)) // Just make sure that the length of a + b is bigger than 0. }