title: Levenshtein Distance Algorithm: TSQL Implementation
by Joseph Gama
CREATE function LEVENSHTEIN( @s varchar(50), @t
varchar(50) )
--Returns the Levenshtein Distance between strings s1
and s2.
--Original developer: Michael Gilleland
http://www.merriampark.com/ld.htm
--Translated to TSQL by Joseph Gama
returns varchar(50)
as
BEGIN
DECLARE @d varchar(100), @LD int, @m int, @n int, @i
int, @j int,
@s_i char(1), @t_j char(1),@cost int
--Step 1
SET @n=LEN(@s)
SET @m=LEN(@t)
SET @d=replicate(CHAR(0),100)
If @n = 0
BEGIN
SET @LD = @m
GOTO done
END
If @m = 0
BEGIN
SET @LD = @n
GOTO done
END
--Step 2
SET @i=0
WHILE @i<=@n
BEGIN
SET @d=STUFF(@d,@i+1,1,CHAR(@i))--d(i, 0) = i
SET @i=@i+1
END
SET @i=0
WHILE @i<=@m
BEGIN
SET @d=STUFF(@d,@i*(@n+1)+1,1,CHAR(@i))--d(0, j) = j
SET @i=@i+1
END
--goto done
--Step 3
SET @i=1
WHILE @i<=@n
BEGIN
SET @s_i=(substring(@s,@i,1))
--Step 4
SET @j=1
WHILE @j<=@m
BEGIN
SET @t_j=(substring(@t,@j,1))
--Step 5
If @s_i = @t_j
SET @cost=0
ELSE
SET @cost=1
--Step 6
SET @d=STUFF(@d,@j*(@n+1)+@i+1,1,CHAR(dbo.MIN3(
ASCII(substring(@d,@j*(@n+1)+@i-1+1,1))+1,
ASCII(substring(@d,(@j-1)*(@n+1)+@i+1,1))+1,
ASCII(substring(@d,(@j-1)*(@n+1)+@i-1+1,1))+@cost)
))
SET @j=@j+1
END
SET @i=@i+1
END
--Step 7
SET @LD = ASCII(substring(@d,@n*(@m+1)+@m+1,1))
done:
--RETURN @LD
--I kept this code that can be used to display the
matrix with all calculated values
--From Query Analyser it provides a nice way to check
the algorithm in action
--
RETURN @LD
--declare @z varchar(255)
--set @z=''
--SET @i=0
--WHILE @i<=@n
-- BEGIN
-- SET @j=0
-- WHILE @j<=@m
-- BEGIN
-- set
@z=@z+CONVERT(char(3),ASCII(substring(@d,@i*(@m+1
)+@j+1 ,1)))
-- SET @j=@j+1
-- END
-- SET @i=@i+1
-- END
--print dbo.wrap(@z,3*(@n+1))
END