title: Levenshtein Distance Algorithm: Progress 4gl Implementation

by Steve Southwell

DEFINE TEMP-TABLE matrix NO-UNDO
 FIELD X AS INTEGER
 FIELD Y AS INTEGER
 FIELD pointvalue AS INTEGER
 INDEX xy IS PRIMARY UNIQUE X Y.


FUNCTION lscore RETURNS INTEGER
  ( INPUT myfirst AS CHAR,
    INPUT mysecond AS CHAR ) :
/*------------------------------------------------------------------------------
  Purpose:  Computes Levenshtein distance between two strings
    Notes:  See http://www.merriampark.com/ld.htm for a description of this
            algorithm in further detail
------------------------------------------------------------------------------*/
  DEFINE VAR myscore AS INTEGER NO-UNDO.
  DEFINE VAR m AS INTEGER NO-UNDO.
  DEFINE VAR n AS INTEGER NO-UNDO.
  DEFINE VAR i AS INTEGER NO-UNDO.
  DEFINE VAR j AS INTEGER NO-UNDO.
  DEFINE VAR s AS CHARACTER NO-UNDO.
  DEFINE VAR t AS CHARACTER NO-UNDO.

  DEFINE BUFFER leftcell FOR matrix.
  DEFINE BUFFER abovecell FOR matrix.
  DEFINE BUFFER diagcell FOR matrix.

  ASSIGN
   s = mysecond
   t = myfirst
  .

  ASSIGN
   m = LENGTH(t,"CHARACTER")
   n = LENGTH(s,"CHARACTER")
  .

  /* if both empty, return 0 */
  IF m + n = 0 THEN RETURN 0.

   /* Initialize our matrix */
  EMPTY TEMP-TABLE matrix.

  DO i = 0 TO n:
    CREATE matrix.
    ASSIGN
     matrix.X = i
     matrix.Y = 0
     matrix.pointvalue = i
    .
  END.
  DO j = 1 TO m:
    CREATE matrix.
    ASSIGN
     matrix.X = 0
     matrix.Y = j
     matrix.pointvalue = j
    .
  END.

  DO i = 1 TO N:
    DO j = 1 TO m:
        CREATE matrix.
        ASSIGN
         matrix.X = i
         matrix.Y = j
        .
        IF SUBSTRING(s,i,1,"CHARACTER") = SUBSTRING(t,j,1,"CHARACTER") THEN
         ASSIGN matrix.pointvalue = 0.
        ELSE matrix.pointvalue = 1.

        FIND leftcell WHERE leftcell.X = i - 1 AND leftcell.Y = j.
        FIND abovecell WHERE abovecell.Y = j - 1 AND abovecell.X = i.
        FIND diagcell WHERE diagcell.X = i - 1 AND diagcell.Y = j - 1.

        ASSIGN matrix.pointvalue = MIN(abovecell.pointvalue + 1,
                                       leftcell.pointvalue + 1,
                                       diagcell.pointvalue + matrix.pointvalue).

    END.
  END.


  RETURN matrix.pointvalue.
END FUNCTION.