跳轉到內容

演算法實現/字串/Levenshtein 距離

來自 Wikibooks,開放世界中的開放書籍

本頁面上的 Levenshtein 演算法實現僅供說明。在大多數情況下,應用程式將使用在堆分配上謹慎使用的實現,特別是在比較大量單詞時。以下備註說明了這種方法和相關主題的一些變化。

  • 大多數實現使用一維或二維陣列來儲存所比較單詞的字首的距離。在大多數應用程式中,這些結構的大小是預先知道的。例如,當距離僅在低於某個最大允許距離時才相關時,就會出現這種情況(當從字典中選擇單詞以近似匹配給定單詞時,就會發生這種情況)。在這種情況下,陣列可以預先分配並在對連續單詞進行演算法的多次執行中重複使用。
  • 使用最大允許距離對搜尋時間設定上限。一旦字串字首之間的最小 Levenshtein 距離超過最大允許距離,搜尋就可以停止。
  • 字元的刪除、插入和替換可以分配不同的權重。通常的選擇是將所有三個權重設定為 1。這些權重的不同值允許在單詞列表中進行更靈活的搜尋策略。

Action Script 3

[編輯 | 編輯原始碼]
function levenshtein(s1:String, s2:String):uint
		{
		     const len1:uint = s1.length, len2:uint = s2.length;
		     var d:Vector.<Vector.<uint> >=new Vector.<Vector.<uint> >(len1+1);
		     for(i=0; i<=len1; ++i) 
			d[i] = new Vector.<uint>(len2+1);
		 
		     d[0][0]=0;
		 	
		     var i:int;
		     var j:int;
		 	
		     for(i=1; i<=len1; ++i) d[i][0]=i; //int faster than uint
		     for(j=1; j<=len2; ++j) d[0][j]=j; 
		 
		     for(i = 1; i <= len1; ++i)
		          for(j = 1; j <= len2; ++j)
		               d[i][j] = Math.min( Math.min(d[i - 1][j] + 1,d[i][j - 1] + 1),
		                           d[i - 1][j - 1] + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1) );
		     return(d[len1][len2]);
		 
		}
function Levenshtein(Left, Right : String) return Natural is
    D : array(0 .. Left'Last, 0 .. Right'Last) of Natural;
  begin
    for I in D'range(1) loop D(I, 0) := I;end loop;
 
    for J in D'range(2) loop D(0, J) := J;end loop;
 
    for I in Left'range loop
      for J in Right'range loop
        D(I, J) := Natural'Min(D(I - 1, J), D(I, J - 1)) + 1;
        D(I, J) := Natural'Min(D(I, J), D(I - 1, J - 1) + Boolean'Pos(Left(I) /= Right(J)));
      end loop;
    end loop;
 
    return D(D'Last(1), D'Last(2));
  end Levenshtein;
function levenshtein(str1, str2, cost_ins, cost_rep, cost_del,    str1_len, str2_len, matrix, is_match, i, j, x, y, z) {
	if(cost_ins == "") cost_ins = 1
	if(cost_rep == "") cost_rep = 1
	if(cost_del == "") cost_del = 1
	str1_len = length(str1)
	str2_len = length(str2)
	if(str1_len == 0) return str2_len * cost_ins
	if(str2_len == 0) return str1_len * cost_del
	matrix[0, 0] = 0
	for(i = 1; i <= str1_len; i++) {
		matrix[i, 0] = i * cost_del
		for(j = 1; j <= str2_len; j++) {
			matrix[0, j] = j * cost_ins
			x = matrix[i - 1, j] + cost_del
			y = matrix[i, j - 1] + cost_ins
			z = matrix[i - 1, j - 1] + (substr(str1, i, 1) == substr(str2, j, 1) ? 0 : cost_rep)
			x = x < y ? x : y
			matrix[i, j] = x < z ? x : z
		}
	}
	return matrix[str1_len, str2_len]
}
#!/bin/bash
function levenshtein {
    if (( $# != 2 )); then
        echo "Usage: $0 word1 word2" >&2
    elif (( ${#1} < ${#2} )); then
        levenshtein "$2" "$1"
    else
        local str1len=${#1}
        local str2len=${#2}
        local d

        for (( i = 0; i <= (str1len+1)*(str2len+1); i++ )); do
            d[i]=0
        done

        for (( i = 0; i <= str1len; i++ )); do
            d[i+0*str1len]=$i
        done

        for (( j = 0; j <= str2len; j++ )); do
            d[0+j*(str1len+1)]=$j
        done

        for (( j = 1; j <= str2len; j++ )); do
            for (( i = 1; i <= str1len; i++ )); do
                [ "${1:i-1:1}" = "${2:j-1:1}" ] && local cost=0 || local cost=1
                del=$(( d[(i-1)+str1len*j]+1 ))
                ins=$(( d[i+str1len*(j-1)]+1 ))
                alt=$(( d[(i-1)+str1len*(j-1)]+cost ))
                d[i+str1len*j]=$( echo -e "$del\n$ins\n$alt" | sort -n | head -1 )
            done
        done
        echo ${d[str1len+str1len*(str2len)]}
    fi
}

Common Lisp

[編輯 | 編輯原始碼]
(defun levenshtein-distance (str1 str2)
  "Calculates the Levenshtein distance between str1 and str2, returns an editing distance (int)."
  (let ((n (length str1))
        (m (length str2)))
    ;; Check trivial cases
    (cond ((= 0 n) (return-from levenshtein-distance m))
          ((= 0 m) (return-from levenshtein-distance n)))
    (let ((col (make-array (1+ m) :element-type 'integer))
          (prev-col (make-array (1+ m) :element-type 'integer)))
      ;; We need to store only two columns---the current one that
      ;; is being built and the previous one
      (dotimes (i (1+ m))
        (setf (svref prev-col i) i))
      ;; Loop across all chars of each string
      (dotimes (i n)
        (setf (svref col 0) (1+ i))
        (dotimes (j m)
          (setf (svref col (1+ j))
                (min (1+ (svref col j))
                     (1+ (svref prev-col (1+ j)))
                     (+ (svref prev-col j)
                        (if (char-equal (schar str1 i) (schar str2 j)) 0 1)))))
        (rotatef col prev-col))
      (svref prev-col m))))
int min(int a, int b, int c)
{	
	if(a <= b && a <= c)
	{
		return a;
	}
	else if(b <= a && b <= c)
	{
		return b;
	}
	else if(c <= a && c <= b)
	{
		return c;
	}
}

int levenshtein(char *s1, char *s2) {
    unsigned int x, y, s1len, s2len;
    s1len = strlen(s1);
    s2len = strlen(s2);
    unsigned int matrix[s2len+1][s1len+1];
    matrix[0][0] = 0;
    for (x = 1; x <= s2len; x++)
        matrix[x][0] = matrix[x-1][0] + 1;
    for (y = 1; y <= s1len; y++)
        matrix[0][y] = matrix[0][y-1] + 1;
    for (x = 1; x <= s2len; x++)
        for (y = 1; y <= s1len; y++)
            matrix[x][y] = min(matrix[x-1][y] + 1, matrix[x][y-1] + 1, matrix[x-1][y-1] + (s1[y-1] == s2[x-1] ? 0 : 1));

    return(matrix[s2len][s1len]);
}

以上程式碼可以最佳化為使用 O(min(m,n)) 空間而不是 O(mn)。關鍵觀察是,我們只需要在逐列填充矩陣時訪問前一列的內容。因此,我們可以一遍又一遍地重複使用一列,在繼續時覆蓋其內容。

#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

int levenshtein(char *s1, char *s2) {
    unsigned int s1len, s2len, x, y, lastdiag, olddiag;
    s1len = strlen(s1);
    s2len = strlen(s2);
    unsigned int column[s1len + 1];
    for (y = 1; y <= s1len; y++)
        column[y] = y;
    for (x = 1; x <= s2len; x++) {
        column[0] = x;
        for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
            olddiag = column[y];
            column[y] = MIN3(column[y] + 1, column[y - 1] + 1, lastdiag + (s1[y-1] == s2[x - 1] ? 0 : 1));
            lastdiag = olddiag;
        }
    }
    return column[s1len];
}
unsigned int edit_distance(const std::string& s1, const std::string& s2)
{
	const std::size_t len1 = s1.size(), len2 = s2.size();
	std::vector<std::vector<unsigned int>> d(len1 + 1, std::vector<unsigned int>(len2 + 1));

	d[0][0] = 0;
	for(unsigned int i = 1; i <= len1; ++i) d[i][0] = i;
	for(unsigned int i = 1; i <= len2; ++i) d[0][i] = i;

	for(unsigned int i = 1; i <= len1; ++i)
		for(unsigned int j = 1; j <= len2; ++j)
                      // note that std::min({arg1, arg2, arg3}) works only in C++11,
                      // for C++98 use std::min(std::min(arg1, arg2), arg3)
                      d[i][j] = std::min({ d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) });
	return d[len1][len2];
}

請注意,使用

vector<vector<>>

在這裡並不高效,因為您不需要二維陣列。

以下是另一個只使用 記憶體的實現(它還利用了 這個事實,因此不需要從 3 個可能性中取最小值)。

#include <algorithm>
#include <vector>

template<typename T>
typename T::size_type LevenshteinDistance(const T &source, const T &target) {
    if (source.size() > target.size()) {
        return LevenshteinDistance(target, source);
    }

    using TSizeType = typename T::size_type;
    const TSizeType min_size = source.size(), max_size = target.size();
    std::vector<TSizeType> lev_dist(min_size + 1);

    for (TSizeType i = 0; i <= min_size; ++i) {
        lev_dist[i] = i;
    }

    for (TSizeType j = 1; j <= max_size; ++j) {
        TSizeType previous_diagonal = lev_dist[0], previous_diagonal_save;
        ++lev_dist[0];

        for (TSizeType i = 1; i <= min_size; ++i) {
            previous_diagonal_save = lev_dist[i];
            if (source[i - 1] == target[j - 1]) {
                lev_dist[i] = previous_diagonal;
            } else {
                lev_dist[i] = std::min(std::min(lev_dist[i - 1], lev_dist[i]), previous_diagonal) + 1;
            }
            previous_diagonal = previous_diagonal_save;
        }
    }

    return lev_dist[min_size];
}

以下是具有不同插入、刪除和替換成本的廣義 Levenshtein 距離的實現。

#include <algorithm>
#include <vector>

template<typename T>
typename T::size_type GeneralizedLevenshteinDistance(const T &source,
                                                    const T &target,
                                                    typename T::size_type insert_cost = 1,
                                                    typename T::size_type delete_cost = 1,
                                                    typename T::size_type replace_cost = 1) {
    if (source.size() > target.size()) {
        return GeneralizedLevenshteinDistance(target, source, delete_cost, insert_cost, replace_cost);
    }

    using TSizeType = typename T::size_type;
    const TSizeType min_size = source.size(), max_size = target.size();
    std::vector<TSizeType> lev_dist(min_size + 1);

    lev_dist[0] = 0;
    for (TSizeType i = 1; i <= min_size; ++i) {
        lev_dist[i] = lev_dist[i - 1] + delete_cost;
    }

    for (TSizeType j = 1; j <= max_size; ++j) {
        TSizeType previous_diagonal = lev_dist[0], previous_diagonal_save;
        lev_dist[0] += insert_cost;

        for (TSizeType i = 1; i <= min_size; ++i) {
            previous_diagonal_save = lev_dist[i];
            if (source[i - 1] == target[j - 1]) {
                lev_dist[i] = previous_diagonal;
            } else {
                lev_dist[i] = std::min(std::min(lev_dist[i - 1] + delete_cost, lev_dist[i] + insert_cost), previous_diagonal + replace_cost);
            }
            previous_diagonal = previous_diagonal_save;
        }
    }

    return lev_dist[min_size];
}
        private int levenshtein(string a, string b)
        {

            if (string.IsNullOrEmpty(a))
            {
                if (!string.IsNullOrEmpty(b))
                {
                    return b.Length;
                }
                return 0;
            }

            if (string.IsNullOrEmpty(b))
            {
                if (!string.IsNullOrEmpty(a))
                {
                    return a.Length;
                }
                return 0;
            }

            int cost;
            int[,] d = new int[a.Length + 1, b.Length + 1];
            int min1;
            int min2;
            int min3;

            for (int i = 0; i <= d.GetUpperBound(0); i += 1)
            {
                d[i, 0] = i;
            }

            for (int i = 0; i <= d.GetUpperBound(1); i += 1)
            {
                d[0, i] = i;
            }

            for (int i = 1; i <= d.GetUpperBound(0); i += 1)
            {
                for (int j = 1; j <= d.GetUpperBound(1); j += 1)
                {
                    cost = (a[i-1] != b[j-1])? 1 : 0; 

                    min1 = d[i - 1, j] + 1;
                    min2 = d[i, j - 1] + 1;
                    min3 = d[i - 1, j - 1] + cost;
                    d[i, j] = Math.Min(Math.Min(min1, min2), min3);
                }
            }

            return d[d.GetUpperBound(0), d.GetUpperBound(1)];

        }

具有減少記憶體使用量的實現

public int LevenshteinDistance(string source, string target){
	if(string.IsNullOrEmpty(source)){
		if(string.IsNullOrEmpty(target)) return 0;
		return target.Length;
	}
	if(string.IsNullOrEmpty(target)) return source.Length;

	if(source.Length > target.Length){
		var temp = target;
		target = source;
		source = temp;
	}
 
	var m = target.Length;
	var n = source.Length;
	var distance = new int[2, m + 1];
	// Initialize the distance matrix
	for(var j = 1; j <= m; j++) distance[0, j] = j;

	var currentRow = 0;
	for(var i = 1; i <= n; ++i){
		currentRow = i & 1;
		distance[currentRow, 0] = i;
		var previousRow = currentRow ^ 1;
		for(var j = 1; j <= m; j++) {
			var cost = (target[j - 1] == source[i - 1] ? 0 : 1);
			distance[currentRow, j] = Math.Min(Math.Min(
						distance[previousRow, j] + 1,
						distance[currentRow, j - 1] + 1),
						distance[previousRow, j - 1] + cost);
		}
	}
	return distance[currentRow, m];
}

Damerau-Levenshtein 距離在漸進時間 O ((max + 1) * min (first.length (), second.length ())) 內計算。

    /// <summary>
    /// Damerau-Levenshtein distance
    /// </summary>
    public class DamerauLevensteinMetric
    {
        private const int DEFAULT_LENGTH = 255;
        private int[] _currentRow;
        private int[] _previousRow;
        private int[] _transpositionRow;

        public DamerauLevensteinMetric()
            : this(DEFAULT_LENGTH)
        {
        }

        /// <summary>
        /// 
        /// </summary>
        /// <param name="maxLength"></param>
        public DamerauLevensteinMetric(int maxLength)
        {
            _currentRow = new int[maxLength + 1];
            _previousRow = new int[maxLength + 1];
            _transpositionRow = new int[maxLength + 1];
        }

        /// <summary>
        /// Damerau-Levenshtein distance is computed in asymptotic time O((max + 1) * min(first.length(), second.length()))
        /// </summary>
        /// <param name="first"></param>
        /// <param name="second"></param>
        /// <param name="max"></param>
        /// <returns></returns>
        public int GetDistance(string first, string second, int max)
        {
            int firstLength = first.Length;
            int secondLength = second.Length;

            if (firstLength == 0)
                return secondLength;
            
            if (secondLength == 0) return firstLength;

            if (firstLength > secondLength)
            {
                string tmp = first;
                first = second;
                second = tmp;
                firstLength = secondLength;
                secondLength = second.Length;
            }

            if (max < 0) max = secondLength;
            if (secondLength - firstLength > max) return max + 1;

            if (firstLength > _currentRow.Length)
            {
                _currentRow = new int[firstLength + 1];
                _previousRow = new int[firstLength + 1];
                _transpositionRow = new int[firstLength + 1];
            }

            for (int i = 0; i <= firstLength; i++)
                _previousRow[i] = i;

            char lastSecondCh = (char) 0;
            for (int i = 1; i <= secondLength; i++)
            {
                char secondCh = second[i - 1];
                _currentRow[0] = i;

                // Compute only diagonal stripe of width 2 * (max + 1)
                int from = Math.Max(i - max - 1, 1);
                int to = Math.Min(i + max + 1, firstLength);

                char lastFirstCh = (char) 0;
                for (int j = from; j <= to; j++)
                {
                    char firstCh = first[j - 1];

                    // Compute minimal cost of state change to current state from previous states of deletion, insertion and swapping 
                    int cost = firstCh == secondCh ? 0 : 1;
                    int value = Math.Min(Math.Min(_currentRow[j - 1] + 1, _previousRow[j] + 1), _previousRow[j - 1] + cost);

                    // If there was transposition, take in account its cost 
                    if (firstCh == lastSecondCh && secondCh == lastFirstCh)
                        value = Math.Min(value, _transpositionRow[j - 2] + cost);

                    _currentRow[j] = value;
                    lastFirstCh = firstCh;
                }
                lastSecondCh = secondCh;

                int[] tempRow = _transpositionRow;
                _transpositionRow = _previousRow;
                _previousRow = _currentRow;
                _currentRow = tempRow;
            }

            return _previousRow[firstLength];
        }
    }
(defn nextelt
  "Given two characters, the previous row, and a row we are
  building, determine out the next element for this row."
  [char1 char2 prevrow thisrow position]
  (if (= char1 char2)
    (prevrow (- position 1))
    (+ 1 (min
      (prevrow (- position 1))
      (prevrow position)
      (last thisrow))))
  )

(defn nextrow
  "Based on the next character from string1 and the whole of string2
  calculate the next row. Initially thisrow contains one number."
  [char1 str2 prevrow thisrow]
  (let [char2 (first str2)
        position (count thisrow)]
    (if (= (count thisrow) (count prevrow))
      thisrow
      (recur
        char1
        (rest str2)
        prevrow
        (conj thisrow (nextelt char1 char2 prevrow thisrow position))))))

(defn levenshtein
  "Calculate the Levenshtein distance between two strings."
  ([str1 str2]
  (let [row0 (vec (map first (map vector (iterate inc 1) str2)))]
    (levenshtein 1 (vec (cons 0 row0)) str1 str2)))
  ([row-nr prevrow str1 str2]
    (let [next-row (nextrow (first str1) str2 prevrow (vector row-nr))
          str1-remainder (.substring str1 1)]
      (if (= "" str1-remainder)
        (last next-row)
        (recur (inc row-nr) next-row str1-remainder str2))))
  )

另一個使用瞬態資料結構的實現,靈感來自 Common Lisp 版本

(defn levenshtein [str1 str2]
  "a Clojure levenshtein implementation using transient data structure"
  (let [n (count str1) m (count str2)]
    (cond 
     (= 0 n) m
     (= 0 m) n
     :else
     (let [prev-col (transient (vec (range (inc m)))) col (transient [])] ; initialization for the first column.
       (dotimes [i n]
         (assoc! col 0 (inc i)) ; update col[0]
         (dotimes [j m]
           (assoc! col (inc j)  ; update col[1..m] 
                   (min (inc (nth col j))
                        (inc (nth prev-col (inc j)))
                        (+ (get prev-col j) (if (= (nth str1 i) (nth str2 j)) 0 1)))))
         (dotimes [i (count prev-col)] 
           (assoc! prev-col i (get col i)))) ; 
       (last (persistent! col)))))) ; last element of last column
import std.string;
import std.algorithm; // for min, already defines levenshteinDistance in library

pure nothrow size_t damerauDistance(const string source, const string target) {
	immutable auto sourceLength = source.length;
	immutable auto targetLength = target.length;
	auto distance = new size_t[][](sourceLength + 1, targetLength+1); 
	for (auto iSource = 0; iSource <= sourceLength; ++iSource) {
		distance[iSource][0] = iSource;
	}
	for (auto iTarget = 1; iTarget <= targetLength; ++iTarget) {
		distance[0][iTarget] = iTarget;
	}

	for (auto iSource = 1; iSource <= sourceLength; ++iSource) {
		for (auto iTarget = 1; iTarget <= targetLength; ++iTarget) {
			auto delete1 =  distance[iSource - 1][iTarget] + 1;
			auto insert1 =  distance[iSource][iTarget - 1] + 1;
			size_t marginalCost = source[iSource - 1] == target[iTarget - 1] ? 0 : 1;
			auto substitute = distance[iSource - 1][iTarget - 1] + marginalCost;
			distance[iSource][iTarget] = min(delete1, insert1, substitute);
			if (iSource > 1 && iTarget > 1 && 
				source[iSource - 1] == target[iTarget - 2] &&
				source[iSource - 2] == target[iTarget-1]) {
					distance[iSource][iTarget] = 
						min(distance[iSource][iTarget], distance[iSource - 2][iTarget - 2] + marginalCost);
			}
		}
	}
	return distance[sourceLength][targetLength];
}

一個簡單的實現,當然可以改進。

function Levenshtein(Word1, Word2: String): integer;
var lev : array of array of integer;
    i,j : integer;
begin  
   // Word1 := LowerCase(Word1);
   // Word2 := LowerCase(Word2);

  // If the words are identical, do nothing
  if Word1 = Word2 then
  begin
    result := 0;
    exit;
  end;

  SetLength(lev, length(Word1) + 1);
  for i := low(lev) to high(lev) do
    setLength(lev[i], length(Word2) + 1);

  for i := low(lev) to high(lev) do lev[i][0] := i;
  for j := low(lev[low(lev)]) to high(lev[low(lev)]) do lev[0][j] := j;

  for i := low(lev)+1 to high(lev) do
    for j := low(lev[i])+1 to high(lev[i]) do
      lev[i][j] := min(min(lev[i-1][j] + 1,lev[i][j-1] + 1)
                      ,lev[i-1][j-1] + ifthen(Word1[i] = Word2[j], 0, 1));

  result := lev[length(word1)][length(word2)];
end;
	distance (a, b: STRING): INTEGER
			-- Compute Levenshtein distance between strings `a' and `b'.
		note
			synopsis: "[
				Example: Calculate distance between "sitting" and "kitten":
				
						k	i	t	t	e	n
					0	1	2	3	4	5	6 <-- set top row to [0 .. m]
				s	1	1	2	3	4	5	6
				i	2	2	1	2	3	4	5
				t	3	3	2	1	2	3	4
				t	4	4	3	2	1	2	3
				i	5	5	4	3	2	2	3
				n	6	6	5	4	3	3	2
				g	7	7	6	5	4	4	3! <-- Answer
					^
					|
					set first column to [0 .. n]
					
				Compute top row and left-most columns first. Then go cell-by-cell
				(e.g. 1 .. n over 1 .. m), row-by-row, column-by-column, computing:
				
				d[i-1, j] + 1		-- a deletion
				d[i, j-1] + 1		-- an insertion
				d[i-1, j-1] + 1		-- a substitution
				
				Taking the minimum of each and putting it in its proper place in
				the d[i, j] array, unless the char's are the same (e.g. a [i - 1] = b [j - 1])
				]"
			language_note: "[
				Eiffel is a 1-based array system and not 0-based, so do not
				read the array element accesses from the 0-based mindset.
				]"
		local
			d: ARRAY2 [INTEGER]
			i, j, m, n: INTEGER
		do
			m := a.count + 1; n := b.count + 1
			create d.make_filled (0, m, n)
			from i := 0 until i = m loop d [i + 1, 1] := i; i := i + 1 end
			from j := 0 until j = n loop d [1, j + 1] := j; j := j + 1 end
			from j := 2 until j > n loop
				from i := 2 until i > m loop
					if a [i - 1] = b [j - 1] then
						d[i, j] := d[i-1, j-1]
					else
						d[i, j] := (d[i-1, j-1] + 1).min (d[i, j-1] + 1).min (d[i-1, j] + 1)
					end
					i := i + 1
				end
				j := j + 1
			end
			Result := d[m, n]
		end

Emacs Lisp

[編輯 | 編輯原始碼]
 (defun levenshtein-distance (str1 str2)
   "Return the edit distance between strings STR1 and STR2."
   (if (not (stringp str1))
       (error "Argument was not a string: %s" str1))
   (if (not (stringp str2))
       (error "Argument was not a string: %s" str2))
   (let* ((make-table (function (lambda (columns rows init)
            (make-vector rows (make-vector columns init)))))
          (tref (function (lambda (table x y)
            (aref (aref table y) x))))
          (tset (function (lambda (table x y object)
            (let ((row (copy-sequence (aref table y))))
              (aset row x object)
              (aset table y row) object))))
          (length-str1 (length str1))
          (length-str2 (length str2))
          (d (funcall make-table (1+ length-str1) (1+ length-str2) 0)))
     (let ((i 0) (j 0))
       (while (<= i length-str1)
         (funcall tset d i 0 i)
         (setq i (1+ i)))
       (while (<= j length-str2)
         (funcall tset d 0 j j)
         (setq j (1+ j))))
     (let ((i 1))
       (while (<= i length-str1)
         (let ((j 1))
           (while (<= j length-str2)
             (let* ((cost (if (equal (aref str1 (1- i)) (aref str2 (1- j)))
                              0
                            1))
                    (deletion (1+ (funcall tref d (1- i) j)))
                    (insertion (1+ (funcall tref d i (1- j))))
                    (substitution (+ (funcall tref d (1- i) (1- j)) cost)))
               (funcall tset d i j (min insertion deletion substitution)))
             (setq j (1+ j))))
         (setq i (1+ i))))
     (funcall tref d length-str1 length-str2)))
levenshtein(A, []) -> length(A);
levenshtein([], B) -> length(B);
levenshtein([A | TA] = AA, [B | TB] = BA) ->
	lists:min([levenshtein(TA, BA) + 1,
	           levenshtein(AA, TB) + 1,
	           levenshtein(TA, TB) + delta(A, B)]).
			   
delta(_A, _A) -> 0;
delta(_A, _B) -> 1.

內聯的 min 函式提供了很大的速度提升。

let inline min3 one two three = 
    if one < two && one < three then one
    elif two < three then two
    else three

let wagnerFischer (s: string) (t: string) =
    let m = s.Length
    let n = t.Length
    let d = Array2D.create (m + 1) (n + 1) 0

    for i = 0 to m do d.[i, 0] <- i
    for j = 0 to n do d.[0, j] <- j

    for j = 1 to n do
        for i = 1 to m do
            if s.[i-1] = t.[j-1] then
                d.[i, j] <- d.[i-1, j-1]
            else
                d.[i, j] <-
                    min3
                        (d.[i-1, j  ] + 1) // a deletion
                        (d.[i  , j-1] + 1) // an insertion
                        (d.[i-1, j-1] + 1) // a substitution
    d.[m,n]

以下是一個稍微快一點的延遲版本。

let wagnerFischerLazy (s: string) (t: string) =
    let m = s.Length
    let n = t.Length
    let d = Array2D.create (m+1) (n+1) -1
    let rec dist =
        function
        | i, 0 -> i
        | 0, j -> j
        | i, j when d.[i,j] <> -1 -> d.[i,j]
        | i, j ->
            let dval = 
                if s.[i-1] = t.[j-1] then dist (i-1, j-1)
                else
                    min3
                        (dist (i-1, j)   + 1) // a deletion
                        (dist (i,   j-1) + 1) // an insertion
                        (dist (i-1, j-1) + 1) // a substitution
            d.[i, j] <- dval; dval 
    dist (m, n)

此版本使用動態規劃,時間複雜度為 ,其中 分別是 的長度,空間複雜度為 個整數加上一些常數空間(即 )。

import "unicode/utf8"
func Levenshtein(a, b string) int {
	f := make([]int, utf8.RuneCountInString(b)+1)

	for j := range f {
		f[j] = j
	}

	for _, ca := range a {
		j := 1
		fj1 := f[0] // fj1 is the value of f[j - 1] in last iteration
		f[0]++
		for _, cb := range b {
			mn := min(f[j]+1, f[j-1]+1) // delete & insert
			if cb != ca {
				mn = min(mn, fj1+1) // change
			} else {
				mn = min(mn, fj1) // matched
			}

			fj1, f[j] = f[j], mn // save f[j] to fj1(j is about to increase), update f[j] to mn
			j++
		}
	}

	return f[len(f)-1]
}
func min(a, b int) int {
         if a <= b {
                 return a
         } else {
                 return b
         }
}

此版本基於下面的 Java 版本。

class Levenshtein {
  def static int distance(String str1, String str2) {
    def str1_len = str1.length()
    def str2_len = str2.length()
    int[][] distance = new int[str1_len + 1][str2_len + 1]
    (str1_len + 1).times { distance[it][0] = it }
    (str2_len + 1).times { distance[0][it] = it }
    (1..str1_len).each { i ->
       (1..str2_len).each { j ->
          distance[i][j] = [distance[i-1][j]+1, distance[i][j-1]+1, str1[i-1]==str2[j-1]?distance[i-1][j-1]:distance[i-1][j-1]+1].min()
       }
    }
    distance[str1_len][str2_len]
  }
}

使用 GHCi 測試。

levenshtein::String->String->Int
levenshtein s t = 
    d!!(length s)!!(length t) 
    where d = [[distance m n|n<-[0..length t]]|m<-[0..length s]]
          distance i 0 = i
          distance 0 j = j
          distance i j = minimum [d!!(i-1)!!j+1, d!!i!!(j-1)+1, d!!(i-1)!!(j-1) + (if s!!(i-1)==t!!(j-1) then 0 else 1)]

這裡有一個稍微快一點的版本。

levenshtein::String->String->Int
levenshtein s t = 
    d!!(length s)!!(length t) 
    where d = [[distance m n|n<-[0..length t]]|m<-[0..length s]]
          distance i 0 = i
          distance 0 j = j
          distance i j = if s!!(i-1)==t!!(j-1) then d!!(i-1)!!(j-1) else
                         1 + minimum [d!!(i-1)!!j, d!!i!!(j-1), d!!(i-1)!!(j-1)]

對於大型字串,使用陣列快得多

import Data.Array.Unboxed
import Data.Array.ST
import Control.Monad.ST

for_ xs f =  mapM_ f xs

levenshtein :: [Char] -> [Char] -> Int
levenshtein s t = d ! (ls , lt)
    where s' = array (0,ls) [ (i,x) | (i,x) <- zip [0..] s ]::UArray Int Char
          t' = array (0,lt) [ (i,x) | (i,x) <- zip [0..] t ]::UArray Int Char
          ls = length s
          lt = length t
          (l,h) = ((0,0),(length s,length t))
          d = runSTUArray $ do
                m <- newArray (l,h) 0 :: ST s (STUArray s (Int,Int) Int)
                for_ [0..ls] $ \i -> writeArray m (i,0) i
                for_ [0..lt] $ \j -> writeArray m (0,j) j
                for_ [1..lt] $ \j -> do
                              for_ [1..ls] $ \i -> do
                                  let c = if s'!(i-1)==t'! (j-1) 
                                          then 0 else 1
                                  x <- readArray m (i-1,j)
                                  y <- readArray m (i,j-1)
                                  z <- readArray m (i-1,j-1)
                                  writeArray m (i,j) $ minimum [x+1, y+1, z+c ]
                return m

作為遞迴定義的陣列

import Array
toArray l = listArray (0, length l - 1) l                  -- makes an Array from a List
mkArray f bnds = array bnds [ (i, f i) | i <- range bnds ] -- defines an Array over its indices

levenshtein sa sb = table
  where
    arrA = toArray sa
    arrB = toArray sb
    table = mkArray f ((0,0), (length sa , length sb))
    f (ia, 0) = ia
    f (0 ,ib) = ib
    f (ia,ib)
      | a == b    = table ! (ia-1,ib-1)
      | otherwise = 1 + minimum [ table ! x | x <- [(ia-1,ib-1),(ia-1,ib),(ia,ib-1)] ]
      where
        a = arrA ! (ia - 1)
        b = arrB ! (ib - 1)

最後:快速但難以理解的實現

levenshtein2 sa sb = last $ foldl transform [0..length sa] sb 
   where
      transform xs@(x:xs') c = scanl compute (x+1) (zip3 sa xs xs') 
         where
            compute z (c', x, y) = minimum [y+1, z+1, x + fromEnum (c' /= c)]
 Levenshtein := method(left, right,
   if(right size < left size, return Levenshtein(right, left))
   current := 0 to(left size) asList
   right foreach(i, righti,
     previous := current
     current = List clone with(i + 1)
     left foreach(j, leftj,
       current append((current at(j) + 1) min(previous at(j + 1) + 1) min(previous at(j) + if(leftj == righti, 0, 1)))
     )
   )
   current last
 )

JavaScript

[編輯 | 編輯原始碼]

慢速遞迴版本

function levenshteinDistance (s, t) {
        if (s.length === 0) return t.length;
        if (t.length === 0) return s.length;

        return Math.min(
                levenshteinDistance(s.substr(1), t) + 1,
                levenshteinDistance(t.substr(1), s) + 1,
                levenshteinDistance(s.substr(1), t.substr(1)) + (s[0] !== t[0] ? 1 : 0)
        );
}

使用動態規劃的更快方法

// Compute the edit distance between the two given strings
function getEditDistance(a, b) {
  if (a.length === 0) return b.length; 
  if (b.length === 0) return a.length;

  var matrix = [];

  // increment along the first column of each row
  var i;
  for (i = 0; i <= b.length; i++) {
    matrix[i] = [i];
  }

  // increment each column in the first row
  var j;
  for (j = 0; j <= a.length; j++) {
    matrix[0][j] = j;
  }

  // Fill in the rest of the matrix
  for (i = 1; i <= b.length; i++) {
    for (j = 1; j <= a.length; j++) {
      if (b.charAt(i-1) == a.charAt(j-1)) {
        matrix[i][j] = matrix[i-1][j-1];
      } else {
        matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution
                                Math.min(matrix[i][j-1] + 1, // insertion
                                         matrix[i-1][j] + 1)); // deletion
      }
    }
  }

  return matrix[b.length][a.length];
};

(來源)

public class LevenshteinDistance {                                               
    private static int minimum(int a, int b, int c) {                            
        return Math.min(Math.min(a, b), c);                                      
    }                                                                            
                                                                                 
    public static int computeLevenshteinDistance(CharSequence lhs, CharSequence rhs) {      
        int[][] distance = new int[lhs.length() + 1][rhs.length() + 1];        
                                                                                 
        for (int i = 0; i <= lhs.length(); i++)                                 
            distance[i][0] = i;                                                  
        for (int j = 1; j <= rhs.length(); j++)                                 
            distance[0][j] = j;                                                  
                                                                                 
        for (int i = 1; i <= lhs.length(); i++)                                 
            for (int j = 1; j <= rhs.length(); j++)                             
                distance[i][j] = minimum(                                        
                        distance[i - 1][j] + 1,                                  
                        distance[i][j - 1] + 1,                                  
                        distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1));
                                                                                 
        return distance[lhs.length()][rhs.length()];                           
    }                                                                            
}

非遞迴,更快

public int levenshteinDistance (CharSequence lhs, CharSequence rhs) {                          
    int len0 = lhs.length() + 1;                                                     
    int len1 = rhs.length() + 1;                                                     
                                                                                    
    // the array of distances                                                       
    int[] cost = new int[len0];                                                     
    int[] newcost = new int[len0];                                                  
                                                                                    
    // initial cost of skipping prefix in String s0                                 
    for (int i = 0; i < len0; i++) cost[i] = i;                                     
                                                                                    
    // dynamically computing the array of distances                                  
                                                                                    
    // transformation cost for each letter in s1                                    
    for (int j = 1; j < len1; j++) {                                                
        // initial cost of skipping prefix in String s1                             
        newcost[0] = j;                                                             
                                                                                    
        // transformation cost for each letter in s0                                
        for(int i = 1; i < len0; i++) {                                             
            // matching current letters in both strings                             
            int match = (lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1;             
                                                                                    
            // computing cost for each transformation                               
            int cost_replace = cost[i - 1] + match;                                 
            int cost_insert  = cost[i] + 1;                                         
            int cost_delete  = newcost[i - 1] + 1;                                  
                                                                                    
            // keep minimum cost                                                    
            newcost[i] = Math.min(Math.min(cost_insert, cost_delete), cost_replace);
        }                                                                           
                                                                                    
        // swap cost/newcost arrays                                                 
        int[] swap = cost; cost = newcost; newcost = swap;                          
    }                                                                               
                                                                                    
    // the distance is the cost for transforming all letters in both strings        
    return cost[len0 - 1];                                                          
}

直接遞迴實現。不要用於大型字串!

function levenshtein(lhs::String, rhs::String, lhsi::Int = length(lhs), rhsi::Int = length(rhs))
    if lhsi == 0
        return rhsi
    end
    if rhsi == 0
        return lhsi
    end

    cost = lhs[lhsi] == rhs[rhsi] ? 0 : 1
    min(
        levenshtein(lhs, rhs, lhsi - 1, rhsi) + 1,
        levenshtein(lhs, rhs, lhsi, rhsi - 1) + 1,
        levenshtein(lhs, rhs, lhsi - 1, rhsi - 1) + cost
        )
end

使用以下測試

@show levenshtein("aaa", "ab")
@show levenshtein("kitten", "sitting")
fun levenshtein(lhs : CharSequence, rhs : CharSequence) : Int {
    val lhsLength = lhs.length
    val rhsLength = rhs.length

    var cost = IntArray(lhsLength + 1) { it }
    var newCost = IntArray(lhsLength + 1) { 0 }

    for (i in 1..rhsLength) {
        newCost[0] = i

        for (j in 1..lhsLength) {
            val editCost= if(lhs[j - 1] == rhs[i - 1]) 0 else 1

            val costReplace = cost[j - 1] + editCost
            val costInsert = cost[j] + 1
            val costDelete = newCost[j - 1] + 1

            newCost[j] = minOf(costInsert, costDelete, costReplace)
        }

        val swap = cost
        cost = newCost
        newCost = swap
    }

    return cost[lhsLength]
}

此實現似乎已損壞;但我不能確定如何修復它。

DROP FUNCTION IF EXISTS `levenshtein`;
CREATE FUNCTION `levenshtein`(`s1` VARCHAR(255) CHARACTER SET utf8, `s2` VARCHAR(255) CHARACTER SET utf8)
    RETURNS TINYINT UNSIGNED
    NO SQL
    DETERMINISTIC
BEGIN
    DECLARE s1_len, s2_len, i, j, c, c_temp TINYINT UNSIGNED;
    -- max strlen=255 for this function
    DECLARE cv0, cv1 VARBINARY(256);
    
    -- if any param is NULL return NULL
    -- (consistent with builtin functions)
    IF (s1 + s2) IS NULL THEN
        RETURN NULL;
    END IF;
    
    SET s1_len = CHAR_LENGTH(s1),
        s2_len = CHAR_LENGTH(s2),
        cv1 = 0x00,
        j = 1,
        i = 1,
        c = 0;
    
    -- if any string is empty,
    -- distance is the length of the other one 
    IF (s1 = s2) THEN
        RETURN 0;
    ELSEIF (s1_len = 0) THEN
        RETURN s2_len;
    ELSEIF (s2_len = 0) THEN
        RETURN s1_len;
    END IF;
    
    WHILE (j <= s2_len) DO
        SET cv1 = CONCAT(cv1, CHAR(j)),
        j = j + 1;
    END WHILE;
    
    WHILE (i <= s1_len) DO
        SET c = i,
            cv0 = CHAR(i),
            j = 1;
        
        -- What for syntax of WHILE is that?? MYSQL do not now. Please help.
        WHILE (j <= c_temp) THEN
                SET c = c_temp;
            END IF;
            
            SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1;
            IF (c > c_temp) THEN
                SET c = c_temp;
            END IF;
            
            SET cv0 = CONCAT(cv0, CHAR(c)),
                j = j + 1;
        END WHILE;
        
        SET cv1 = cv0,
            i = i + 1;
    END WHILE;
    
    RETURN c;
END;

下面的實現,感謝 Code Janitor 的 Jason Rust,似乎更完整。

CREATE FUNCTION levenshtein( s1 VARCHAR(255), s2 VARCHAR(255) ) 
  RETURNS INT 
  DETERMINISTIC 
  BEGIN 
    DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT; 
    DECLARE s1_char CHAR; 
    -- max strlen=255 
    DECLARE cv0, cv1 VARBINARY(256); 
    SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0; 
    IF s1 = s2 THEN 
      RETURN 0; 
    ELSEIF s1_len = 0 THEN 
      RETURN s2_len; 
    ELSEIF s2_len = 0 THEN 
      RETURN s1_len; 
    ELSE 
      WHILE j <= s2_len DO 
        SET cv1 = CONCAT(cv1, UNHEX(HEX(j))), j = j + 1; 
      END WHILE; 
      WHILE i <= s1_len DO 
        SET s1_char = SUBSTRING(s1, i, 1), c = i, cv0 = UNHEX(HEX(i)), j = 1; 
        WHILE j <= s2_len DO 
          SET c = c + 1; 
          IF s1_char = SUBSTRING(s2, j, 1) THEN  
            SET cost = 0; ELSE SET cost = 1; 
          END IF; 
          SET c_temp = CONV(HEX(SUBSTRING(cv1, j, 1)), 16, 10) + cost; 
          IF c > c_temp THEN SET c = c_temp; END IF; 
          SET c_temp = CONV(HEX(SUBSTRING(cv1, j+1, 1)), 16, 10) + 1; 
          IF c > c_temp THEN  
            SET c = c_temp;  
          END IF; 
          SET cv0 = CONCAT(cv0, UNHEX(HEX(c))), j = j + 1; 
        END WHILE; 
        SET cv1 = cv0, i = i + 1; 
      END WHILE; 
    END IF; 
    RETURN c; 
  END;

Objective-C

[編輯 | 編輯原始碼]
- (NSInteger)computeLevenshteinDistanceFrom:(NSString *)source to:(NSString *)target {
    NSUInteger sourceLength = [source length] + 1;
    NSUInteger targetLength = [target length] + 1;

    NSMutableArray *cost = [NSMutableArray new];
    NSMutableArray *newCost = [NSMutableArray new];

    for (NSUInteger i = 0; i < sourceLength; i++) {
        cost[i] = @(i);
    }

    for (NSUInteger j = 1; j < targetLength; j++) {
        newCost[0] = @(j - 1);

        for (NSUInteger i = 1; i < sourceLength; i++) {
            NSInteger match = ([source characterAtIndex:i - 1] == [target characterAtIndex:j - 1]) ? 0 : 1;
            NSInteger costReplace = [cost[i - 1] integerValue] + match;
            NSInteger costInsert = [cost[i] integerValue] + 1;
            NSInteger costDelete = [newCost[i - 1] integerValue] + 1;
            newCost[i] = @(MIN(MIN(costInsert, costDelete), costReplace));
        }

        NSMutableArray *swap = cost;
        cost = newCost;
        newCost = swap;
    }

    return [cost[sourceLength - 1] integerValue];
}

或者使用擴充套件“和 C 風格”

#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

@implementation NSString (LevensteinDistance)

- (NSUInteger)distance:(NSString *)query {
    const char *selfCharArray = [self UTF8String];
    const char *queryCharArray = [query UTF8String];
    NSUInteger x, y;
    NSUInteger selfLength = strlen(selfCharArray);
    NSUInteger queryLength = strlen(queryCharArray);
    NSUInteger matrix[queryLength + 1][selfLength + 1];
    matrix[0][0] = 0;
    for (x = 1; x <= queryLength; x++) {
        matrix[x][0] = matrix[x - 1][0] + 1;
    }
    for (y = 1; y <= selfLength; y++) {
        matrix[0][y] = matrix[0][y - 1] + 1;
    }
    for (x = 1; x <= queryLength; x++) {
        for (y = 1; y <= selfLength; y++) {
            matrix[x][y] = MIN3(matrix[x - 1][y] + 1, matrix[x][y - 1] + 1, matrix[x - 1][y - 1] + (selfCharArray[y - 1] == queryCharArray[x - 1] ? 0 : 1));
        }
    }
    return (matrix[queryLength][selfLength]);
}

@end
(* Minimum of three integers. This function is deliberately
 * not polymorphic because (1) we only need to compare integers 
 * and (2) the OCaml compilers do not perform type specialization 
 * for user-defined functions. *)
let minimum (x:int) y z =
  let m' (a:int) b = if a < b then a else b in
    m' (m' x y) z

(* Matrix initialization. *)
let init_matrix n m =
  let init_col = Array.init m in
  Array.init n (function
    | 0 -> init_col (function j -> j)
    | i -> init_col (function 0 -> i | _ -> 0)
  )

(* Computes the Levenshtein distance between two unistring.
 * If you want to run it faster, add the -unsafe option when
 * compiling or use Array.unsafe_* functions (but be carefull 
 * with these well-named unsafe features). *)
let distance_utf8 x y =
  match Array.length x, Array.length y with
    | 0, n -> n
    | m, 0 -> m
    | m, n ->
       let matrix = init_matrix (m + 1) (n + 1) in
         for i = 1 to m do
           let s = matrix.(i) and t = matrix.(i - 1) in
             for j = 1 to n do
               let cost = abs (compare x.(i - 1) y.(j - 1)) in
                 s.(j) <- minimum (t.(j) + 1) (s.(j - 1) + 1) (t.(j - 1) + cost)
             done
         done;
         matrix.(m).(n)

(* This function takes two strings, convert them to unistring (int array)
 * and then call distance_utf8, so we can compare utf8 strings. Please
 * note that you need Glib (see LablGTK). *)
let distance x y =
  distance_utf8 (Glib.Utf8.to_unistring x) (Glib.Utf8.to_unistring y)

Octave 和 MATLAB

[編輯 | 編輯原始碼]
function [dist,L]=levenshtein_distance(str1,str2)
    L1=length(str1)+1;
    L2=length(str2)+1;
    L=zeros(L1,L2);

    g=+1;%just constant
    m=+0;%match is cheaper, we seek to minimize
    d=+1;%not-a-match is more costly.
    
    %do BC's
    L(:,1)=([0:L1-1]*g)';
    L(1,:)=[0:L2-1]*g;
    
    m4=0;%loop invariant
    for idx=2:L1;
        for idy=2:L2
            if(str1(idx-1)==str2(idy-1))
                score=m;
            else
                score=d;
            end
            m1=L(idx-1,idy-1) + score;
            m2=L(idx-1,idy) + g;
            m3=L(idx,idy-1) + g;
            L(idx,idy)=min(m1,min(m2,m3));
        end
    end
    
    dist=L(L1,L2);
    return
end
%I think this function generates errors: 
%>> levenshtein('aaa','ab')
% ans =
%     1
%correct answer here is 2, I believe: 1 deletion, 1 replacement?
function q = levenshtein(s1,s2)
d = toeplitz(find(s1),find(s2))-1;
for j = 1:numel(s2)-1
    for i = 1:numel(s1)-1
           d(i+1,j+1) = min(min(d(i,j+1),d(i+1,j)) + 1,d(i,j) + ne(s1(i), s2(j)));
    end
end
q = d(end) + eq(i,j)*ne(s1(end),s2(end)); % check if last chars are different
use List::Util qw(min);

sub levenshtein {
    my ($str1, $str2) = @_;
    my @ar1 = split //, $str1;
    my @ar2 = split //, $str2;

    my @dist;
    $dist[$_][0] = $_ foreach (0 .. @ar1);
    $dist[0][$_] = $_ foreach (0 .. @ar2);

    foreach my $i (1 .. @ar1){
        foreach my $j (1 .. @ar2){
            my $cost = $ar1[$i - 1] eq $ar2[$j - 1] ? 0 : 1;
            $dist[$i][$j] = min(
                        $dist[$i - 1][$j] + 1, 
                        $dist[$i][$j - 1] + 1, 
                        $dist[$i - 1][$j - 1] + $cost );
        }
    }

    return $dist[@ar1][@ar2];
}
function levenshtein(string a, b)
    sequence costs = tagset(length(b)+1,0)
    for i=1 to length(a) do
        costs[1] = i
        integer newcost = i-1, pj = i, cj
        for j=1 to length(b) do
            cj = costs[j+1]
            pj = min({pj+1, cj+1, newcost+(a[i]!=b[j])})
            {costs[j+1],newcost} = {pj,cj}
        end for
    end for
    return costs[$-1]
end function

請注意,從 PHP 4.0.1 版開始,有一個標準庫呼叫 levenshtein()[1]。然而,它僅限於比較長度不超過 255 個字元的字串,因此限制了它的實用性。

function leven($s1, $s2){
    $l1 = strlen($s1);                    // Length of string $s1
    $l2 = strlen($s2);                    // Length of $s2
    $dis = range(0, $l2);                 // Array with (0,1,2,...,n)

    for ($x = 1; $x <= $l1; $x++) {        
        $dis_new[0] = $x;
        for ($y = 1; $y <= $l2; $y++) {
            $c = ($s1[$x - 1] == $s2[$y - 1]) ? 0 : 1;
            $dis_new[$y] = min($dis[$y] + 1, $dis_new[$y - 1] + 1, $dis[$y - 1] + $c);	 
        }
        $dis = $dis_new;              
    }	

    return $dis[$l2];
}

使用一維陣列複製 PHP 版本

CREATE OR REPLACE FUNCTION levenshtein(s text, t text) 
RETURNS integer AS $$
DECLARE i integer;
DECLARE j integer;
DECLARE m integer;
DECLARE n integer;
DECLARE d integer[];
DECLARE c integer;

BEGIN
	m := char_length(s);
	n := char_length(t);

	i := 0;
	j := 0;

	FOR i IN 0..m LOOP
		d[i*(n+1)] = i;
	END LOOP;

	FOR j IN 0..n LOOP
		d[j] = j;
	END LOOP;

	FOR i IN 1..m LOOP
		FOR j IN 1..n LOOP
			IF SUBSTRING(s,i,1) = SUBSTRING(t, j,1) THEN
				c := 0;
			ELSE
				c := 1;
			END IF;
			d[i*(n+1)+j] := LEAST(d[(i-1)*(n+1)+j]+1, d[i*(n+1)+j-1]+1, d[(i-1)*(n+1)+j-1]+c);
		END LOOP;
	END LOOP;
 
	return d[m*(n+1)+n];	
END;
$$ LANGUAGE plpgsql IMMUTABLE

第一個版本是一個動態規劃演算法,添加了只使用動態規劃矩陣最後兩行進行計算的最佳化。

def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    # len(s1) >= len(s2)
    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1       # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    
    return previous_row[-1]

第二個版本

def lev(a, b):
    if not a: return len(b)
    if not b: return len(a)
    return min(lev(a[1:], b[1:])+(a[0] != b[0]), lev(a[1:], b)+1, lev(a, b[1:])+1)

(注意,雖然非常緊湊,但此實現的執行時間非常差,在實際應用中不可用。一種使其明顯更快的方法是新增 @functools.cache 裝飾器,但由於許多字串複製,它並不能使其最優。)

第三個版本(有效)

def LD(s,t):
    s = ' ' + s
    t = ' ' + t
    d = {}
    S = len(s)
    T = len(t)
    for i in range(S):
        d[i, 0] = i
    for j in range (T):
        d[0, j] = j
    for j in range(1,T):
        for i in range(1,S):
            if s[i] == t[j]:
                d[i, j] = d[i-1, j-1]
            else:
                d[i, j] = min(d[i-1, j], d[i, j-1], d[i-1, j-1]) + 1
    return d[S-1, T-1]

(注意,雖然緊湊,但此實現的執行時間相對較差。)

第四個版本

def levenshtein(seq1, seq2):
    oneago = None
    thisrow = range(1, len(seq2) + 1) + [0]
    for x in xrange(len(seq1)):
        twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1]
        for y in xrange(len(seq2)):
            delcost = oneago[y] + 1
            addcost = thisrow[y - 1] + 1
            subcost = oneago[y - 1] + (seq1[x] != seq2[y])
            thisrow[y] = min(delcost, addcost, subcost)
    return thisrow[len(seq2) - 1]

(注意,此實現的時間複雜度為 O(N*M),空間複雜度為 O(M),其中 N 和 M 分別是兩個序列的長度。)

第五個版本,使用 NumPy 對第一個版本進行向量化。在我的測試用例中,速度提高了約 40%。但是,這個 NumPy 向量化的 Python 版本在 levenshtein('aabcc', 'bccdd') 的情況下錯誤地返回 5(應該為 4,因為有兩個 'a' 插入 + 兩個 'd' 刪除)。原因是刪除步驟 np.minimum(current_row[1:], current_row[0:-1] + 1) 沒有處理 current_row[j] 的最小值可能是 current_row[j-2] + 2 的情況,因為在此示例中存在兩個連續的刪除。參見 討論

def levenshtein(source, target):
    if len(source) < len(target):
        return levenshtein(target, source)

    # So now we have len(source) >= len(target).
    if len(target) == 0:
        return len(source)

    # We call tuple() to force strings to be used as sequences
    # ('c', 'a', 't', 's') - numpy uses them as values by default.
    source = np.array(tuple(source))
    target = np.array(tuple(target))

    # We use a dynamic programming algorithm, but with the
    # added optimization that we only need the last two rows
    # of the matrix.
    previous_row = np.arange(target.size + 1)
    for s in source:
        # Insertion (target grows longer than source):
        current_row = previous_row + 1

        # Substitution or matching:
        # Target and source items are aligned, and either
        # are different (cost of 1), or are the same (cost of 0).
        current_row[1:] = np.minimum(
                current_row[1:],
                np.add(previous_row[:-1], target != s))

        # Deletion (target grows shorter than source):
        current_row[1:] = np.minimum(
                current_row[1:],
                current_row[0:-1] + 1)

        previous_row = current_row

    return previous_row[-1]

(注意,此實現僅在權重不依賴於編輯的字元時才有效。)


第六個版本,來自維基百科關於萊文斯坦距離的文章;使用兩行矩陣的迭代方法。

# Christopher P. Matthews
# christophermatthews1985@gmail.com
# Sacramento, CA, USA

def levenshtein(s, t):
        ''' From Wikipedia article; Iterative with two matrix rows. '''
        if s == t: return 0
        elif len(s) == 0: return len(t)
        elif len(t) == 0: return len(s)
        v0 = [None] * (len(t) + 1)
        v1 = [None] * (len(t) + 1)
        for i in range(len(v0)):
            v0[i] = i
        for i in range(len(s)):
            v1[0] = i + 1
            for j in range(len(t)):
                cost = 0 if s[i] == t[j] else 1
                v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost)
            for j in range(len(v0)):
                v0[j] = v1[j]
                
        return v1[len(t)]
function (str1, str2) 
{
    if (typeof(str1) != "character" && class(str1) != "factor") 
        stop(sprintf("Illegal data type: %s", typeof(str1)))
    if (class(str1) == "factor") 
        str = as.character(str1)
    if (typeof(str2) != "character" && class(str2) != "factor") 
        stop(sprintf("Illegal data type: %s", typeof(str2)))
    if (class(str2) == "factor") 
        str = as.character(str2)
    if ((is.array(str1) || is.array(str2)) && dim(str1) != dim(str2)) 
        stop("non-conformable arrays")
    if (length(str1) == 0 || length(str2) == 0) 
        return(integer(0))
    l1 <- length(str1)
    l2 <- length(str2)
    out <- .C("levenshtein", as.character(str1), as.character(str2), 
        l1, l2, ans = integer(max(l1, l2)), PACKAGE = "RecordLinkage")
    if (any(is.na(str1), is.na(str2))) 
        out$ans[is.na(str1) | is.na(str2)] = NA
    if (is.array(str1)) 
        return(array(out$ans, dim(str1)))
    if (is.array(str2)) 
        return(array(out$ans, dim(str2)))
    return(out$ans)
}

"(注意) 這只是萊文斯坦距離的許多實現之一,但我無法讓其他實現工作。

/* rexx levenshtein calculates the edit distance   Karlocai 2009-01-18
    between 2 strings s and t

   This implementation of the Levenshtein algorithm 
   uses one row only (0..n), containing 
   - values of the previous line in columns [i-1]..n and 
   - values of the current  line in columns 1..[i-2]
   The current left value is kept in the variable lc
   i: column 1..n of s, 1st index
   j: row    1..m of t, 2nd index
*/

 Parse Arg s,t                -- gets 2 strings as parameter

 n = Length(s)                -- checks the parameters
 m = Length(t)
 If n = 0 Then
   Return m
 If m = 0 Then
   Return n

 Do i = 0 To n                -- initializes the 1st row
   r.i = i
 End

 Do j = 1 To m                -- for each row
   lc = j                     -- left column start value
   Do i = 1 To n              -- for each column
     nv = Min(r.i + 1,   ,
              lc  + 1,   ,
              r.[i-1] + (Substr(s,i,1) <> Substr(t,j,1)))  
     r.[i-1] = lc             -- sets previous left column
     lc = nv                  -- current left column
   End
   r.n = lc                   -- sets last current value
 End
 Return r.n

此 Ruby 版本很簡單,但速度非常慢,儘管它可以與任何實現了 '==' 的元素的陣列一起使用。

def levenshtein(first:, second:)
  case
    when first.empty? then second.length
    when second.empty? then first.length
    else [(first[0] == second[0] ? 0 : 1) + levenshtein(first: first[1..-1], second: second[1..-1]),
          1 + levenshtein(first: first[1..-1], second: second),
          1 + levenshtein(first: first, second: second[1..-1])].min
  end
end

此記憶化版本快得多。

def levenshtein(first:, second:)
  matrix = [(0..first.length).to_a]
  (1..second.length).each do |j|
    matrix << [j] + [0] * (first.length)
  end

  (1..second.length).each do |i|
    (1..first.length).each do |j|
      if first[j-1] == second[i-1]
        matrix[i][j] = matrix[i-1][j-1]
      else
        matrix[i][j] = [
          matrix[i-1][j],
          matrix[i][j-1],
          matrix[i-1][j-1],
        ].min + 1
      end
    end
  end
  return matrix.last.last
end

另一個記憶化版本。

def levenshtein(first:, second:)
  m, n = first.length, second.length
  return m if n == 0
  return n if m == 0

  # Create our distance matrix
  d = Array.new(m+1) {Array.new(n+1)}
  0.upto(m) { |i| d[i][0] = i }
  0.upto(n) { |j| d[0][j] = j }

  1.upto(n) do |j|
    1.upto(m) do |i|
      d[i][j] = first[i-1] == second[j-1] ? d[i-1][j-1] : [d[i-1][j]+1,d[i][j-1]+1,d[i-1][j-1]+1,].min
    end
  end
  d[m][n]
end

可以使用 C 繫結獲得字串的萊文斯坦距離的更快實現 此處

Rust(beta2)中的簡單實現,從 C 版本轉換而來。它比絕對最小值更通用,事實上它接受任何可以轉換為 &[u8] 的型別。

fn levenshtein<T1: AsRef<[u8]>, T2: AsRef<[u8]>>(s1: T1, s2: T2) -> usize {
    let v1 = s1.as_ref();
    let v2 = s2.as_ref();

    // Early exit if one of the strings is empty
    let v1len = v1.len();
    let v2len = v2.len();
    if v1len == 0 { return v2len; }
    if v2len == 0 { return v1len; }

    #[inline]
    fn min3<T: Ord>(v1: T, v2: T, v3: T) -> T{
        std::cmp::min(v1, std::cmp::min(v2, v3))
    }

    #[inline]
    fn delta(x: u8, y: u8) -> usize {
        if x == y { 0 } else { 1 }
    }

    let mut column: Vec<usize> = (0..v1len+1).collect();
    for x in 1..v2len+1 {
        column[0] = x;
        let mut lastdiag = x-1;

        for y in 1..v1len+1 {
            let olddiag = column[y];
            column[y] = min3(
                column[y] + 1, 
                column[y-1] + 1, 
                lastdiag + delta(v1[y-1], v2[x-1]),
            );
            lastdiag = olddiag;
        }
    }

    column[v1len]
}

您可以在 Rust Playpen 上對其進行即時測試。

命令式版本

[編輯 | 編輯原始碼]

函式式版本可能會更加簡潔。

def levenshtein(str1: String, str2: String): Int = {
    val lenStr1 = str1.length
    val lenStr2 = str2.length

    val d: Array[Array[Int]] = Array.ofDim(lenStr1 + 1, lenStr2 + 1)

    for (i <- 0 to lenStr1) d(i)(0) = i
    for (j <- 0 to lenStr2) d(0)(j) = j

    for (i <- 1 to lenStr1; j <- 1 to lenStr2) {
      val cost = if (str1(i - 1) == str2(j - 1)) 0 else 1

      d(i)(j) = min(
        d(i-1)(j  ) + 1,     // deletion
        d(i  )(j-1) + 1,     // insertion
        d(i-1)(j-1) + cost   // substitution
      )
    }

    d(lenStr1)(lenStr2)
  }

  def min(nums: Int*): Int = nums.min

函式式版本

[編輯 | 編輯原始碼]
import scala.collection.mutable
import scala.collection.parallel.ParSeq

def levenshtein(s1: String, s2: String): Int = {
  val memorizedCosts = mutable.Map[(Int, Int), Int]()

  def lev: ((Int, Int)) => Int = {
    case (k1, k2) =>
      memorizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
        case (i, 0) => i
        case (0, j) => j
        case (i, j) =>
          ParSeq(1 + lev((i - 1, j)),
            1 + lev((i, j - 1)),
            lev((i - 1, j - 1))
              + (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
      })
  }

  lev((s1.length, s2.length))
}
;; using the `match' macro as defined in http://synthcode.com/scheme/match.scm

(define/memoized (edit-distance a b)
  (match `(,a ,b)
    (`(,a ())
     (length a))
    (`(() ,b)
     (length b))
    (`((,a0 . ,a*) (,b0 . ,b*))
     (min (+ (edit-distance a* b) 1)
          (+ (edit-distance a b*) 1)
          (+ (edit-distance a* b*)
             (if (equal? a0 b0) 0 2))))))

;; where the `define/memoized' special form can be defined in the following way:

(define (memoized proc)
  (let ((cache (make-hash-table)))
    (lambda args
      (match (hash-get-handle cache args)
        (`(,key . ,memoized-result)
         (apply values memoized-result))
        (_
         (call-with-values (lambda () (apply proc args))
           (lambda result
             (hash-set! cache args result)
             (apply values result))))))))

(define-syntax-rule (define/memoized (name . args) . body)
  (define name (memoized (lambda args . body))))

Smalltalk

[編輯 | 編輯原始碼]
levenshteinDistanceBetween: stringOne and: stringTwo 
	
	"Iterative calculation of the Levenshtein distance between two strings."
	
	| arrayTwo arrayOne|
	" degenerate cases"
	stringTwo = stringOne 
		ifTrue:[^0].
	(stringTwo size) = 0 
		ifTrue:[^(stringOne size)].
	(stringOne size) = 0 
		ifTrue:[^(stringTwo size)].
	
	"create two work vectors of integer distances"
	arrayOne := Array new:((stringTwo size) + 1).
	arrayTwo := Array new:((stringTwo size) + 1).
	
	"initialize v0 (the previous row of distances)
    	this row is A[0][i]: edit distance for an empty s
    	the distance is just the number of characters to delete from t"
	(1 to: (arrayOne size)) do:[ :i | arrayOne at: i put: i-1 ].
	
	(1 to: (stringOne size)) do: [ : i | 	
		" calculate v1 (current row distances) from the previous row v0
		first element of v1 is A[i+1][0] edit distance is delete (i+1) chars from s to match empty t"
  		arrayTwo at: 1 put: i.
		
		 " use formula to fill in the rest of the row"
		(1 to: (stringTwo size)) do: [ :j || cost minimum minimumAux |
			((stringOne at: i) = (stringTwo at: j)) 
				ifTrue:[cost:=0]
				ifFalse:[cost:=1].			
			minimumAux := ((arrayTwo at: j) + 1) min: ((arrayOne at: (j+1)) + 1).
			minimum :=  minimumAux min: ((arrayOne at: j) + cost).
			arrayTwo at: (j+1) put: minimum.].
		
		(1 to: (arrayOne size)) do: [ :j | arrayOne at: j put: (arrayTwo at: j) ].
		
		].

	^arrayTwo at: ((stringTwo size)+1).
/**
 * Levenshtein edit distance calculator
 *
 * Inspired by https://gist.github.com/bgreenlee/52d93a1d8fa1b8c1f38b
 * Improved with http://stackoverflow.com/questions/26990394/slow-swift-arrays-and-strings-performance
 */

class Levenshtein {

    private class func min(numbers: Int...) -> Int {
        return numbers.reduce(numbers[0], combine: {$0 < $1 ? $0 : $1})
    }

    class Array2D {
        var cols:Int, rows:Int
        var matrix: [Int]

        init(cols:Int, rows:Int) {
            self.cols = cols
            self.rows = rows
            matrix = Array(count:cols*rows, repeatedValue:0)
        }

        subscript(col:Int, row:Int) -> Int {
            get {
                return matrix[cols * row + col]
            }
            set {
                matrix[cols*row+col] = newValue
            }
        }

        func colCount() -> Int {
            return self.cols
        }

        func rowCount() -> Int {
            return self.rows
        }
    }

    class func distanceBetween(aStr: String, and bStr: String) -> Int {
        let a = Array(aStr.utf16)
        let b = Array(bStr.utf16)

        let dist = Array2D(cols: a.count + 1, rows: b.count + 1)

        for i in 1...a.count {
            dist[i, 0] = i
        }

        for j in 1...b.count {
            dist[0, j] = j
        }

        for i in 1...a.count {
            for j in 1...b.count {
                if a[i-1] == b[j-1] {
                    dist[i, j] = dist[i-1, j-1]  // noop
                } else {
                    dist[i, j] = min(
                        dist[i-1, j] + 1,  // deletion
                        dist[i, j-1] + 1,  // insertion
                        dist[i-1, j-1] + 1  // substitution
                    )
                }
            }
        }

        return dist[a.count, b.count]
    }
}
%find the levenshtein distance
function LD (s, t : string) : int
    % a couple variables
    var cell_above := 0
    var cell_left := 0
    var cell_diagonal := 0
    var cost := 0

    %lengths
    var n := length (s)
    var m := length (t)

    %if a string is zero
    if n = 0 then
        result m
    elsif m = 0 then
        result n
    else
    end if
    % make an array with both words
    var matrix : array 0 .. n, 0 .. m of int

    % initialize array to the length of each word
    for i : 0 .. n
        matrix (i, 0) := i
    end for

    for i : 0 .. m
        matrix (0, i) := i
    end for
    % calculate total changes
    for i : 1 .. n
        for j : 1 .. m

            if s (i) = t (j) then
cost := 0
            else
                cost := 1
            end if
            cell_above := matrix (i - 1, j) + 1
            cell_left := matrix (i, j - 1) + 1
            cell_diagonal := matrix (i - 1, j - 1) + cost
            var mini := min (cell_above, cell_left)
            matrix (i, j) := min (mini, cell_diagonal)
        end for
    end for
    result matrix (n, m)
end LD

put LD ("s", "t")

此版本與本文中的 JavaScript 和 PHP 實現相同。

Function levenshtein( a, b )
	Dim i,j,cost,d,min1,min2,min3
	
 ' Avoid calculations where there there are empty words
	If Len( a ) = 0 Then levenshtein = Len( b ): Exit Function
	If Len( b ) = 0 Then levenshtein = Len( a ): Exit Function
	
 ' Array initialization	
	ReDim d( Len( a ), Len( b ) )

	For i = 0 To Len( a ): d( i, 0 ) = i: Next
	For j = 0 To Len( b ): d( 0, j ) = j: Next
	
 ' Actual calculation
	For i = 1 To Len( a )
		For j = 1 To Len( b )
                        If Mid(a, i, 1) = Mid(b, j, 1) Then cost = 0 Else cost = 1 End If
			
			' Since min() function is not a part of VBScript, we'll "emulate" it below
			min1 = ( d( i - 1, j ) + 1 )
			min2 = ( d( i, j - 1 ) + 1 )
			min3 = ( d( i - 1, j - 1 ) + cost )
			
			If min1 <= min2 And min1 <= min3 Then
				d( i, j ) = min1
			ElseIf min2 <= min1 And min2 <= min3 Then
				d( i, j ) = min2
			Else
				d( i, j ) = min3
			End If
		Next
	Next

	levenshtein = d( Len( a ), Len( b ) )
End Function

Visual Basic for Applications(無 Damerau 擴充套件)

[編輯 | 編輯原始碼]

此版本與本文中的 JavaScript 和 PHP 實現相同。當我嘗試使用本文中的其他 VBA 實現時遇到問題,因此我不得不採用下面的版本。

Application.WorksheetFunction.Min() 方法是 Excel 特定的。如果您在其他啟用 VBA 的應用程式中實現它,請取消註釋條件塊並註釋 Application.WorksheetFunction.Min() 行。

Function levenshtein(a As String, b As String) As Integer

    Dim i As Integer
    Dim j As Integer
    Dim cost As Integer
    Dim d(,) As Integer
    Dim min1 As Integer
    Dim min2 As Integer
    Dim min3 As Integer
    
    If Len( a ) = 0 Then
        levenshtein = Len( b )
        Exit Function
    End If
    
    If Len( b ) = 0 Then
        levenshtein = Len( a )
        Exit Function
    End If
    
    ReDim d(Len(a), Len(b))
    
    For i = 0 To Len(a)
        d(i, 0) = i
    Next
    
    For j = 0 To Len(b)
        d(0, j) = j
    Next
    
    For i = 1 To Len(a)
        For j = 1 To Len(b)
            If Mid(a, i, 1) = Mid(b, j, 1) Then
                cost = 0
            Else
                cost = 1
            End If
            
            ' Since Min() function is not a part of VBA, we'll "emulate" it below
            min1 = (d(i - 1, j) + 1)
            min2 = (d(i, j - 1) + 1)
            min3 = (d(i - 1, j - 1) + cost)
            
'            If min1 <= min2 And min1 <= min3 Then
'                d(i, j) = min1
'            ElseIf min2 <= min1 And min2 <= min3 Then
'                d(i, j) = min2
'            Else
'                d(i, j) = min3
'            End If
            
            ' In Excel we can use Min() function that is included
            ' as a method of WorksheetFunction object
            d(i, j) = Application.WorksheetFunction.Min(min1, min2, min3)
        Next
    Next
    
    levenshtein = d(Len(a), Len(b))

End Function

此版本與本文中的 VB 實現相同。

type twoDimArrayType
 	secondArray() as integer
end type
dim FirstArray() as twoDimArrayType

Dim i As Integer
Dim j As Integer
Dim cost As Integer
Dim d() As Integer
Dim min1 As Integer
Dim min2 As Integer
Dim min3 As Integer
    
declare Function calculateLevenshteinDistance(byval firstString As String, byval secondString As String) As Integer
Function calculateLevenshteinDistance(byval firstString As String, byval secondString As String) As Integer
	print chr$(12)
'If one of the parameter is null, then the result will be the length of the other parameter...
  if Len(firstString) = 0 Then
      calculateLevenshteinDistance = Len(secondString)
      exit Function
  end if
  if Len(secondString) = 0 Then
      calculateLevenshteinDistance = Len(firstString)
       exit Function
  end if
  
'Initializing array...
	redim FirstArray(len(firstString)+1)
	for i=1 to ubound(FirstArray)
		redim FirstArray(i).secondArray(len(secondString)+1)
	next

'Deletion...
  For i = 1 To ubound(FirstArray)
      FirstArray(i).secondArray(1) = i-1
  Next
  
'Insertion ...
  For i = 1 To ubound(FirstArray(ubound(FirstArray)).secondArray)
      FirstArray(1).secondArray(i) = i-1
  Next
	
'Actual calculation...	
  for i=2 to ubound(FirstArray)
  	for j=2 to ubound(FirstArray(i).secondArray)	
    	If StringCompare(Mid$(firstString, i-1, 1), Mid$(secondString, j-1, 1))=0  Then
      	cost = 0
      Else
      	cost = 1
      End If
			min1 =  FirstArray(i-1).secondArray(j) + 1 
			min2 =  FirstArray(i).secondArray(j-1) + 1 
			min3 =  FirstArray(i-1).secondArray(j-1) + cost    
			If min1 <= min2 And min1 <= min3 Then
				FirstArray(i).secondArray(j) = min1
			ElseIf min2 <= min1 And min2 <= min3 Then
				FirstArray(i).secondArray(j) = min2
			Else
				FirstArray(i).secondArray(j) = min3
			End If			
  	Next
  Next

'Calculating Return Value...
  calculateLevenshteinDistance = FirstArray(ubound(FirstArray)).secondArray(ubound(FirstArray(ubound(FirstArray)).secondArray))
End Function

這是 Teslock 機器語言中的萊文斯坦距離計算。

.declare singlecall virtual LevenshteinDistance[args(2) string s1, string s2]: out unsigned int
 main
    (
    define unsigned int constant "cost_ins" == 1;
    define unsigned int constant "cost_del" == 1;
    define unsigned int constant "cost_sub" == 1;

    define unsigned int variable "n1" == calculate::string_operations>length("s1");
    define unsigned int variable "n2" == calculate::string_operations>length("s2");

    define unsigned int_array(calculate::string_operations>array_instantiation("p")>fixed_length("n2", ++1));
    define unsigned int_array(calculate::string_operations>array_instantiation("q")>fixed_length("n2", ++1));
    define unsigned int_array(calculate::string_operations>array_instantiation("r")>variable_length);

    p>array_vector>0 == 0;
    loop_for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
    (
        p>array_vector>j == p>array_vector>j::access>+constants::cost_ins;
    )

    q>array_vector>0 == 0;
    loop_for>finalized(define unsigned int variable "i" == 1)>break_condition("i" <= "n1")>forward_condition(++i)
    (
        q>array_vector>0 == p>array_vector>0::access>+constants::cost_del;
        
        loop for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
        (
            define unsigned int variable "d_del" == p>array_vector>j::access>+constants::cost_del;
            define unsigned int variable "d_ins" == q>array_vector>j[delegate_handle::j::access>-1]>+constants::cost_ins;
            define unsigned int veriable "d_sub" == p>array_vector>j[delegate_handle::j::access>-1]>+logical_operations>xor_result["s1"::access>"s1"[delegate_handle::j::access>-1]?0>return_handle_as_result constants::"cost_sub"];
            q>array_vector>j == dll_extern::math_interop_singlecall(min)::[args(3) (d_del, d_ins), d_sub;
        )
        local>"r" == "p"(self_typecast::ignore_unsafe_condition);
        local>"p" == "q"(self_typecast);
        local>"q" == "r"(self_typecast);    
    )
    
    logical_result(param p::singlecall)::define>"return" == p>array_vector>"n2";
)

請注意,存在一個標準的 ABAP 函式 distance,它可以計算萊文斯坦距離,無需下面的程式碼。

REPORT  zlevenshtein.
*----------------------------------------------------------------------*
*       CLASS lcl_levenshtein DEFINITION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein DEFINITION.
  PUBLIC SECTION.
    CLASS-METHODS:
      distance IMPORTING i_s TYPE csequence
                         i_t TYPE csequence
               RETURNING value(r) TYPE i.
ENDCLASS.                    "lcl_c DEFINITION
*----------------------------------------------------------------------*
*       CLASS lcl_levenshtein IMPLEMENTATION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein IMPLEMENTATION.
  METHOD distance.
    DEFINE m_get.
      l_m_index = ( ( l_l_t * ( l_m_i + ( &2 ) ) ) + l_m_j + ( &1 ) ) + 1 .
      read table l_d into r index l_m_index.
      add &3 to r.
      insert r into table l_v.
    END-OF-DEFINITION.
    DATA: l_d       TYPE STANDARD TABLE OF i,
          l_v       TYPE SORTED TABLE OF i WITH UNIQUE KEY table_line,
          l_cost    TYPE i,
          l_m_i     TYPE i,
          l_m_j     TYPE i,
          l_m_index TYPE i,
          l_l_s     TYPE i,
          l_l_t     TYPE i.

    l_l_s = STRLEN( i_s ).
    l_l_t = STRLEN( i_t ).

    DO l_l_s TIMES.
      l_m_i = sy-index - 1.

      DO l_l_t TIMES. "#EC CI_NESTED
        l_m_j = sy-index - 1.

          IF i_s+l_m_i(1) = i_t+l_m_j(1).
            l_cost = 0.
          ELSE.
            l_cost = 1.
          ENDIF.
        IF l_m_j = 0.
          r = l_cost.
        ELSEIF l_m_i = 0.
          r = l_cost.
        ELSE.

          CLEAR l_v.
          m_get: -1  0 1, 0 -1 1, -1 -1 l_cost.
          READ TABLE l_v INTO r INDEX 1.
        ENDIF.
        APPEND r TO l_d.

      ENDDO.
    ENDDO.
  ENDMETHOD.                    "distance
ENDCLASS.                    "lcl_levenshtein IMPLEMENTATION

START-OF-SELECTION.
  DATA: d TYPE i.

  d = lcl_levenshtein=>distance( i_s = 'sitting' i_t = 'kitten' ).
  WRITE: / d.

Pick Basic

[編輯 | 編輯原始碼]
       IF STRING1 = STRING2 THEN
          LD = 0
       END ELSE
          S.LEN = LEN(STRING1)
          C.LEN = LEN(STRING2)
          MAT LD.MTX = ''
          DIM LD.MTX(100,100)
          FOR I = 3 TO S.LEN + 2
             LD.MTX(I,1) = STRING1[I-2,1]
          NEXT I
          FOR I = 3 TO S.LEN + 2
             LD.MTX(I,2) = I - 2
          NEXT I
          FOR I = 3 TO C.LEN + 2
             LD.MTX(1,I) = STRING2[I-2,1]
          NEXT I
          LD.MTX(2,2) = 0
          FOR I = 3 TO C.LEN + 2
             LD.MTX(2,I) = I - 2
          NEXT I
          FOR I = 3 TO (S.LEN+2)
             S.LETTER = LD.MTX(I,1)
             FOR J = 3 TO (C.LEN+2)
                C.LETTER = LD.MTX(1,J)
                IF C.LETTER = S.LETTER THEN COST = 0 ELSE COST = 1
                P1 = LD.MTX(I-1,J) + 1
                P2 = LD.MTX(I,J-1) + 1
                P3 = LD.MTX(I-1,J-1) + COST
                IF P1 < P2 THEN LD.NUM = P1 ELSE LD.NUM = P2
                IF P3 < LD.NUM THEN LD.NUM = P3
                LD.MTX(I,J) = LD.NUM
             NEXT J
          NEXT I
          LD = LD.MTX(S.LEN+2,C.LEN+2)
       END


  1. https://php.net.tw/levenshtein

C 版本的實現

// Damerau-Levenshtein includes transposition, remove for normal Levenshtein 
pub fn levenshteinDistance(a: []const u8, b: []const u8) !u8 {
    const length: usize = a.len + 1;
    var column = try std.ArrayList(u8).initCapacity(std.heap.page_allocator, length);
    try column.appendNTimes(0, length);
    defer column.deinit();
    var y: u8 = 1;
    while (y <= a.len) : (y += 1) {
        column.items[y] = y;
        var x: u8 = 1;
        while (x <= b.len) : (x += 1) {
            column.items[0] = x;
            var last_diag = x - 1;
            var j: u8 = 1;
            while (j <= a.len) : (j += 1) {
                const old_diag = column.items[j];
                const cost: u8 = if (a[j - 1] == b[x - 1]) 0 else 1;
                const deletion: u8 = column.items[j] + 1;
                const insertion: u8 = column.items[j - 1] + 1;
                const substitution: u8 = last_diag + cost;
                const transposition: u8 = if (j > 1 and x > 1 and a[j - 1] == b[x - 2] and a[j - 2] == b[x - 1]) column.items[j - 2] + 1 else std.math.maxInt(u8);
                column.items[j] = std.mem.min(u8, &.{ deletion, insertion, substitution, transposition });
                last_diag = old_diag;
            }
        }
    }
    return column.items[length - 1];
}
華夏公益教科書