"""
Contains functions implementing edit distance algorithms.
"""
def levenshtein(seq1, seq2, limit=None):
"""Returns the Levenshtein edit distance between two strings."""
oneago = None
thisrow = list(range(1, len(seq2) + 1)) + [0]
for x in range(len(seq1)):
# Python lists wrap around for negative indices, so put the
# leftmost column at the *end* of the list. This matches with
# the zero-indexed strings and saves extra calculation.
oneago, thisrow = thisrow, [0] * len(seq2) + [x + 1]
for y in range(len(seq2)):
delcost = oneago[y] + 1
addcost = thisrow[y - 1] + 1
subcost = oneago[y - 1] + (seq1[x] != seq2[y])
thisrow[y] = min(delcost, addcost, subcost)
if limit and x > limit and min(thisrow) > limit:
return limit + 1
return thisrow[len(seq2) - 1]
def damerau_levenshtein(seq1, seq2, limit=None):
"""Returns the Damerau-Levenshtein edit distance between two strings."""
oneago = None
thisrow = list(range(1, len(seq2) + 1)) + [0]
for x in range(len(seq1)):
# Python lists wrap around for negative indices, so put the
# leftmost column at the *end* of the list. This matches with
# the zero-indexed strings and saves extra calculation.
twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1]
for y in range(len(seq2)):
delcost = oneago[y] + 1
addcost = thisrow[y - 1] + 1
subcost = oneago[y - 1] + (seq1[x] != seq2[y])
thisrow[y] = min(delcost, addcost, subcost)
# This block deals with transpositions
if (
x > 0
and y > 0
and seq1[x] == seq2[y - 1]
and seq1[x - 1] == seq2[y]
and seq1[x] != seq2[y]
):
thisrow[y] = min(thisrow[y], twoago[y - 2] + 1)
if limit and x > limit and min(thisrow) > limit:
return limit + 1
return thisrow[len(seq2) - 1]
[docs]def relative(a, b):
"""Returns the relative distance between two strings, in the range
[0-1] where 1 means total equality.
"""
d = distance(a, b)
longer = float(max((len(a), len(b))))
shorter = float(min((len(a), len(b))))
r = ((longer - d) / longer) * (shorter / longer)
return r
distance = damerau_levenshtein