diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2019-03-20 04:30:29 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2019-03-20 04:30:29 +0100 |
commit | b4bf8417af0d8ebff2c50570c70fdecaf6a53ed9 (patch) | |
tree | 4b9341a67c6fc9fd48cae5eecee79ff10ee0fe2f /src/postprocess/simi.py | |
parent | c4a8bd34b41b2be26426ea01aafc69d41260cce5 (diff) |
Add code to look up new zip codes for 1995 entries and fix up street names
Diffstat (limited to 'src/postprocess/simi.py')
-rwxr-xr-x | src/postprocess/simi.py | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/src/postprocess/simi.py b/src/postprocess/simi.py new file mode 100755 index 0000000..62ff1ff --- /dev/null +++ b/src/postprocess/simi.py | |||
@@ -0,0 +1,11 @@ | |||
1 | #!python | ||
2 | |||
3 | import textdistance | ||
4 | from sys import stdin | ||
5 | |||
def similarity_percent(x, y):
    """Return a blended similarity score for *x* and *y* as an integer percentage.

    Both strings are case-folded, then scored with three textdistance
    measures (Ratcliff-Obershelp, Jaro-Winkler and cosine). The three
    normalized similarities are averaged and scaled by 100; the result is
    truncated (not rounded) by ``int``.
    """
    x = x.casefold()
    y = y.casefold()
    v = (
        textdistance.ratcliff_obershelp.normalized_similarity(x, y)
        + textdistance.jaro_winkler.normalized_similarity(x, y)
        + textdistance.cosine.normalized_similarity(x, y)
    )
    return int(100 * (v / 3))


# Stream stdin line by line instead of loading the whole input first.
for line in stdin:
    # Remove the line terminator before splitting; otherwise the second
    # field carries a trailing "\n" into the comparison and identical
    # pairs can never reach a score of 100.
    # A line that does not hold exactly two tab-separated fields still
    # raises ValueError, so the output stays aligned with the input lines.
    x, y = line.rstrip('\r\n').split('\t')
    print(similarity_percent(x, y))