#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# Svensk SOUNDEX för att hitta ex.vis efternamn som
# låter lika men stavas olika
# Version: 0.4 (2007-09-10: 17:00)
import os,sys
verbose = 0  # 1 = verbose ON, 2 = full verbose ON, 0 = verbose OFF

# Substitution tables: each key is the output code, each tuple lists the
# letter groups that are rewritten to that code.  Groups are tried in the
# order written, so longer groups must precede the single letters they
# contain.  All patterns are upper case because the input is upper-cased
# (as text, after decoding) before any rule is applied.
pro = dict(
    A=('AU', 'AO', 'AA', 'AE'),  # A (hard vowel), handled separately
    E=('IJ', 'J', 'IO', 'OE', 'AE', 'AI', 'IE', 'YE', 'II', 'I', 'Y',
       u'Ä', u'Ö', u'É', 'EE'),  # soft vowels
    O=('IU', 'EU', 'OU', 'OO', 'U', u'Å', u'Ü'),  # hard vowels
)
con = dict(
    B=('DD', 'PP', 'FV', 'FF', 'F', 'W', 'V', 'D', 'P'),
    C=('X', 'CK', 'CH', 'Q', 'GG', 'Z', 'SS', 'SC', 'SSJ', 'SJ', 'TJ',
       'S', 'K'),
    J=('LJ', 'GJ', 'DJ', 'NG', 'G'),
    M=('MM', 'MN', 'NN', 'N'),
    R=('RR', 'LL', 'L', 'TT', 'T'),
)
# NOTE(review): as the rules are ordered, 'CH' can never match (every 'H'
# is removed first) and the consonant patterns containing 'J' can never
# match (the vowel pass has already rewritten every 'J' to 'E').  They are
# kept unchanged so that existing codes stay stable.


def svensk_soundex(word, log=None, full=False):
    """Return the Swedish sound-alike code for *word*.

    The word is upper-cased, every 'H' is dropped, and then the vowel
    table (``pro``) followed by the consonant table (``con``) is applied
    as a sequence of plain substring replacements.

    word -- text, or UTF-8 encoded bytes (as in Python 2's sys.argv).
    log  -- optional list.  When given, it receives the verbose trace
            fragments: the upper-cased word followed by ' (:H)', then
            '(pattern:code)' for every rule that fires.
    full -- when true (and *log* is given), the intermediate word is also
            logged as '=WORD,' after every rule that fires.
    """
    # Decode BEFORE upper-casing: upper-casing raw UTF-8 bytes leaves
    # non-ASCII letters untouched, so 'ä' and 'Ä' would get different codes.
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    word = word.upper()
    if log is not None:
        log.append(word + ' (:H)')
    word = word.replace('H', '')
    for table in (pro, con):
        # Sorted key order equals the order the tables are written in.
        # Iterating the dict directly gave no such guarantee on older
        # Pythons, although the result depends on it ('GG' must be tried
        # before 'G', 'NG' before 'N').
        for code in sorted(table):
            for pattern in table[code]:
                if pattern not in word:
                    continue
                if log is not None:
                    log.append('(' + pattern + ':' + code + ')')
                word = word.replace(pattern, code)
                if log is not None and full:
                    log.append('=' + word + ',')
    return word


def main(argv):
    """Print one code per word in argv[1:]; print usage and exit if none."""
    if len(argv) < 2:
        print("Exempel: " + argv[0] + " 'ord1' 'ord2'")
        sys.exit(0)
    for arg in argv[1:]:
        log = [] if verbose else None
        code = svensk_soundex(arg, log, full=(verbose == 2))
        # One line per word: trace fragments (if any) and the final code,
        # separated by single spaces.
        print(' '.join((log or []) + [code]))


if __name__ == '__main__':
    main(sys.argv)