50 行純 Python code 做中文手寫辨識
Posted by tjwei on 星期六, 6月 25, 2011 with No comments
用 50 行 python 3.2 source code 實作中文手寫辨識的核心,外加訓練也是五十行以內,演算法同 Zinnia。完整 Source Code
辨識核心的 source
from itertools import chain
from math import atan2, hypot


def farthestVertex(path):
    """Find the vertex of *path* that deviates most from its chord.

    The chord is the straight line through the first and last points.

    Args:
        path: non-empty sequence of ``(x, y)`` points.

    Returns:
        ``(index, deviation)`` where *index* is the position of the farthest
        vertex and *deviation* is the squared line residual
        ``|a*x + b*y - c|`` divided by the chord length.  For a path whose
        end points coincide the result is ``(0, 0.0)``.
    """
    first, last = path[0], path[-1]
    if first == last:
        # Degenerate chord (single point or closed stroke): hypot(a, b) would
        # be zero below, so report "no deviation" instead of dividing by it.
        return 0, 0.0
    # Line through first/last written as a*x + b*y = c.
    a, b = first[1] - last[1], last[0] - first[0]
    c = a * first[0] + b * first[1]
    # Ties on the residual resolve to the larger index (tuple comparison).
    maxf, bestj = max((abs(a * x + b * y - c), j)
                      for j, (x, y) in enumerate(path))
    # NOTE(review): this normalises by the chord length, not its square;
    # confirm against the reference implementation before changing it, since
    # trained models depend on the resulting segmentation.
    return bestj, maxf ** 2 / hypot(a, b)


def split(path, i, Error=0.001, kMaxCharacters=50):
    """Recursively split *path* into nearly straight segments.

    Args:
        path: non-empty sequence of ``(x, y)`` points.
        i: identifier of this segment; the two halves produced by a split
            get ``2*i + 1`` and ``2*i + 2``.
        Error: largest deviation (see ``farthestVertex``) still treated as
            straight.
        kMaxCharacters: segments whose identifier exceeds this are dropped,
            which also bounds the recursion.

    Returns:
        An iterable of ``(segment_id, (start_point, end_point))`` pairs in
        pre-order (the segment itself, then its first half, then its second).
    """
    if i > kMaxCharacters:
        return []
    iter0 = ((i, (path[0], path[-1])),)
    j, dist = farthestVertex(path)
    if dist <= Error:
        return iter0
    # Forward the caller's tolerance and id limit so they apply at every
    # depth, not just at the root call.
    return chain(iter0,
                 split(path[:j + 1], i * 2 + 1, Error, kMaxCharacters),
                 split(path[j:], i * 2 + 2, Error, kMaxCharacters))


def toFeature(seg):
    """Turn a segment ``(start, end)`` into a 12-value feature tuple.

    Both points are first shifted by -0.5 on each axis.
    """
    a, b = ((x - 0.5, y - 0.5) for (x, y) in seg)
    return (
        10 * hypot(b[0] - a[0], b[1] - a[1]),        # 10 * length
        atan2(b[1] - a[1], b[0] - a[0]),             # 1 * direction
        10 * a[0], 10 * a[1], 10 * b[0], 10 * b[1],  # 10 * absolute position
        atan2(a[1], a[0]), atan2(b[1], b[0]),        # 1 * absolute degree
        10 * hypot(a[0], a[1]), 10 * hypot(b[0], b[1]),  # 10 * abs. distance
        5 * (b[0] - a[0]), 5 * (b[1] - a[1]))        # 5 * diff


def getFeatures(dim, strokes):
    """Build the sparse feature map of a handwritten character.

    Args:
        dim: ``(width, height)``; every x is divided by ``dim[0]`` and every
            y by ``dim[1]``.
        strokes: iterable of strokes, each a non-empty sequence of ``(x, y)``.

    Returns:
        ``dict`` mapping feature index to value.  Index 2000000 holds the
        number of strokes and, when there is at least one stroke, index
        ``2000000 + number_of_strokes`` holds 10.  With no strokes the result
        is ``{2000000: 0}``.
    """
    x = {}
    prev = None
    count = 0
    for sid, stk in enumerate(strokes):
        path = [(px / dim[0], py / dim[1]) for (px, py) in stk]
        for i, seg in split(path, 0):  # segment features
            x.update(enumerate(toFeature(seg), sid * 1000 + 20 * i + 1))
        if prev is not None:  # movement feature: previous stroke end -> start
            x.update(enumerate(toFeature((prev, path[0])),
                               sid * 1000 + 100001))
        prev = path[-1]
        count = sid + 1
    # Count explicitly instead of reusing the loop variable after the loop,
    # which was undefined for an empty stroke list.
    x[2000000] = count
    if count:
        x[2000000 + count] = 10
    return x


def recognize(model, dim, strokes, nbest=10, best=None):
    """Score a character against every class in *model*.

    Args:
        model: iterable of ``(label, bias, weights)`` where *weights* is an
            iterable of ``(feature_index, weight)`` pairs.
        dim: ``(width, height)`` passed to ``getFeatures``.
        strokes: the strokes passed to ``getFeatures``.
        nbest: maximum number of candidates to keep.
        best: optional initial candidate list of ``(score, label)`` tuples;
            it is not modified.

    Returns:
        List of ``(score, label)`` tuples sorted from best to worst.
    """
    best = [] if best is None else list(best)
    x = getFeatures(dim, strokes)
    for c, b, w in model:
        v = (b + sum(f * x[i] for i, f in w if i in x), c)
        # ``best and`` keeps nbest == 0 from indexing an empty list.
        if len(best) < nbest or (best and best[-1] < v):
            best = sorted(best + [v], reverse=True)[:nbest]
    return best


# SVM training source ("SVM 訓練的 source"):
import random
from collections import defaultdict


def svm_train(ys, xs, C, kINF=10.0**37, kSMALL=10.0**-12, kEPS=0.1):
    """Train a linear classifier by coordinate descent over per-sample alphas.

    Each pass visits the active samples in random order, computes the
    projected gradient of the sample and clips its alpha into ``[0, C]``.
    Samples sitting on a bound whose gradient lies outside the previous
    pass's range are dropped from the active set; once the active set has
    converged the full set is re-checked before stopping.

    Args:
        ys: label per sample (presumably +1 / -1 -- confirm with the caller).
        xs: sparse vector per sample, as a re-iterable collection of
            ``(feature_index, value)`` pairs.
        C: upper bound for every alpha.
        kINF: stand-in for infinity.
        kSMALL: projected gradients no larger than this trigger no update.
        kEPS: stopping tolerance on the projected-gradient range.

    Returns:
        ``defaultdict(float)`` mapping feature index to weight.

    Progress dots are printed to stdout every fourth pass.
    """
    # Start with open bounds so nothing is shrunk on the first pass.
    PGmax_old, PGmin_old = kINF, -kINF
    w = defaultdict(float)
    alpha = [0.0] * len(xs)
    QD = [sum(v * v for _, v in x) for x in xs]  # squared norm per sample
    everything = list(range(len(xs)))
    active = everything
    for itr in range(2000):
        if itr % 4 == 0:
            print(".", end='')
        PGmax_new, PGmin_new = -kINF, kINF
        kept = []
        for i in random.sample(active, len(active)):
            G = sum(w[j] * v for j, v in xs[i] if j in w) * ys[i] - 1
            if alpha[i] == 0.0:
                if G > PGmax_old:
                    continue  # shrink: drop from the active set
                PG = min(G, 0.0)
            elif alpha[i] == C:
                if G < PGmin_old:
                    continue  # shrink: drop from the active set
                PG = max(G, 0.0)
            else:
                PG = G
            kept.append(i)
            PGmax_new = max(PGmax_new, PG)
            PGmin_new = min(PGmin_new, PG)
            if abs(PG) > kSMALL:
                alpha_old = alpha[i]
                if QD[i] > 0.0:
                    target = alpha[i] - G / QD[i]
                else:
                    # All-zero sample: the step is unbounded, so go straight
                    # to the bound in the descent direction instead of
                    # dividing by zero.
                    target = C if G < 0.0 else 0.0
                alpha[i] = min(max(target, 0.0), C)
                d = (alpha[i] - alpha_old) * ys[i]
                for j, v in xs[i]:
                    w[j] += d * v
        active = kept
        if PGmax_new - PGmin_new <= kEPS:
            if len(active) == len(xs):
                break
            # Converged on the shrunk set only: re-check every sample.
            active = everything
            PGmax_old, PGmin_old = kINF, -kINF
        else:
            # Derive the next shrinking bounds from this pass's measurements;
            # a bound on the wrong side of zero is opened up again.
            PGmax_old = PGmax_new if PGmax_new > 0 else kINF
            PGmin_old = PGmin_new if PGmin_new < 0 else -kINF
    return w
Categories: python
0 意見:
張貼留言