...
 
Commits (2)
......@@ -53,16 +53,16 @@ def createFromNgramFolder(folder,out_folder=None,left=False):
neighbours_rel[word][neighbour_words] += int(cnt) / _sum
sums_file[ngram] += float(cnt)
_sums_file=sum(sums_file.values())
neighbours_file_rel = defaultdict(lambda: defaultdict(float))
for w in neighbours_file:
for nw in neighbours_file[w]:
neighbours_file_rel[w][nw] = neighbours_file[word][neighbour_words] / _sums_file
if out_folder:
print(f"save:{fn}")
saveNeighboursDict(neighbours_file, os.path.join(out_folder,"absolute",fn))
saveNeighboursDict(neighbours_file_rel, os.path.join(out_folder, "relative", fn))
_sums_file=sum(sums_file.values())
neighbours_file_rel = defaultdict(lambda: defaultdict(float))
for w in neighbours_file:
for nw in neighbours_file[w]:
neighbours_file_rel[w][nw] = neighbours_file[word][neighbour_words] / _sums_file
if out_folder:
print(f"save:{fn}")
saveNeighboursDict(neighbours_file, os.path.join(out_folder,"absolute",fn))
saveNeighboursDict(neighbours_file_rel, os.path.join(out_folder, "relative", fn))
return neighbours,neighbours_rel
......
......@@ -87,12 +87,34 @@ class Flrc(object):
f_cn = self.fqs_sum[cn]
term2 = 1
for l in cn.split(" "):
term3 = 1
#n2sum = 0
cns = cn.split(" ")
#for i in range(0,len(cns)-1):
# n2sum += self.fqs[cn[i:i+1]]
for n,l in enumerate(cns):
if n > 1:
l2sum = self.fqs[" ".join(cns[n-2:n])]
else:
l2sum = 0
if n < len(cns)-1:
#xx = " ".join(cns[n:n+2])
r2sum = self.fqs[" ".join(cns[n:n+2])]
else:
r2sum = 0
term2 = term2 * (self.fls_l[l] +1) *(self.fls_r[l] + 1)
x_l = self.fls_l[l]
x_r = self.fls_r[l]
term3 = term3 * (x_l+ 1 - l2sum) * (x_r + 1 - r2sum) #only bigrams not in cn
term2 = term2 ** (1/(2*len(cn.split(" "))))
term3 = term3 ** (1 /(2 * len(cn.split(" "))))
return f_cn * term2, term2 ,f_cn, f_cn / term2
return f_cn, term2 , term3
......@@ -116,8 +138,8 @@ if __name__ == '__main__':
#sg = pandas.DataFrame(vals)
# sg.to_csv("/tmp/out.csv")
with open(outfn,"w",encoding="utf-8") as outf:
outf.write("\t".join(["name","fcrn","term2","fr","f_icrn"]) + "\n")
for x in flrcn.fqs:
outf.write("\t".join(["name","fc","term2","term3"]) + "\n")
for x in list(flrcn.fqs):
v = flrcn.flr(x)
outf.write(x + "\t" + "\t".join([str(x) for x in v]) + "\n")
......