59 lines
1.5 KiB
Python
59 lines
1.5 KiB
Python
|
import numpy as ν
|
|||
|
import csv
|
|||
|
import get_emb
|
|||
|
|
|||
|
MAX_RES = 10
|
|||
|
|
|||
|
φ = open('beta', 'r'); # coefficients
|
|||
|
Δ = [float(el) for el in (open('Delta', 'r').read().split('\n')[:-1])]
|
|||
|
#α = -0.8569279; # some magic constant
|
|||
|
α = 0;
|
|||
|
M = open('titles2.text', 'r').read().strip().split('\n') # TITLE\nAUTHORS\nACCEPTED?
|
|||
|
T = ν.array(M[0::4])
|
|||
|
A = ν.array(M[1::4])
|
|||
|
O = ν.array(M[2::4])
|
|||
|
X = ν.loadtxt(open('embeddings2.nsv', 'rb'), delimiter=',', skiprows=0)
|
|||
|
|
|||
|
NN = open('NN', 'r').read().split('\n')
|
|||
|
|
|||
|
β = φ.read().split('\n');
|
|||
|
β = β[:-1]
|
|||
|
β = [float(el) for el in β]
|
|||
|
|
|||
|
def get(θ):
|
|||
|
return(get_emb.get_embedding(θ))
|
|||
|
|
|||
|
def percent(χ):
|
|||
|
γ = α + ν.dot(χ, β)
|
|||
|
π = ν.exp(γ) / (1 + ν.exp(γ))
|
|||
|
|
|||
|
return(str(π)[2:4] + '%')
|
|||
|
|
|||
|
def closest(χ, n):
|
|||
|
n = abs(n)
|
|||
|
n = n % MAX_RES
|
|||
|
if n == 0: n = MAX_RES
|
|||
|
|
|||
|
ψ = ν.array(ν.dot(X, χ))
|
|||
|
topn = T[ν.argsort(ψ)[-n:]]
|
|||
|
aopn = A[ν.argsort(ψ)[-n:]]
|
|||
|
oopn = O[ν.argsort(ψ)[-n:]]
|
|||
|
|
|||
|
print(ν.argsort(ψ)[-n:])
|
|||
|
|
|||
|
out = ""#"tail prob = " + str(percentile_far(ν.max(ψ))) + "\n"
|
|||
|
for i in reversed(range(len(topn))):
|
|||
|
if oopn[i] == "TRUE":
|
|||
|
p = "presented"
|
|||
|
else:
|
|||
|
p = "online-only"
|
|||
|
out += topn[i] + " <i>(" + aopn[i] + ", " + p + ")</i>\n"
|
|||
|
|
|||
|
tailprob = int(percentile_far(ν.max(ψ))*100)
|
|||
|
|
|||
|
return [out, tailprob]
|
|||
|
|
|||
|
def percentile_far(q_dist):
|
|||
|
return sum(1*(ν.array(Δ)<=q_dist)) / len(Δ) # fraction of abstracts further from their nearest neighbor than χ
|
|||
|
|