"""Nearest-neighbour lookup and logistic scoring over precomputed embeddings.

Importing this module loads the following files from the current working
directory: ``beta``, ``Delta``, ``titles2.text``, ``embeddings2.nsv`` and
``NN``.
"""
import csv

import numpy as ν

import get_emb

# Upper bound on the number of results `closest` returns.
MAX_RES = 10


# --- data loading -----------------------------------------------------------

# Logistic-regression coefficients, one float per line.  Tokenising on
# whitespace (instead of dropping the last '\n'-separated field) keeps the
# final value even when the file has no trailing newline.
# `φ` stays bound (as a closed handle) for backward compatibility.
with open('beta', 'r') as φ:
    β = [float(token) for token in φ.read().split()]

# Reference sample of scores that `percentile_far` ranks a query against.
with open('Delta', 'r') as _delta_file:
    Δ = [float(token) for token in _delta_file.read().split()]

# Intercept of the logistic model; a previous value was -0.8569279.
α = 0

# Records are laid out as TITLE\nAUTHORS\nACCEPTED? with a stride of four
# lines; the fourth line of each record is not read here (presumably a
# separator -- confirm against the data file).
with open('titles2.text', 'r') as _titles_file:
    M = _titles_file.read().strip().split('\n')
T = ν.array(M[0::4])  # titles
A = ν.array(M[1::4])  # authors
O = ν.array(M[2::4])  # "TRUE" marks a presented entry, see `closest`

# One comma-separated embedding per row.
with open('embeddings2.nsv', 'rb') as _embedding_file:
    X = ν.loadtxt(_embedding_file, delimiter=',', skiprows=0)

# Raw lines of the 'NN' file; not used by the functions in this module.
with open('NN', 'r') as _nn_file:
    NN = _nn_file.read().split('\n')


# --- public helpers ---------------------------------------------------------

def get(θ):
    """Return the embedding of *θ* as computed by ``get_emb.get_embedding``."""
    return get_emb.get_embedding(θ)


def percent(χ):
    """Return the logistic score of embedding *χ* as a percentage string.

    The score is ``sigmoid(α + χ·β)``.  The percentage is truncated (not
    rounded) to a whole number and zero-padded to two digits, e.g. ``"07%"``,
    ``"50%"``, ``"100%"``.
    """
    γ = α + ν.dot(χ, β)
    # sigmoid(γ) == exp(-log(1 + exp(-γ))); logaddexp keeps this finite for
    # large |γ|, where exp(γ) / (1 + exp(γ)) would evaluate to nan.
    π = float(ν.exp(-ν.logaddexp(0, -γ)))
    # Format numerically: slicing str(π) breaks on values such as 0.5
    # ("5"), 1.0 ("0") or 1e-05 ("-0").
    return f"{int(π * 100):02d}%"


def closest(χ, n):
    """Describe the entries whose embeddings score highest against *χ*.

    The requested count *n* is folded into ``1..MAX_RES``: its absolute value
    is taken modulo ``MAX_RES``, and a remainder of zero means ``MAX_RES``.

    Returns a two-element list ``[text, tailprob]``.  ``text`` holds one
    ``"TITLE (AUTHORS, presented|online-only)\\n"`` line per hit, best match
    first; ``tailprob`` is ``percentile_far`` of the best score, as a
    truncated whole percentage.
    """
    n = abs(n) % MAX_RES
    if n == 0:
        n = MAX_RES

    ψ = ν.array(ν.dot(X, χ))
    # Sort once; the last n indices are the best matches in ascending order.
    best = ν.argsort(ψ)[-n:]
    print(best)  # diagnostic output kept so stdout matches earlier versions

    lines = []
    for idx in best[::-1]:
        status = "presented" if O[idx] == "TRUE" else "online-only"
        lines.append(f"{T[idx]} ({A[idx]}, {status})\n")

    tailprob = int(percentile_far(ν.max(ψ)) * 100)
    return ["".join(lines), tailprob]


def percentile_far(q_dist):
    """Return the fraction of values in ``Δ`` that are ``<= q_dist``.

    Per the original author's note this is the fraction of abstracts further
    from their nearest neighbour than the query.  Raises ``ZeroDivisionError``
    when ``Δ`` is empty.
    """
    return ν.count_nonzero(ν.asarray(Δ) <= q_dist) / len(Δ)