Initial commit.
Yeah I know there are a lot of nonessential files but w/e.
This commit is contained in:
commit
2cb11e4933
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
venv
|
||||
.env
|
||||
nohup.out
|
||||
__pycache__
|
||||
.ropeproject
|
5
README.md
Normal file
5
README.md
Normal file
@ -0,0 +1,5 @@
|
||||
# ASCO Abstract Success Predictor
|
||||
|
||||
Simple web interface to predict the chance a given abstract will be accepted for
|
||||
oral presentation, based only on its title. Code is currently in shambles; I
|
||||
might bring it up to my current standards some day.
|
28
app.py
Normal file
28
app.py
Normal file
@ -0,0 +1,28 @@
|
||||
from flask import Flask, redirect, render_template, request, url_for
|
||||
import fit
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
def _sanitize_closest(raw):
    """Escape an untrusted ``closest`` fragment, keeping only ``<i>`` markup.

    The value arrives through the query string and the template renders it
    with ``|safe``, so everything except the italic tags produced by
    ``fit.closest`` must be neutralised before rendering.

    Returns ``None`` unchanged so the template's ``{% if closest %}`` guard
    keeps working.
    """
    import html  # local import: keeps this block independent of the file header

    if raw is None:
        return None
    escaped = html.escape(raw, quote=False)
    # Re-enable the one tag pair the result list legitimately contains.
    return escaped.replace("&lt;i&gt;", "<i>").replace("&lt;/i&gt;", "</i>")


@app.route("/", methods=("GET", "POST"))
def index():
    """Serve the evaluator page.

    POST: embed the submitted title, score it, and redirect to the GET view
    with the results carried in the query string (post/redirect/get).

    GET: render ``index.html`` from the query-string values. All of them are
    user-controllable, so ``closest`` is sanitised before it reaches the
    template.
    """
    if request.method == "POST":
        title = request.form["title"]
        embedding = fit.get(title)

        percent = fit.percent(embedding)
        # fit.closest returns [html_fragment, similarity_percentile].
        closest_html, tprob = fit.closest(embedding, 10)

        return redirect(
            url_for(
                "index",
                tprob=tprob,
                result=percent,
                title=title,
                closest=closest_html,
            )
        )

    result = request.args.get("result")
    title = request.args.get("title")
    closest = _sanitize_closest(request.args.get("closest"))
    tprob = request.args.get("tprob")
    if title is None:
        title = ""
    return render_template(
        "index.html", tprob=tprob, result=result, title=title, closest=closest
    )
|
5252
embeddings2.nsv
Normal file
5252
embeddings2.nsv
Normal file
File diff suppressed because one or more lines are too long
58
fit.py
Normal file
58
fit.py
Normal file
@ -0,0 +1,58 @@
|
||||
import numpy as ν
|
||||
import csv
|
||||
import get_emb
|
||||
|
||||
MAX_RES = 10  # upper bound on the number of neighbours closest() returns


def _read_text(path):
    """Return the full contents of the text file at *path*, closing it promptly."""
    with open(path, 'r') as handle:
        return handle.read()


def _read_floats(path):
    """Parse one float per non-blank line of *path*.

    Blank lines are skipped instead of blindly dropping the last element, so a
    file without a trailing newline does not lose its final value.
    """
    return [float(el) for el in _read_text(path).split('\n') if el.strip()]


# Per-abstract nearest-neighbour similarity (file 'Delta', written by fit.r).
Δ = _read_floats('Delta')

# Intercept of the logistic model.
# An earlier value was -0.8569279 ("some magic constant"); currently disabled.
α = 0

# titles2.text holds 4 lines per record: TITLE, AUTHORS, ACCEPTED?, blank.
M = _read_text('titles2.text').strip().split('\n')
T = ν.array(M[0::4])  # titles
A = ν.array(M[1::4])  # authors
O = ν.array(M[2::4])  # "TRUE" when presented on site

# Embedding matrix, one comma-separated row per abstract.
with open('embeddings2.nsv', 'rb') as _embeddings_file:
    X = ν.loadtxt(_embeddings_file, delimiter=',', skiprows=0)

NN = _read_text('NN').split('\n')

# Regression coefficients, one per line (file 'beta', written by fit.r).
β = _read_floats('beta')
|
||||
|
||||
def get(θ):
    """Return the embedding of the text ``θ`` via ``get_emb.get_embedding``."""
    embedding = get_emb.get_embedding(θ)
    return embedding
|
||||
|
||||
def percent(χ, coefficients=None, intercept=None):
    """Return the predicted probability for embedding ``χ`` as a percent string.

    Evaluates the logistic model ``sigmoid(intercept + χ · coefficients)`` and
    formats the result as a whole-number percentage such as ``"50%"``.

    Args:
        χ: embedding vector of the title.
        coefficients: model weights; defaults to the module-level ``β``.
        intercept: model intercept; defaults to the module-level ``α``.

    Returns:
        The probability rounded to the nearest percent, followed by ``%``.
    """
    weights = β if coefficients is None else coefficients
    offset = α if intercept is None else intercept

    γ = offset + ν.dot(χ, weights)
    # tanh form of the logistic function: identical value, but never
    # overflows the way exp(γ) / (1 + exp(γ)) does for large |γ|.
    π = 0.5 * (1.0 + ν.tanh(0.5 * γ))

    # Format numerically; slicing str(π) breaks for 0.5, 1.0 and
    # exponent-notation values.
    return f"{int(round(float(π) * 100))}%"
|
||||
|
||||
def closest(χ, n):
    """Find the stored titles most similar to embedding ``χ``.

    Similarity is the dot product of ``χ`` with each row of ``X``.

    Args:
        χ: embedding vector of the query title.
        n: requested number of results. It is folded into ``1..MAX_RES`` as
            ``abs(n) % MAX_RES``, with 0 mapped to ``MAX_RES``.

    Returns:
        A two-element list ``[out, tailprob]``. ``out`` is one line per match,
        best match first, formatted as
        ``TITLE <i>(AUTHORS, presented|online-only)</i>`` and terminated by a
        newline; title and author text is inserted verbatim, not HTML-escaped.
        ``tailprob`` is ``percentile_far`` of the best similarity, as an
        integer percentage.
    """
    n = abs(n) % MAX_RES or MAX_RES

    ψ = ν.asarray(ν.dot(X, χ))
    # Sort once; keep the n highest scores, best first.
    best_first = ν.argsort(ψ)[-n:][::-1]

    lines = []
    for title, authors, accepted in zip(T[best_first], A[best_first], O[best_first]):
        status = "presented" if accepted == "TRUE" else "online-only"
        lines.append(f"{title} <i>({authors}, {status})</i>\n")
    out = "".join(lines)

    tailprob = int(percentile_far(ν.max(ψ)) * 100)

    return [out, tailprob]
|
||||
|
||||
def percentile_far(q_dist, reference=None):
    """Return the fraction of reference values that are ``<= q_dist``.

    With the default reference (module-level ``Δ``) this is the fraction of
    stored abstracts whose nearest-neighbour value does not exceed ``q_dist``.

    Args:
        q_dist: value to rank against the reference set.
        reference: optional sequence of reference values; defaults to ``Δ``.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when the reference set is empty.
    """
    scores = ν.asarray(Δ if reference is None else reference, dtype=float)
    if scores.size == 0:
        return 0.0
    # Count in NumPy instead of summing element by element in Python.
    return float(ν.count_nonzero(scores <= q_dist)) / scores.size
|
||||
|
37
fit.r
Executable file
37
fit.r
Executable file
@ -0,0 +1,37 @@
|
||||
# Fit the penalized logistic model on title embeddings and export the files
# that fit.py loads.
#
# Reads:  titles.text, embeddings.nsv
# Writes: titles2.text, embeddings2.nsv, beta, Delta

library('glmnet')
library('pROC')
library('survival')

# Inverse logit (logistic) transform.
inv.logit <- function(x) {
  exp(x) / (1 + exp(x))
}

# One input line per row, reshaped to one record per row with three fields.
# Column 1 is the de-duplication key; column 3 is the logical outcome.
raw_lines <- read.table('titles.text', header=FALSE, quote='', sep="\n")
records <- t(matrix(as.vector(raw_lines[,1]), 3, nrow(raw_lines) / 3))
embeddings <- as.matrix(read.csv('embeddings.nsv', header=FALSE)) # matrix of embeddings

# Drop records whose first field repeats an earlier one (if any), keeping the
# embedding rows aligned with the records.
keep <- !duplicated(records[,1])
embeddings <- embeddings[keep,]
records <- records[keep,]

# Each record is written followed by an empty line (four lines per record).
write.table(as.vector(t(cbind(records, ""))), "titles2.text", sep="\n",
            row.names=FALSE, col.names=FALSE, quote=FALSE)
write.table(embeddings, "embeddings2.nsv", sep=",",
            row.names=FALSE, col.names=FALSE, quote=FALSE)

# Outcome: 1 when the third field parses as TRUE, 0 otherwise.
outcome <- as.numeric(as.logical(records[,3]))

# Choose lambda by cross-validated AUC, then refit at that lambda.
cv_fit <- cv.glmnet(x=embeddings, y=outcome, family="binomial", type.measure="auc")
model <- glmnet(x=embeddings, y=outcome, lambda=cv_fit$lambda.min, family="binomial")

# In-sample predictions, a logistic recalibration of them, and its ROC curve.
fitted_prob <- predict(model, newx=embeddings, type="response")
calibration <- glm(outcome ~ fitted_prob, family="binomial")
roc_curve <- roc(outcome, calibration$fitted.values)
beta <- model$beta

# Disabled calibration experiment, kept for reference:
#P <- B$fitted.values
#O1 <- X %*% as.vector(Z$beta)
#O2 <- B$coefficients[1] + B$coefficients[2] * O1
#O3 <- inv.logit(exp(1) + O2)
#beta <- as.vector(Z$beta) * B$coefficients[2];
#alpha <- B$coefficients[1] + exp(1);

# Pairwise dot products between embeddings; the diagonal (self-similarity) is
# masked so each row's maximum is its highest similarity to another row.
similarity <- embeddings %*% t(embeddings)
diag(similarity) <- NA
nearest_similarity <- apply(similarity, 1, max, na.rm=TRUE)

write.table(as.vector(beta), "beta", sep="\n", row.names=FALSE, col.names=FALSE)
write.table(nearest_similarity, "Delta", sep="\n", row.names=FALSE, col.names=FALSE)
|
5
get_emb.py
Normal file
5
get_emb.py
Normal file
@ -0,0 +1,5 @@
|
||||
import openai
|
||||
|
||||
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of *text* produced by the given OpenAI *model*.

    A new ``openai.OpenAI`` client is created on every call; *text* is sent as
    a single-element input list and the first result's ``embedding`` is
    returned.
    """
    client = openai.OpenAI()
    response = client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding
|
2
gunicorn_config.py
Normal file
2
gunicorn_config.py
Normal file
@ -0,0 +1,2 @@
|
||||
# Address and port gunicorn listens on: all interfaces, port 80.
# NOTE(review): run.sh launches gunicorn with --bind=0.0.0.0:8077 and no -c
# option — confirm whether this config file is actually loaded.
bind = "0.0.0.0:80"
|
||||
# Number of gunicorn worker processes.
workers = 2
|
3
run.sh
Executable file
3
run.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Serve the `app` object from app.py on port 8077 (all interfaces).
# Worker temp files are placed in /dev/shm.
gunicorn --bind=0.0.0.0:8077 --worker-tmp-dir /dev/shm app:app
|
27
static/about.html
Normal file
27
static/about.html
Normal file
@ -0,0 +1,27 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<link rel="stylesheet" href="main.css">
|
||||
<title>About</title>
|
||||
</head>
|
||||
<body>
|
||||
<center><h1>About</h1></center>
|
||||
|
||||
<h2>The Data</h2>
|
||||
<p>
|
||||
Information was scraped from <a href="//ascopubs.org/jco/meeting">this page</a> on 2023-06-28.
|
||||
Over 5000 abstract titles from 2023 were used as training data.
|
||||
Embeddings for these were generated through the OpenAI API, with the <code>text-embedding-ada-002</code> model.
|
||||
</p>
|
||||
|
||||
<h2>The Model</h2>
|
||||
<p>
|
||||
A penalized logistic regression model was fit using the <code>glmnet</code> R package.
|
||||
The tuning parameter was selected using cross validation.
|
||||
The area under the ROC curve was 0.83 in the training data.
|
||||
</p>
|
||||
|
||||
<center><a href="..">back</a></center>
|
||||
</body>
|
||||
</html>
|
BIN
static/gruv-4.jpg
Normal file
BIN
static/gruv-4.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 536 KiB |
129
static/main.css
Normal file
129
static/main.css
Normal file
@ -0,0 +1,129 @@
|
||||
/* global */
|
||||
:root {
|
||||
font-family: monospace;
|
||||
|
||||
--fgm: #bdae93;
|
||||
--bgm: #1d2021;
|
||||
--bgs: #282828;
|
||||
--bgr: #32302f;
|
||||
}
|
||||
|
||||
* {
|
||||
border-radius: 0px !important;
|
||||
}
|
||||
|
||||
/* background */
|
||||
html {
|
||||
background-color: var(--bgm);
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
/* page */
|
||||
body {
|
||||
background-color: var(--bgs);
|
||||
color: var(--fgm);
|
||||
margin: 0;
|
||||
padding-inline: 2em;
|
||||
padding-block: 1em;
|
||||
height: fit-content;
|
||||
min-height: 100vh;
|
||||
max-width: 50em;
|
||||
flex-grow: 1;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
/* headers */
|
||||
h1 {
|
||||
color: #fb4934;
|
||||
} h2 {
|
||||
color: #fabd2f;
|
||||
} h3 {
|
||||
color: #b8bb26;
|
||||
} h4 {
|
||||
color: #8ec07c
|
||||
} h5 {
|
||||
color: #83a598;
|
||||
} h1, h2,
|
||||
h3, h4,
|
||||
h5 {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* text styles */
|
||||
b, i, u {
|
||||
color: #d79921;
|
||||
}
|
||||
|
||||
/* links */
|
||||
a {
|
||||
color: #458588;
|
||||
} a:hover {
|
||||
color: var(--bgs);
|
||||
background-color: #458588;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* preformatted */
|
||||
pre,code {
|
||||
color: #8ec07c;
|
||||
background-color: var(--bgr);
|
||||
width: min-content;
|
||||
}
|
||||
|
||||
/* misc */
|
||||
.c {
|
||||
color: #458588;
|
||||
transition: filter 0.2s;
|
||||
cursor: help;
|
||||
} .c:hover {
|
||||
filter: blur(2px)
|
||||
}
|
||||
|
||||
form {
|
||||
margin: 1em;
|
||||
padding: 1em;
|
||||
}
|
||||
|
||||
input[type="text"],
|
||||
textarea,
|
||||
input[type="number"],
|
||||
input[type="submit"] {
|
||||
background-color: var(--bgr);
|
||||
border: none;
|
||||
outline: none;
|
||||
color: #8ec07c !important;
|
||||
font-size: 1em;
|
||||
font-family: monospace;
|
||||
width: min-content !important;
|
||||
}
|
||||
|
||||
textarea:active,
|
||||
textarea:focus,
|
||||
input[type="submit"]:active,
|
||||
input[type="submit"]:focus,
|
||||
input[type="text"]:active,
|
||||
input[type="number"]:active,
|
||||
input[type="text"]:focus,
|
||||
input[type="number"]:focus {
|
||||
background-color: #83c07c;
|
||||
color: var(--bgs) !important;
|
||||
border: none;
|
||||
outline: none;
|
||||
font-size: 1em;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
input {
|
||||
margin: 0.2em;
|
||||
border-radius: 0px !important;
|
||||
}
|
||||
|
||||
input[type="submit"],
|
||||
input[type="submit"]:active,
|
||||
input[type="submit"]:focus {
|
||||
border: 3px solid #83c07c
|
||||
}
|
BIN
static/stairs.jpg
Normal file
BIN
static/stairs.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.0 MiB |
67
templates/index.html
Normal file
67
templates/index.html
Normal file
@ -0,0 +1,67 @@
|
||||
<!DOCTYPE html>
|
||||
<head>
|
||||
<title>ASCO Abstract Title Evaluator</title>
|
||||
<link rel="stylesheet" href="static/main.css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<center><h1>ASCO Abstract Title Evaluator</h1></center>
|
||||
<center><form action="/" method="post">
|
||||
<textarea rows="3"
|
||||
cols="64"
|
||||
name="title"
|
||||
placeholder="Enter abstract title..."
|
||||
required>{{ title }}</textarea><br>
|
||||
<input type="submit" value="Evaluate"/>
|
||||
</form></center>
|
||||
|
||||
{% if tprob %}
|
||||
<center style="margin-inline:10em; text-align: justify;"><p>
|
||||
<h3>{{ tprob }}% Similarity</h3>
|
||||
The similarity of your abstract title to accepted
|
||||
ASCO 2023 titles is in this percentile.
|
||||
</p></center>
|
||||
<br>
|
||||
{% endif %}
|
||||
|
||||
{% if result %}
|
||||
<center style="margin-inline:10em; text-align: justify;"><p>
|
||||
<h3>{{ result }} Chance of on-site presentation</h3>
|
||||
Probability of on-site presentation
|
||||
versus online only publication of your
|
||||
abstract, based on the title, if
|
||||
accepted.
|
||||
{% if tprob|int < 5 %}
|
||||
<p><i>Warning: the similarity is too low to be confident in this prediction.</i></p><br>
|
||||
{% endif %}
|
||||
</p></center>
|
||||
<br>
|
||||
{% endif %}
|
||||
|
||||
{% if closest %}
|
||||
{% set closed = closest.split('\n')[:-1] %}
|
||||
<h3>Closest {{ closed|length }} titles from ASCO 2023:</h3>
|
||||
<ol>
|
||||
{% for close in closed %}
|
||||
<li>{{ close|safe }}</li>
|
||||
{% endfor %}
|
||||
</ol>
|
||||
{% endif %}
|
||||
|
||||
<center><a href="{{ url_for('static', filename='about.html') }}">about</a></center>
|
||||
|
||||
<script>
|
||||
/**
 * Keydown handler for the title textarea: plain Enter submits the form,
 * Shift+Enter keeps its default behaviour (inserting a newline).
 *
 * @param {KeyboardEvent} event - keydown event whose target belongs to a form.
 */
function sub(event) {
    if (event.key !== 'Enter' || event.shiftKey) {
        return;
    }

    // Always swallow a plain Enter so it never inserts a newline.
    event.preventDefault();

    // Holding the key down must not submit more than once.
    if (event.repeat) {
        return;
    }

    const form = event.target.form;
    if (typeof form.requestSubmit === 'function') {
        // Dispatching a synthetic "submit" event only notifies listeners and
        // does not submit the form. requestSubmit() performs a real
        // submission and still honours the textarea's `required` constraint.
        form.requestSubmit();
    } else {
        // Fallback for browsers without requestSubmit(); skips validation.
        form.submit();
    }
}
|
||||
|
||||
document.querySelector("textarea").addEventListener("keydown", sub);
|
||||
</script>
|
||||
</body>
|
21008
titles2.text
Normal file
21008
titles2.text
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user