asco/fit.r
Jacob 2cb11e4933 Initial commit.
Yeah I know there are a lot of nonessential files but w/e.
2024-12-17 01:39:52 -05:00

38 lines
1.3 KiB
R
Executable File

library('glmnet')
library('pROC')
library('survival')
# Inverse logit (logistic function): maps a log-odds value x to a probability.
# Vectorized over x.
# Delegates to stats::plogis(), which evaluates 1 / (1 + exp(-x)). The naive
# form exp(x) / (1 + exp(x)) overflows to Inf / Inf = NaN once x exceeds ~709.
inv.logit <- function(x) {
  stats::plogis(x)
}
# Load the records. 'titles.text' is read one line per row (read.table() skips
# blank lines) and every run of three consecutive lines becomes one row of M.
# Column 1 is the de-duplication key below; column 3 is what the script later
# parses as the logical label.
# comment.char = "" is required: with the default "#", a line containing '#'
# is silently truncated, and a line starting with '#' is dropped entirely,
# which would shift every following record.
D <- read.table("titles.text", header = FALSE, quote = "", sep = "\n",
                comment.char = "")
stopifnot(nrow(D) %% 3 == 0) # a partial record would misalign every row
M <- matrix(as.vector(D[, 1]), ncol = 3, byrow = TRUE)

X <- as.matrix(read.csv("embeddings.nsv", header = FALSE)) # matrix of embeddings
# Row i of X must belong to row i of M; a shorter logical mask would otherwise
# be recycled silently when subsetting.
stopifnot(nrow(X) == nrow(M))

# Remove duplicates (if present): keep the first row for each distinct value of
# column 1. drop = FALSE keeps X and M as matrices even when a single record
# survives.
keep <- !duplicated(M[, 1])
X <- X[keep, , drop = FALSE]
M <- M[keep, , drop = FALSE]

# Write the de-duplicated data back out: three lines per record followed by an
# empty line, and one comma-separated row per embedding.
write.table(as.vector(t(cbind(M, ""))), "titles2.text", sep = "\n",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
write.table(X, "embeddings2.nsv", sep = ",",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
# Binary response: column 3 of M parsed as a logical, then coded as 0/1.
Y <- as.numeric(as.logical(M[, 3]))

# Cross-validate the binomial glmnet model using AUC as the selection measure,
# then refit at the selected penalty (lambda.min).
V <- cv.glmnet(x = X, y = Y, family = "binomial", type.measure = "auc")
Z <- glmnet(x = X, y = Y, family = "binomial", lambda = V[["lambda.min"]])

# In-sample predictions on the response scale, passed through a second
# logistic regression of Y on those predictions; R is the ROC curve of that
# second model's fitted values.
A <- predict(Z, newx = X, type = "response")
B <- glm(Y ~ A, family = "binomial")
R <- roc(Y, B[["fitted.values"]])

# Coefficient matrix of the glmnet fit.
beta <- Z[["beta"]]

# Disabled experiment, kept for reference: hand-computed linear predictor and
# coefficients rescaled by the second-stage fit.
# P <- B$fitted.values
# O1 <- X %*% as.vector(Z$beta)
# O2 <- B$coefficients[1] + B$coefficients[2] * O1
# O3 <- inv.logit(exp(1) + O2)
# beta <- as.vector(Z$beta) * B$coefficients[2];
# alpha <- B$coefficients[1] + exp(1);
# For every row of X, find its largest inner product with any *other* row,
# then write the model coefficients and these maxima to disk.
# ASCII variable names are used here because non-ASCII identifiers only parse
# in locales that classify those characters as letters (e.g. not under LANG=C).
gram <- X %*% t(X) # all pairwise inner products between rows of X
diag(gram) <- NA   # mask each row's product with itself
max_other <- apply(gram, 1, max, na.rm = TRUE)

# One value per line in each output file.
write.table(as.vector(beta), "beta", sep = "\n",
            row.names = FALSE, col.names = FALSE)
write.table(max_other, "Delta", sep = "\n",
            row.names = FALSE, col.names = FALSE)