38 lines
1.3 KiB
R
Executable File
38 lines
1.3 KiB
R
Executable File
library('glmnet')
|
|
library('pROC')
|
|
library('survival')
|
|
# Inverse logit (logistic sigmoid): maps a log-odds value to a probability.
#
# x: numeric vector (or matrix) of log-odds.
# Returns: numeric values in [0, 1] with the same shape as x.
#
# Written as 1 / (1 + exp(-x)) rather than exp(x) / (1 + exp(x)): the latter
# evaluates to Inf / Inf = NaN once exp(x) overflows (x above roughly 709),
# whereas this form saturates cleanly to 1 (and to 0 for very negative x).
inv.logit <- function(x) {
  1 / (1 + exp(-x))
}
|
|
# Load the titles file, which stores each record as three consecutive lines
# (blank lines are skipped by read.table's default blank.lines.skip = TRUE).
# quote = "" and comment.char = "" make read.table take every line verbatim:
# with the default comment.char = "#", a line containing '#' would be
# truncated, and a line starting with '#' dropped entirely, silently shifting
# every following record. colClasses keeps the lines as plain character.
D <- read.table("titles.text", header = FALSE, quote = "", sep = "\n",
                comment.char = "", colClasses = "character")

# A line count that is not a multiple of 3 means the records are misaligned;
# stop instead of letting matrix() recycle or truncate the data.
stopifnot(nrow(D) %% 3 == 0)

# One row per record, one column per line of the record (in file order).
M <- matrix(as.vector(D[, 1]), ncol = 3, byrow = TRUE)

X <- as.matrix(read.csv("embeddings.nsv", header = FALSE)) # matrix of embeddings

# Row i of X is used together with row i of M below, so the counts must agree.
stopifnot(nrow(X) == nrow(M))
|
|
|
|
# Remove duplicates (if present): keep the first occurrence of each distinct
# value in column 1 of M, together with the matching rows of X.
# The mask is computed once, before either object is modified, so both are
# filtered identically; drop = FALSE keeps them matrices even when only a
# single row survives.
is_first <- !duplicated(M[, 1])
X <- X[is_first, , drop = FALSE]
M <- M[is_first, , drop = FALSE]

# Write the de-duplicated data back out: every field of M on its own line
# with an empty line after each record, and X as header-less comma-separated
# rows.
write.table(as.vector(t(cbind(M, ""))), "titles2.text", sep = "\n",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
write.table(X, "embeddings2.nsv", sep = ",",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
|
|
|
|
# Binary response: column 3 of M parsed as logical, then coded as 0/1.
Y <- as.numeric(as.logical(M[, 3]))

# as.logical() yields NA for strings it does not recognise (e.g. "1", "yes").
# Fail here with a clear message rather than deep inside glmnet.
if (anyNA(Y)) {
  stop("column 3 of M contains values that cannot be parsed as logical",
       call. = FALSE)
}

# Cross-validate the penalty strength of a penalised logistic regression,
# scoring each candidate lambda by AUC.
# NOTE(review): cv.glmnet assigns folds at random and no seed is set here, so
# lambda.min (and everything downstream) can vary between runs.
V <- cv.glmnet(x = X, y = Y, family = "binomial", type.measure = "auc")

# Refit on all rows at the lambda that scored best in cross-validation.
# NOTE(review): the glmnet documentation advises against fitting with a single
# lambda value; predicting from V at s = "lambda.min" may be preferable.
Z <- glmnet(x = X, y = Y, lambda = V$lambda.min, family = "binomial")

# In-sample predicted probabilities from the penalised model.
A <- predict(Z, newx = X, type = "response")

# Logistic regression of Y on those predicted probabilities.
B <- glm(Y ~ A, family = "binomial")

# ROC curve of the second-stage fitted values against the true labels.
R <- roc(Y, B$fitted.values)

# Coefficients of the penalised model (Z$beta does not include the intercept).
beta <- Z$beta
|
|
|
|
|
|
#P <- B$fitted.values
|
|
#O1 <- X %*% as.vector(Z$beta)
|
|
#O2 <- B$coefficients[1] + B$coefficients[2] * O1
|
|
#O3 <- inv.logit(exp(1) + O2)
|
|
#beta <- as.vector(Z$beta) * B$coefficients[2];
|
|
#alpha <- B$coefficients[1] + exp(1);
|
|
|
|
# Inner products between every pair of rows of X (same as X %*% t(X)).
# ASCII names are used for the intermediates: whether a non-ASCII identifier
# parses depends on the locale R runs in.
gram <- tcrossprod(X)

# Blank out each row's product with itself so it cannot be picked as the max.
diag(gram) <- NA

# For every row, the largest inner product with any *other* row.
max_sim <- apply(gram, 1, max, na.rm = TRUE)

# One value per line, no header, for both output files.
write.table(as.vector(beta), "beta", sep = "\n",
            row.names = FALSE, col.names = FALSE)
write.table(max_sim, "Delta", sep = "\n",
            row.names = FALSE, col.names = FALSE)
|