# Fit a penalised logistic regression (glmnet defaults) on title embeddings,
# then export the fitted coefficients and, for every item, its largest inner
# product with any other item.
#
# Inputs (read from the current working directory):
#   titles.text    - records of three non-blank lines each. After reshaping,
#                    column 1 is the duplicate key and column 3 is parsed as a
#                    logical label.
#   embeddings.nsv - comma-separated numeric matrix without a header; rows are
#                    matched to the records of titles.text by position.
# Outputs:
#   titles2.text, embeddings2.nsv - de-duplicated copies of the two inputs.
#   beta                          - glmnet coefficients, one value per line.
#   Delta                         - per-row maximum off-diagonal entry of
#                                   X %*% t(X), one value per line.

library("glmnet")
library("pROC")
library("survival")

# Inverse logit. Written as 1 / (1 + exp(-x)) rather than
# exp(x) / (1 + exp(x)): the latter evaluates to Inf / Inf = NaN once exp(x)
# overflows (x > ~709), whereas this form saturates cleanly at 0 and 1.
inv.logit <- function(x) {
  1 / (1 + exp(-x))
}

# One line per row. comment.char = "" is required because read.table() treats
# "#" as a comment start by default: a line containing "#" would be truncated
# and a line beginning with "#" dropped, shifting the three-line records.
# Blank lines are still skipped (read.table default).
D <- read.table("titles.text", header = FALSE, quote = "", sep = "\n",
                comment.char = "")
if (nrow(D) %% 3 != 0) {
  stop("titles.text: expected a multiple of 3 non-blank lines, got ",
       nrow(D), call. = FALSE)
}

# Reshape the flat line vector into one row per record (three columns).
M <- matrix(as.vector(D[, 1]), 3, nrow(D) / 3)
M <- t(M)

X <- as.matrix(read.csv("embeddings.nsv", header = FALSE))  # matrix of embeddings
if (nrow(X) != nrow(M)) {
  stop("embeddings.nsv has ", nrow(X), " rows but titles.text has ",
       nrow(M), " records", call. = FALSE)
}

# remove duplicates (if present), keyed on the first line of each record.
# drop = FALSE keeps both objects as matrices even if a single row survives.
keep <- !duplicated(M[, 1])
X <- X[keep, , drop = FALSE]
M <- M[keep, , drop = FALSE]

# Write the de-duplicated inputs back out; the extra "" column emits an empty
# line after every three-line record.
write.table(as.vector(t(cbind(M, ""))), "titles2.text", sep = "\n",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
write.table(X, "embeddings2.nsv", sep = ",",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

# Binary response: third line of each record, parsed as logical -> 0/1.
Y <- as.numeric(as.logical(M[, 3]))
if (anyNA(Y)) {
  stop("titles.text: the third line of every record must parse as a ",
       "logical value", call. = FALSE)
}

# Choose lambda by cross-validated AUC, then refit at that single lambda.
# NOTE: cv.glmnet assigns folds at random, so results vary between runs unless
# the caller sets a seed beforehand.
V <- cv.glmnet(x = X, y = Y, family = "binomial", type.measure = "auc")
Z <- glmnet(x = X, y = Y, lambda = V$lambda.min, family = "binomial")

# Logistic regression of the labels on the in-sample glmnet probabilities,
# and the ROC curve of its fitted values.
A <- predict(Z, newx = X, type = "response")
B <- glm(Y ~ A, family = "binomial")
R <- roc(Y, B$fitted.values)
beta <- Z$beta

# Disabled alternative kept from the original script:
#P <- B$fitted.values
#O1 <- X %*% as.vector(Z$beta)
#O2 <- B$coefficients[1] + B$coefficients[2] * O1
#O3 <- inv.logit(exp(1) + O2)
#beta <- as.vector(Z$beta) * B$coefficients[2];
#alpha <- B$coefficients[1] + exp(1);

# Pairwise inner products between rows of X; the diagonal (self-products) is
# masked so the row-wise maximum only considers other rows.
δ <- X %*% t(X)
diag(δ) <- NA
Δ <- apply(δ, 1, max, na.rm = TRUE)

write.table(as.vector(beta), "beta", sep = "\n",
            row.names = FALSE, col.names = FALSE)
write.table(Δ, "Delta", sep = "\n", row.names = FALSE, col.names = FALSE)