-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ff4be7e
commit f277366
Showing
4 changed files
with
101 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,5 +8,6 @@ articles.zip | |
comments.zip | ||
config.py | ||
|
||
# hlm data files | ||
# hlm files | ||
hlm/*.csv | ||
hlm/*.pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,114 @@ | ||
#!/usr/loca/bin/R | ||
#!/usr/local/bin/R | ||
# hierarchical modeling for ugb | ||
# questions? contact [email protected] | ||
|
||
# load libraries | ||
library(data.table) | ||
library(ggplot2) | ||
library(lme4) | ||
library(reshape2) | ||
library(sjstats) | ||
|
||
# get passed arguments | ||
args <- commandArgs(trailingOnly = TRUE) | ||
args <- c('breitbart_modeling_data.csv') | ||
nargs <- sapply(args, function(x) {strsplit(x, '_')[[1]][1]}) | ||
|
||
# load data | ||
d <- fread(paste('hlm', args[1], sep = '/')) | ||
d <- list() | ||
for(i in 1:(length(args)-1)) { | ||
d[[i]] <- fread(paste('hlm', args[i], sep = '/')) | ||
d[[i]]$source <- nargs[i] | ||
} | ||
d <- do.call(rbind, d) | ||
|
||
gender_mod <- lmer(gender_bias ~ | ||
title_embeddings_gender + | ||
art_text_embeddings_gender + | ||
title_sent_neg + | ||
title_sent_pos + | ||
art_sent_neg + | ||
art_sent_pos + | ||
art_comments + | ||
comment_embeddings_gender + | ||
upvotes + | ||
comm_sent_neg + | ||
comm_sent_pos + | ||
(1 | art_id) + | ||
(1 | CLUSTER), | ||
d, REML = FALSE | ||
) | ||
# prep modeling loop | ||
metric_measure <- grep(args[length(args)], names(d)) | ||
outcomes <- grep('^z', names(d)) | ||
mod_results <- list() | ||
i <- 1 | ||
|
||
# model against synthetic outcomes | ||
for(outcome in outcomes) { | ||
mod <- d[, lmer(.SD[[outcome]] ~ | ||
.SD[[metric_measure[1]]] + | ||
.SD[[metric_measure[2]]] + | ||
title_sent_neg + | ||
title_sent_pos + | ||
art_sent_neg + | ||
art_sent_pos + | ||
art_comments + | ||
.SD[[metric_measure[3]]] + | ||
upvotes + | ||
comm_sent_neg + | ||
comm_sent_pos + | ||
(1 | source) + | ||
(1 | art_id) + | ||
(1 | CLUSTER), | ||
d, REML = FALSE)] | ||
mod_results[[i]] <- list(icc = icc(mod), | ||
predictors = names(fixef(mod)), | ||
estimate = coef(summary(mod))[, 1], | ||
sterr = coef(summary(mod))[, 2], | ||
tval = coef(summary(mod))[, 3]) | ||
print(paste('Finished model', i, 'of', length(outcomes))) | ||
i <- i + 1 | ||
} | ||
|
||
race_mod <- lmer(race_bias ~ | ||
title_embeddings_race + | ||
art_text_embeddings_race + | ||
title_sent_neg + | ||
title_sent_pos + | ||
art_sent_neg + | ||
art_sent_pos + | ||
art_comments + | ||
comment_embeddings_race + | ||
upvotes + | ||
comm_sent_neg + | ||
comm_sent_pos + | ||
(1 | art_id) + | ||
(1 | CLUSTER), | ||
d, REML = FALSE | ||
# create diagnostics | ||
# iccs | ||
iccs <- data.frame( | ||
n = 1:length(outcomes), | ||
Article = do.call(c, lapply(mod_results, function(x) {x$icc[1]})), | ||
Cluster = do.call(c, lapply(mod_results, function(x) {x$icc[2]})), | ||
Source = do.call(c, lapply(mod_results, function(x) {x$icc[3]})) | ||
) | ||
iccs_l <- melt(iccs, id.vars = 'n') | ||
names(iccs_l)[2:3] <- c('Level', 'ICC') | ||
|
||
power_mod <- lmer(power_bias ~ | ||
title_embeddings_power + | ||
art_text_embeddings_power + | ||
title_sent_neg + | ||
title_sent_pos + | ||
art_sent_neg + | ||
art_sent_pos + | ||
art_comments + | ||
comment_embeddings_power + | ||
upvotes + | ||
comm_sent_neg + | ||
comm_sent_pos + | ||
(1 | art_id) + | ||
(1 | CLUSTER), | ||
d, REML = FALSE | ||
pdf('hlm/icc_visual.pdf', height = 7.5, width = 10) | ||
print(ggplot(iccs_l, aes(x = n, y = ICC)) + | ||
geom_point(aes(color = Level)) + | ||
theme_bw() + | ||
labs(title = 'ICC Values by Level', x = 'Iteration') + | ||
theme(legend.position = 'bottom') | ||
) | ||
dev.off() | ||
|
||
# estimates | ||
varnames <- c( | ||
paste0('Title embeddings (', args[length(args)], ')'), | ||
paste0('Article embeddings (', args[length(args)], ')'), | ||
'Title sentiment (negative)', | ||
'Title sentiment (positive)', | ||
'Article sentiment (negative)', | ||
'Article sentiment (positive)', | ||
'Nbr article comments', | ||
'Nbr upvotes', | ||
'Comment sentiment (negative)', | ||
'Comment sentiment (positive)' | ||
) | ||
|
||
# note: predictors 2-8 and 10-12 are of interest; predictor 1 is the intercept | ||
# and predictor 10 is weakly correlated to the outcome and therefore | ||
# inherently biased | ||
ests <- do.call(rbind, mapply(function(x, i) { | ||
return(data.frame( | ||
n = i, | ||
Variable = varnames, | ||
Estimate = x$estimate[c(2:8, 10:12)], | ||
Signif = ifelse(abs(x$tval[c(2:8, 10:12)]) > 1.96, 1, 0) | ||
)) | ||
}, mod_results, as.list(1:50), SIMPLIFY = FALSE)) | ||
|
||
pdf('hlm/estimate_visual.pdf', height = 7.5, width = 10) | ||
print(ggplot(ests, aes(x = n, y = Estimate)) + | ||
geom_point(aes(color = as.factor(Signif))) + | ||
scale_color_manual(name = 'Significance', | ||
values = c('black', 'red'), | ||
labels = c('Insignificant', 'Significant')) + | ||
facet_wrap(vars(Variable), nrow = 5, ncol = 2, scales = 'free_y') + | ||
theme_bw() + | ||
labs(title = 'Point Estimates and Statistical Significance by Variable', | ||
x = 'Iteration') + | ||
theme(legend.position = 'bottom')) | ||
dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters