Load packages.
rm(list = ls())
library(dplyr)
library(gamm4)
library(ggplot2)
library(gridExtra)
library(lme4)
library(purrr)
library(readr)
library(stringr)
library(tidyr)
## Helper functions ##
rotn <- function(x,n){rep(x,2)[n:(n+length(x)-1)]}
set.seed(312)
Two-by-two Latin square design with subjects and items.
subj_list <- paste0("sub", seq(101,132))
item_list <- paste0("item", seq(1,24))
n_cond <- 4
latin_square_cond <- data.frame(sapply(seq(1:n_cond), rotn, x = 1:n_cond))
# latin square design matrix -> (+ items) -> versions -> (+ subjs) -> experiment
df_exp <- latin_square_cond %>%
dplyr::slice(rep(1:n(), length(item_list) / n_cond)) %>%
dplyr::mutate(item = item_list) %>%
tidyr::gather(key = "version", value = "condition", -item) %>%
dplyr::slice(rep(1:n(), length(subj_list) / n_cond)) %>%
dplyr::mutate(subj = rep(subj_list, each = length(item_list)))
# # ... within-subj design
# df_exp <- tidyr::crossing(subj_list, item_list)
df_label <- data.frame(list(condition = 1:4,
cond = c("A1-B1", "A1-B2", "A2-B1", "A2-B2"),
factor_A = c("A1","A1","A2","A2"),
factor_B = c("B1","B2","B1","B2")
))
Common plots.
df_g <- df_exp %>%
group_by(condition) %>%
dplyr::mutate(RT = rnorm(n(), mean = rnorm(1, mean = condition, sd = 1), sd = 1),
td = rnorm(n(), mean = mean(RT)),
acc = 1*(runif(n()) > 0.5)
) %>%
inner_join(df_label)
A simple way to get some intuition about the data.
ggplot(df_g, aes(x = cond, y = RT)) +
stat_summary(fun.data = "mean_cl_boot") +
stat_summary(fun = "mean", geom = "text",
aes(label = round(..y.., 2)),
hjust = 1.3) +
xlab("Condition") +
ylab("Reaction Time (ms)") +
theme_bw()
A simple way to check main effect and interaction.
ggplot(df_g, aes(x = factor_A, y = RT, group = factor_B, color = factor_B)) +
stat_summary(fun.data = "mean_cl_boot") +
stat_summary(fun = "mean", geom = "line") +
stat_summary(fun = "mean", geom = "text", aes(label = round(..y.., 2)),
hjust = -0.5) +
xlab("Factor A") +
ylab("Reaction Time (ms)") +
ggtitle("Line plot (basic)") +
scale_color_discrete(name = "Factor B") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
… a customized version of the previous plot.
ggplot(df_g, aes(x = factor_A, y = RT, group = factor_B,
color = factor_B, shape = factor_B, linetype = factor_B)) +
stat_summary(fun.data = "mean_cl_boot", size = 0.8) +
stat_summary(fun = "mean", geom = "line", size = 1) +
stat_summary(fun = "mean", geom = "text", aes(label = round(..y.., 2)),
hjust = -0.5, size = 4, fontface = "bold") +
xlab("Factor A") +
ylab("Reaction Time (ms)") +
ggtitle("Line plot (customized)") +
scale_color_manual(name = "Factor B", values = c("#FF7E83", "#75AADC")) +
scale_shape_manual(name = "Factor B", values = c(2, 19)) +
scale_linetype_manual(name = "Factor B", values = c("dashed", "solid")) +
theme_bw() +
theme(text = element_text(size = 14),
plot.title = element_text(hjust = 0.5),
legend.position = c(0.85, 0.2),
legend.background = element_rect(colour = "grey60",
fill = "white", linetype = "solid"))
In case you prefer bar plot.
ggplot(df_g, aes(x = factor_A, y = RT, group = cond, fill = factor_B)) +
stat_summary(fun = "mean", geom = "bar",
width = 0.6,
position = position_dodge(width = 0.6)) +
stat_summary(fun.data = "mean_cl_boot",
position = position_dodge(width = 0.6),
geom = "errorbar",
width = 0.2) +
xlab("Factor A") +
ylab("Reaction Time (ms)") +
ggtitle("Bar plot") +
scale_fill_manual(name = "Factor B",
values = c( "#FF7E83", "#75AADC")) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
In case you prefer box plot.
ggplot(df_g, aes(x = factor_A, y = RT, group = cond,
color = factor_B, fill = factor_B)) +
geom_boxplot(alpha = 0.5) +
geom_point(position = position_dodge(width = 0.75), alpha = 0.2) +
xlab("Factor A") +
ylab("Reaction Time (ms)") +
ggtitle("Boxplot") +
scale_color_manual(name = "Factor B",
values = c("#FF7E83", "#75AADC")) +
scale_fill_manual(name = "Factor B",
values = c("#FF7E83", "#75AADC")) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
In case you prefer violin plot.
ggplot(df_g, aes(x = factor_A, y = RT, group = cond,
color = factor_B, fill = factor_B)) +
geom_violin(alpha = 0.5, draw_quantiles = c(0.25, 0.5, 0.75)) +
geom_point(position = position_dodge(width = 0.9)) +
xlab("Factor A") +
ylab("Reaction Time (ms)") +
ggtitle("Violin plot") +
scale_color_manual(name = "Factor B",
values = c( "#FF7E83", "#75AADC")) +
scale_fill_manual(name = "Factor B",
values = c( "#FF7E83", "#75AADC")) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
Visualize LMM effects and check significance.
# fit lmm
contrasts(df_g$factor_A) <- contr.sum(2)
contrasts(df_g$factor_B) <- contr.sum(2)
m <- summary(lme4::lmer(data = df_g,
RT ~ factor_A * factor_B + (1|subj) + (1|item),
REML = FALSE))
m$coefficients %>%
data.frame() %>%
tibble::rownames_to_column() %>%
ggplot(aes(x = rowname, y = Estimate)) +
geom_pointrange(aes(ymin = Estimate - 1.96*Std..Error,
ymax = Estimate + 1.96*Std..Error),
fatten = 0.5) +
geom_hline(yintercept = 0, linetype = "dashed") +
coord_flip() +
theme_bw()
In case you have a continuous predictor or you want to check correlation.
cc <- cor(df_g$td, df_g$RT)
ggplot(df_g, aes(x = td, y = RT)) +
geom_point(alpha = 0.5) +
geom_abline(size = 1) +
geom_text(x = 0.9*max(df_g$td), y = mean(df_g$RT),
label = paste0("r = ", sprintf("%0.2f", round(cc, digits = 2))),
fontface = "italic", color = "red") +
xlab("Whatever X") +
ylab("Reaction Time (ms)") +
ggtitle("Correlation/Regression") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
Just play with functional programming.
df_g %>%
split(.$item) %>%
purrr::map(~ lm(RT ~ factor_A * factor_B, data = .)) %>%
purrr::map(summary) %>%
purrr::map(coef) %>%
purrr::map(data.frame) %>%
purrr::map(~ tibble::rownames_to_column(., var = "X")) %>%
dplyr::bind_rows(.id = "item") %>%
dplyr::filter(X != "(Intercept)") %>%
dplyr::mutate(item_id = as.integer(stringr::str_extract(item, "\\d+"))) %>%
ggplot(aes(x = item_id, y = Estimate)) +
geom_pointrange(aes(ymin = Estimate - 1.96*Std..Error,
ymax = Estimate + 1.96*Std..Error),
fatten = 0.5) +
geom_hline(yintercept = 0, linetype = "dashed") +
facet_wrap(~ X)
In case you want to visualize data from an experiment using visual world paradigm.
## add time windows ##
n_bin = 10
n_region = 3
da <- df_exp %>%
dplyr::slice(rep(1:n(), n_bin)) %>%
dplyr::mutate(bin_index = rep(1:n_bin, each = nrow(df_exp)))
da <- da %>%
dplyr::mutate(`AVERAGE_IA_1_SAMPLE_COUNT_%` = runif(nrow(da), 0, 1),
`AVERAGE_IA_2_SAMPLE_COUNT_%` = runif(nrow(da), 0, 1-`AVERAGE_IA_1_SAMPLE_COUNT_%`),
`AVERAGE_IA_3_SAMPLE_COUNT_%` = 1 - `AVERAGE_IA_1_SAMPLE_COUNT_%` - `AVERAGE_IA_2_SAMPLE_COUNT_%`
)
da_plot <- da %>%
tidyr::gather(key = "IA", value = "pct",
`AVERAGE_IA_1_SAMPLE_COUNT_%`,
`AVERAGE_IA_2_SAMPLE_COUNT_%`,
`AVERAGE_IA_3_SAMPLE_COUNT_%`
) %>%
dplyr::mutate(IA_ID = str_extract(IA, "\\d")) %>%
dplyr::select(-IA)
## add label df ##
df_label_1 = data.frame(list(IA_ID = c("1", "2", "3"),
region = c("target",
"competitor",
"distractor")
))
da_plot <- da_plot %>%
inner_join(df_label_1) %>%
inner_join(df_label)
da_plot <- da_plot %>%
mutate(region = factor(region,
levels = c("target", "competitor", "distractor")))
len_bin = 20
ggplot(da_plot %>% dplyr::filter(cond == "A2-B2"),
aes(x = bin_index * len_bin,
y = pct,
fill = region, color = region,
linetype = region, shape = region
)) +
stat_summary(fun.data = "mean_cl_boot", geom = "ribbon",
alpha = 0.3, colour = NA) +
stat_summary(fun = "mean", geom = "line") +
stat_summary(fun = "mean", geom = "point") +
xlab("Time (ms)") +
ylab("Total Proportion of Fixations") +
ggtitle("A2-B2") +
scale_x_continuous(breaks = seq(0, n_bin*len_bin, len_bin)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
ggplot(da_plot,
aes(x = bin_index * len_bin,
y = pct,
fill = region, color = region,
linetype = region, shape = region
)) +
stat_summary(fun.data = "mean_cl_boot", geom = 'ribbon',
alpha = 0.3, colour = NA) +
stat_summary(fun = "mean", geom = "line") +
stat_summary(fun = "mean", geom = "point") +
xlab("Time (ms)") +
ylab("Total Proportion of Fixations") +
ggtitle("Title: visual world paradigm") +
scale_x_continuous(breaks = seq(0, n_bin*len_bin, len_bin)) +
facet_grid(factor_A ~ factor_B) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
Plots that I ever plotted (once) :)
Generalized additive models.
gam_ggplot <- function(gam0, sel, isprob = 1){
gam.model <- plot.gam(gam0, select = 1)
gam.data = data.frame(x = sapply(gam.model[sel], "[[", "x"),
fit_raw = sapply(gam.model[sel], "[[", "fit"),
se = sapply(gam.model[sel], "[[", "se"))
intercept = gam0$coefficients[1]
gam.data <- gam.data %>%
mutate(fit_raw = fit_raw + intercept,
raw_lo = fit_raw - se,
raw_hi = fit_raw + se,
fit_prop = exp(fit_raw)/(1 + exp(fit_raw)),
prop_lo = exp(raw_lo)/(1 + exp(raw_lo)),
prop_hi = exp(raw_hi)/(1 + exp(raw_hi))
)
if (isprob == 1){
p <- ggplot(gam.data, aes(x = x, y = fit_prop,
color = "black", fill = "black")) +
geom_line(size = 1)+
geom_ribbon(aes(ymin = prop_lo, ymax = prop_hi), alpha = 0.3, color = NA)
} else {
p <- ggplot(gam.data, aes(x = x, y = fit_raw,
color = "black", fill = "black")) +
geom_line(size = 1)+
geom_ribbon(aes(ymin = raw_lo, ymax = raw_hi), alpha = 0.3, color = NA)
}
return(p)
}
plist <- df_g %>%
dplyr::filter(item %in% c("item1","item2","item3")) %>%
split(.$item) %>%
purrr::map(~ bam(acc ~ s(RT), data = ., family = "binomial")) %>%
purrr::map(gam_ggplot) %>%
purrr::map(~ . + xlab("XXX") + ylab("YYY") +
scale_color_manual(values = "#75AADC") +
scale_fill_manual(values = "#75AADC") +
theme_bw() + theme(legend.position = "none"))
plist <- purrr::map2(plist, names(plist), ~(.x + ggtitle(.y)))
do.call("grid.arrange", c(plist, ncol = 2))
Dual y-axis in case you want to check speed-accuracy trade-off.
pct_scale = 8
n_fun <- function(x){
return(data.frame(y = mean(x),
label = sprintf("%0.2f", round(mean(x), digits = 2))))
}
n_fun_pct <- function(x){
return(data.frame(y = pct_scale*mean(x),
label = sprintf("%0.2f", round(mean(x), digits = 2))))
}
ggplot() +
stat_summary(fun.data = "mean_cl_boot",
mapping = aes(x = df_g$cond, y = pct_scale * df_g$acc),
color = "blue") +
stat_summary(fun.data = n_fun_pct,
mapping = aes(x = df_g$cond, y = df_g$acc),
geom = "text",
color = "blue",
position = position_dodge(width = 0.5), hjust = 1.2) +
stat_summary(fun.data = "mean_cl_boot",
mapping = aes(x = df_g$cond, y = df_g$RT),
color = "red") +
stat_summary(fun.data = n_fun,
mapping = aes(x = df_g$cond, y = df_g$RT),
geom = "text",
color = "red",
position = position_dodge(width = 0.5), hjust = 1.2) +
scale_y_continuous(name = "Reaction Time (ms)",
sec.axis = sec_axis(~./pct_scale, name = "Accuracy")) +
xlab("Condition") +
scale_x_discrete(labels=c("A1-B1" = "a",
"A1-B2" = "b",
"A2-B1" = "c",
"A2-B2" = "d"
)) +
ggtitle("Speed vs. accuracy") +
theme_bw() +
theme(
axis.title.y = element_text(color = "red"),
axis.text.y = element_text(color = "red"),
axis.title.y.right = element_text(color = "blue"),
axis.text.y.right = element_text(color = "blue"),
plot.title = element_text(hjust = 0.5)
)
Mathematical expressions and Chinese characters.
ggplot(data=data.frame(x = 0, y = 0)) +
geom_point(aes(x = x, y = y)) +
xlab(expression(paste(Sigma, sigma, R^{2*c}, L[1~d]))) +
ylab("纵坐标") +
theme(axis.title.y = element_text(family="STFangsong"))