This notebook provides the analysis regarding Hypothesis 1, Prediction 1b for the article: Heyne, M., Derrick, D., and Al-Tamimi, J. (under review). “Native language influence on brass instrument performance: An application of generalized additive mixed models (GAMMs) to midsagittal ultrasound images of the tongue”. Frontiers Research Topic: Models and Theories of Speech Production. Ed. Adamantios Gafos & Pascal van Lieshout.
# specify directory to save models and summaries
output_dir = "updated_models"
# specify whether to run models -> if set to false script will attempt to load saved models from output_dir
run_models = FALSE
Sys.setenv('MAPBOX_TOKEN' = 'sk.eyJ1IjoiZGRlcnJpY2siLCJhIjoiY2p2MW1ndnNoMXczYTRkbXd6dzRuMTQ2aCJ9.UTai4wWcFFVaJAuOrAX7VQ')
printPDF = TRUE
load_packages = c("parallel","mgcv", "itsadug", "rlist", "ggplot2", "plotly", "dplyr","tidyr")
for(pkg in load_packages){
eval(bquote(library(.(pkg))))
if (paste0("package:", pkg) %in% search()){
cat(paste0("Successfully loaded the ", pkg, " package.\n"))
}else{
install.packages(pkg)
eval(bquote(library(.(pkg))))
if (paste0("package:", pkg) %in% search()){
cat(paste0("Successfully loaded the ", pkg, " package.\n"))
}
}
}
## Successfully loaded the parallel package.
## Successfully loaded the mgcv package.
## Successfully loaded the itsadug package.
## Successfully loaded the rlist package.
## Successfully loaded the ggplot2 package.
## Successfully loaded the plotly package.
## Successfully loaded the dplyr package.
## Successfully loaded the tidyr package.
rm(load_packages, pkg)
# detect number of cores available for model calculations
ncores = detectCores()
cat(paste0("Number of cores available for model calculations set to ", ncores, "."))
## Number of cores available for model calculations set to 12.
# plot multiple GAM model outputs
plotly_model_outputs <- function(model, changing_cond, changing_var, constant_cond1, constant_var1, values, constant_cond2=NULL, constant_var2=NULL, print=TRUE){
if(length(constant_var1)>1 | length(constant_var2)>1){
print("Error: Constant variables can only have length 1.")
}else{
if(!is.null(constant_cond2) && !is.null(constant_var2) && length(changing_var)==2){
# works for models Notes.gam...
cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
# set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
max_ul = max(p1$fv$ul, p2$fv$ul)
max_fit = max(p1$fv$fit, p2$fv$fit)
maximum=max_fit+((max_ul-max_fit)/2)
# plot in polar coordinates
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
p
}else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)==2){
# no specific case yet
cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "')")))
p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "')")))
p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
# set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
max_ul = max(p1$fv$ul, p2$fv$ul)
max_fit = max(p1$fv$fit, p2$fv$fit)
maximum=max_fit+((max_ul-max_fit)/2)
# plot in polar coordinates
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
p
}else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)>2){
# works for models NZE.gam... or Tongan.gam...
for (i in 1:length(changing_var)){
if(changing_cond == constant_cond1){
cond_p1 = capture.output(cat(paste0("list(", constant_cond1, "='", constant_var1, "')")))
p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
# exception for KIT (='\\\\') when not using IPA symbols
if (changing_var[i]!="\\\\"){
cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[i], "')")))
p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
}else{
p2=plot_smooth(x=get(model), view=values, cond = list(token.ord='\\\\'))
}
# set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
max_ul = max(p1$fv$ul, p2$fv$ul)
max_fit = max(p1$fv$fit, p2$fv$fit)
maximum=max_fit+((max_ul-max_fit)/2)
# plot in polar coordinates
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=constant_var1) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[i]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=paste0("GAM smooths ",constant_var1," vs ", changing_var[i]))
Sys.sleep(0)
print(p)
}
}
}
}
}
# plot smooths with shading to indicate significant differences (Matthias Heyne, 2019)
plotly_smooths_w_sig_diff <- function(model, condition, var1, var2, values, language, print=TRUE){
# get intervals of significant differences by running plot_diff
# unfortunately setting plot=FALSE doesn't work as intervals of significant difference are not displayed!
# hardcoded condition
# output = capture.output(plot_diff(get(model), view=values,
# comp=list(tokenPooled.ord=c(var1, var2))))
# output = capture.output(plot_diff(get(model), view=values, comp=list(langNoteInt.ord=c(paste0("Tongan.", note, ".", intensity), paste0("NZE.", note, ".", intensity)))))
names_smooths=list()
if (condition=="tokenPooled.ord" && length(language)==1){
output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
names_smooths[1]=var1
names_smooths[2]=var2
plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
}else if (condition=="langNoteInt.ord" && length(language)==2){
output_comp = capture.output(cat(paste0("list(", condition, "=c('", language[1], ".", var1, ".", var2,
"', '", language[2], ".", var1, ".", var2, "'))")))
names_smooths[1]=paste0(language[1], ".", var1, ".", var2)
names_smooths[2]=paste0(language[2], ".", var1, ".", var2)
plot_title = paste0("GAM smooths ", language[1], ".", var1, ".", var2, " vs ", language[2], ".", var1, ".", var2)
}else if (condition=="native_lg.ord"){
output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
names_smooths[1]=var1
names_smooths[2]=var2
plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
}
# output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
output = capture.output(plot_diff(get(model), view=values, comp=eval(parse(text=output_comp))))
# no significant difference
if ((length(language)==1 && length(output)==7) | (length(language)==2 && length(output)==6)){
cat(paste0("Smooths for ", var1, " & ", var2, " are not significantly different.\n"))
dat1 = NA
assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
rm(dat1)
# run plot_smooth to grab data for polar plots
p1 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var1, tokenPooled.ord=var2))
p2 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var2, tokenPooled.ord=var1))
# set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
# plot in polar coordinates
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
Sys.sleep(0)
print(p)
# there are differences...
}else{
# grab intervals of significant differences from output
if (length(language)==1 && length(output)>=8){
sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
}else if (length(language)==2 && length(output)>=7){
sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[7], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[7], " "))[3]))
}
if (length(language)==1 && length(output)>=9){
sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
}else if (length(language)==2 && length(output)>=8){
sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
}
if (length(language)==1 && length(output)>=10){
sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
}else if (length(language)==2 && length(output)>=9){
sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
}
if (length(language)==1 && length(output)>=11){
sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[11], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[11], " "))[3]))
}else if (length(language)==2 && length(output)>=10){
sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
}
# write intervals of significant difference to variable
if ((length(language)==1 && length(output)>=11) | (length(language)==2 && length(output)>=10)){
dat1 = c(sig_diff1, sig_diff2, sig_diff3, sig_diff4)
}else if ((length(language)==1 && length(output)>=10) | (length(language)==2 && length(output)>=9)){
dat1 = c(sig_diff1, sig_diff2, sig_diff3)
}else if ((length(language)==1 && length(output)>=9) | (length(language)==2 && length(output)>=8)){
dat1 = c(sig_diff1, sig_diff2)
}else{
dat1 = sig_diff1
}
assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
rm(dat1, output)
# run plot_smooth to grab data for polar plots
if (condition=="tokenPooled.ord" && length(language)==1){
cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
}else if (condition=="langNoteInt.ord" && length(language)==2){
cond_p1 = capture.output(cat(paste0("list(", condition, "='", language[1], ".", var1, ".", var2,
"', ", condition, "='", language[2], ".", var1, ".", var2, "')")))
cond_p2 = capture.output(cat(paste0("list(", condition, "='", language[2], ".", var1, ".", var2,
"', ", condition, "='", language[1], ".", var1, ".", var2, "')")))
}else if (condition=="native_lg.ord"){
cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
}
# hardcoded condition
p1 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p1)))
p2 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p2)))
# set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
# plot in polar coordinates
if (exists("sig_diff4")){
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff4[1]*180/pi, sig_diff4[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
Sys.sleep(0)
print(p)
}else if (exists("sig_diff3")){
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
Sys.sleep(0)
print(p)
}else if (exists("sig_diff2")){
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
Sys.sleep(0)
print(p)
}else{
p=plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)),
title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
Sys.sleep(0)
print(p)
}
}
}
df <- read.csv("all_data_NZE_Tongan_w_context_checked_7_March_2019_not_cut_checked_27_Apr.csv", sep=',', stringsAsFactors = F)
# remove empty column
df$X = NULL
df$tokenPooled <- factor(df$tokenPooled)
df$subject <- factor(df$subject)
df$sex <- factor(df$sex)
df$native_lg <- factor(df$native_lg)
# updated
df$playing_proficiency[df$playing_proficiency == "intermediate"] <- "amateur"
df$playing_proficiency <- factor(df$playing_proficiency, levels = c("amateur","semi-professional","professional"))
df$block <- factor(df$block)
df$theta_uncut_z <- as.numeric(df$theta_uncut_z)
# updated
df$note_intensity <- factor(df$note_intensity, levels = c("piano","mezzopiano","mezzoforte","forte"))
df$tokenPooled <- factor(df$tokenPooled)
dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="speech") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))
dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)
dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)
dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")
dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}
dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2
dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"
dfSummary$langNoteInt <- interaction(dfSummary$native_lg,
dfSummary$tokenPooled)
dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"
dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"
dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"
dfSummary = na.omit(dfSummary)
for(i in unique(dfSummary$subject))
{
dfSubject = subset(dfSummary,dfSummary$subject == i)
for(j in unique(dfSubject$tokenPooled))
{
dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <-
dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
}
}
if (run_models == TRUE){
mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
mdl.sys.time1
# save model & model summary so they can be reloaded later
saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
capture.output(summary(VAR.gam.noAR.Mod1),
file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_vowels.txt"))
}else{
# reload model from output_dir
VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
}
if (run_models == TRUE){
# set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) + s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
mdl.sys.time2
# save model & model summary so they can be reloaded later
saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
capture.output(summary(VAR.gam.AR.Mod2),
file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_vowels.txt"))
}else{
# reload model from output_dir
VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
}
acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)
acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)
acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)
summary(VAR.gam.AR.Mod2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) +
## s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) +
## s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.936753 0.122926 64.565 < 2e-16 ***
## langNoteInt.ordNZE.ɐ -0.875477 1.526103 -0.574 0.5662
## langNoteInt.ordNZE.ɐː -2.584383 1.701939 -1.518 0.1289
## langNoteInt.ordNZE.ɒ -2.326501 2.645874 -0.879 0.3793
## langNoteInt.ordNZE.e 0.561595 1.532209 0.367 0.7140
## langNoteInt.ordTongan.e -2.138085 0.911078 -2.347 0.0190 *
## langNoteInt.ordNZE.ə 0.001008 1.222929 0.001 0.9993
## langNoteInt.ordNZE.ə# -1.742856 0.385723 -4.518 6.29e-06 ***
## langNoteInt.ordNZE.ɛ -1.091940 1.691811 -0.645 0.5187
## langNoteInt.ordNZE.ɘ 0.122685 2.001099 0.061 0.9511
## langNoteInt.ordTongan.i -1.713343 1.283160 -1.335 0.1818
## langNoteInt.ordNZE.iː -27.797793 16.120495 -1.724 0.0847 .
## langNoteInt.ordTongan.o -0.624437 4.786433 -0.130 0.8962
## langNoteInt.ordNZE.oː -2.096078 1.592190 -1.316 0.1880
## langNoteInt.ordNZE.ɵː -1.449308 2.287160 -0.634 0.5263
## langNoteInt.ordTongan.u -5.943667 10.312811 -0.576 0.5644
## langNoteInt.ordNZE.ʉː -2.653813 3.419110 -0.776 0.4377
## langNoteInt.ordNZE.ʊ -1.505100 3.458559 -0.435 0.6634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F
## s(theta_uncut_z) 8.848 8.928 55.747
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ 5.420 5.854 1.674
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː 6.106 6.560 3.215
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ 4.615 4.918 1.450
## s(theta_uncut_z):langNoteInt.ordNZE.e 6.837 7.138 3.215
## s(theta_uncut_z):langNoteInt.ordTongan.e 5.619 6.062 1.272
## s(theta_uncut_z):langNoteInt.ordNZE.ə 8.063 8.425 9.566
## s(theta_uncut_z):langNoteInt.ordNZE.ə# 8.464 8.750 18.808
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ 6.392 6.793 2.568
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ 7.243 7.595 4.282
## s(theta_uncut_z):langNoteInt.ordTongan.i 7.591 7.971 2.842
## s(theta_uncut_z):langNoteInt.ordNZE.iː 5.937 6.386 2.780
## s(theta_uncut_z):langNoteInt.ordTongan.o 5.905 6.288 1.986
## s(theta_uncut_z):langNoteInt.ordNZE.oː 6.221 6.622 3.074
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː 1.001 1.001 0.010
## s(theta_uncut_z):langNoteInt.ordTongan.u 6.314 6.770 1.986
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː 1.000 1.001 0.318
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ 1.000 1.000 0.073
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ 68.706 100.000 5.867
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː 67.797 100.000 6.526
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ 79.050 100.000 16.042
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e 65.030 99.000 10.504
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e 60.113 99.000 7.276
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə 57.296 100.000 12.035
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə# 45.015 99.000 2.521
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ 67.438 100.000 6.769
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ 70.237 100.000 11.450
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i 61.023 99.000 11.857
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː 79.066 99.000 39.596
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o 81.592 100.000 19.717
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː 67.050 100.000 11.473
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː 75.739 100.000 10.671
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u 83.577 100.000 37.279
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː 79.748 100.000 19.247
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ 77.589 100.000 25.613
## p-value
## s(theta_uncut_z) < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ 0.09929 .
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː 0.00279 **
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ 0.26697
## s(theta_uncut_z):langNoteInt.ordNZE.e 0.00248 **
## s(theta_uncut_z):langNoteInt.ordTongan.e 0.24579
## s(theta_uncut_z):langNoteInt.ordNZE.ə 1.56e-12 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ə# < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ 0.02705 *
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ 8.28e-05 ***
## s(theta_uncut_z):langNoteInt.ordTongan.i 0.00369 **
## s(theta_uncut_z):langNoteInt.ordNZE.iː 0.00712 **
## s(theta_uncut_z):langNoteInt.ordTongan.o 0.07179 .
## s(theta_uncut_z):langNoteInt.ordNZE.oː 0.00379 **
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː 0.91955
## s(theta_uncut_z):langNoteInt.ordTongan.u 0.05914 .
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː 0.57295
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ 0.78756
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə# < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.863 Deviance explained = 87.7%
## fREML = 24523 Scale est. = 2.1482 n = 12704
And finally overall variance differences by language
dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)
est = predict(VAR.gam.AR.Mod2,se=TRUE)
CI=1.98
printPDF=TRUE
dat1$predicted_values = est$fit
dat1$standard_error = est$se
# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]
# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))
smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))
smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))
smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))
smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))
smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))
# 90%
CI=1.645
NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)
NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)
Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)
Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)
j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
{
j = j + 1
}
if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) &
(Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
{
if (sigFlag == FALSE)
{
if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] ||
NZE_top_90$r[j] < Tongan_bottom_90$r[i])
{
# Significant!
sigFlag = TRUE
hold[k,1] = Tongan_top_90$theta[i]
}
}
else
{
if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] ||
NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
{
hold[k,2] = Tongan_top_90$theta[i]
k = k + 1
sigFlag = FALSE
}
}
}
}
# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)
p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE vowels") %>%
add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan vowels") %>%
add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(as.numeric(hold[2,1]), as.numeric(hold[2,2]), length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
add_trace(theta=seq(as.numeric(hold[3,1]), as.numeric(hold[3,2]), length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan vowel productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p
if(printPDF==TRUE)
{
orca(p, "Figure6.pdf",format="pdf")
}
#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)
dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="music") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))
dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)
dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)
dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")
dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}
dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2
dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"
dfSummary$langNoteInt <- interaction(dfSummary$native_lg,
dfSummary$tokenPooled)
dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"
dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"
dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"
dfSummary = na.omit(dfSummary)
for(i in unique(dfSummary$subject))
{
dfSubject = subset(dfSummary,dfSummary$subject == i)
for(j in unique(dfSubject$tokenPooled))
{
dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <-
dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
}
}
if (run_models == TRUE){
mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
mdl.sys.time1
# save model & model summary so they can be reloaded later
saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
capture.output(summary(VAR.gam.noAR.Mod1),
file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_notes.txt"))
}else{
# reload model from output_dir
VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
}
if (run_models == TRUE){
# set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) + s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
mdl.sys.time2
# save model & model summary so they can be reloaded later
saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
capture.output(summary(VAR.gam.AR.Mod2),
file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_notes.txt"))
}else{
# reload model from output_dir
VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
}
acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)
acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)
acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)
summary(VAR.gam.AR.Mod2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) +
## s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) +
## s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.8916 0.1406 56.142 < 2e-16 ***
## langNoteInt.ordTongan.Bb2 -0.7911 1.1702 -0.676 0.499030
## langNoteInt.ordNZE.Bb3 -2.0280 1.6032 -1.265 0.205920
## langNoteInt.ordTongan.Bb3 -1.8836 1.7785 -1.059 0.289599
## langNoteInt.ordNZE.D4 -3.3837 1.5121 -2.238 0.025280 *
## langNoteInt.ordTongan.D4 -4.1729 1.6716 -2.496 0.012578 *
## langNoteInt.ordNZE.F3 -2.2278 1.2172 -1.830 0.067265 .
## langNoteInt.ordTongan.F3 -1.8695 1.3557 -1.379 0.167956
## langNoteInt.ordNZE.F4 -4.5910 1.2732 -3.606 0.000314 ***
## langNoteInt.ordTongan.F4 -7.2303 2.2926 -3.154 0.001620 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F
## s(theta_uncut_z) 8.289 8.652 31.982
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2 1.000 1.000 0.625
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3 2.746 2.958 2.107
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3 8.045 8.307 3.560
## s(theta_uncut_z):langNoteInt.ordNZE.D4 3.251 3.499 3.544
## s(theta_uncut_z):langNoteInt.ordTongan.D4 3.921 4.196 2.706
## s(theta_uncut_z):langNoteInt.ordNZE.F3 1.569 1.639 0.345
## s(theta_uncut_z):langNoteInt.ordTongan.F3 6.895 7.302 2.024
## s(theta_uncut_z):langNoteInt.ordNZE.F4 3.950 4.267 4.068
## s(theta_uncut_z):langNoteInt.ordTongan.F4 4.290 4.567 3.329
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 68.005 99.000 12.819
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3 64.506 99.000 8.818
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 67.080 99.000 15.000
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4 62.594 99.000 9.110
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4 66.440 99.000 16.881
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3 70.290 99.000 8.578
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3 65.267 99.000 13.840
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4 56.859 99.000 7.765
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4 65.349 99.000 17.684
## p-value
## s(theta_uncut_z) < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2 0.42912
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3 0.10334
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3 0.00017 ***
## s(theta_uncut_z):langNoteInt.ordNZE.D4 0.01028 *
## s(theta_uncut_z):langNoteInt.ordTongan.D4 0.02488 *
## s(theta_uncut_z):langNoteInt.ordNZE.F3 0.68530
## s(theta_uncut_z):langNoteInt.ordTongan.F3 0.02581 *
## s(theta_uncut_z):langNoteInt.ordNZE.F4 0.00226 **
## s(theta_uncut_z):langNoteInt.ordTongan.F4 0.00647 **
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.817 Deviance explained = 83.5%
## fREML = 12394 Scale est. = 2.4004 n = 6446
And finally overall variance differences by language
dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)
est = predict(VAR.gam.AR.Mod2,se=TRUE)
CI=1.98
printPDF=TRUE
dat1$predicted_values = est$fit
dat1$standard_error = est$se
# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]
# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))
smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))
smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))
smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))
smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))
smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))
# 90%
CI=1.645
NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)
NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)
Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)
Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)
j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
{
j = j + 1
}
if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) &
(Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
{
if (sigFlag == FALSE)
{
if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] ||
NZE_top_90$r[j] < Tongan_bottom_90$r[i])
{
# Significant!
sigFlag = TRUE
hold[k,1] = Tongan_top_90$theta[i]
}
}
else
{
if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] ||
NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
{
hold[k,2] = Tongan_top_90$theta[i]
k = k + 1
sigFlag = FALSE
}
}
}
}
# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)
p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE notes") %>%
add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan notes") %>%
add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20),
r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan note productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p
if(printPDF==TRUE)
{
orca(p, "Figure6a.pdf",format="pdf")
}
## [16307:0920/163049.735749:ERROR:gles2_cmd_decoder.cc(17766)] [.RenderCompositor-0x7fe872021e00]GL ERROR :GL_INVALID_OPERATION : glBindTexImage2DCHROMIUM: no image found with the given ID
#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)