This notebook provides the analysis regarding Hypothesis 1, Prediction 1b for the article: Heyne, M., Derrick, D., and Al-Tamimi, J. (under review). “Native language influence on brass instrument performance: An application of generalized additive mixed models (GAMMs) to midsagittal ultrasound images of the tongue”. Frontiers Research Topic: Models and Theories of Speech Production. Ed. Adamantios Gafos & Pascal van Lieshout.

# specify directory to save models and summaries
output_dir = "updated_models"

# specify whether to run models -> if set to false script will attempt to load saved models from output_dir
run_models = FALSE
Sys.setenv('MAPBOX_TOKEN' = 'sk.eyJ1IjoiZGRlcnJpY2siLCJhIjoiY2p2MW1ndnNoMXczYTRkbXd6dzRuMTQ2aCJ9.UTai4wWcFFVaJAuOrAX7VQ')
printPDF = TRUE

Loading packages

load_packages = c("parallel","mgcv", "itsadug", "rlist", "ggplot2", "plotly", "dplyr","tidyr")
for(pkg in load_packages){
  eval(bquote(library(.(pkg))))
  if (paste0("package:", pkg) %in% search()){
    cat(paste0("Successfully loaded the ", pkg, " package.\n"))
  }else{
    install.packages(pkg)
    eval(bquote(library(.(pkg))))
    if (paste0("package:", pkg) %in% search()){
      cat(paste0("Successfully loaded the ", pkg, " package.\n"))
    }
  }
}
## Successfully loaded the parallel package.
## Successfully loaded the mgcv package.
## Successfully loaded the itsadug package.
## Successfully loaded the rlist package.
## Successfully loaded the ggplot2 package.
## Successfully loaded the plotly package.
## Successfully loaded the dplyr package.
## Successfully loaded the tidyr package.
rm(load_packages, pkg)

# detect number of cores available for model calculations
ncores = detectCores()
cat(paste0("Number of cores available for model calculations set to ", ncores, "."))
## Number of cores available for model calculations set to 12.

plotly_model_outputs function (Matthias Heyne, 2019)

# plot multiple GAM model outputs
plotly_model_outputs <- function(model, changing_cond, changing_var, constant_cond1, constant_var1, values, constant_cond2=NULL, constant_var2=NULL, print=TRUE){
  if(length(constant_var1)>1 | length(constant_var2)>1){
    print("Error: Constant variables can only have length 1.")
  }else{
    if(!is.null(constant_cond2) && !is.null(constant_var2) && length(changing_var)==2){
      # works for models Notes.gam...
      cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
      p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
      cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
      p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
      
      # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
      max_ul = max(p1$fv$ul, p2$fv$ul)
      max_fit = max(p1$fv$fit, p2$fv$fit)
      maximum=max_fit+((max_ul-max_fit)/2)
      
      # plot in polar coordinates
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
      p
    }else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)==2){
      # no specific case yet
      cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "')")))
      p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
      cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "')")))
      p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
      
      # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
      max_ul = max(p1$fv$ul, p2$fv$ul)
      max_fit = max(p1$fv$fit, p2$fv$fit)
      maximum=max_fit+((max_ul-max_fit)/2)
      
      # plot in polar coordinates
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
      p
    }else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)>2){
      # works for models NZE.gam... or Tongan.gam...
      for (i in 1:length(changing_var)){
        if(changing_cond == constant_cond1){
          cond_p1 = capture.output(cat(paste0("list(", constant_cond1, "='", constant_var1, "')")))
          p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
          # exception for KIT (='\\\\') when not using IPA symbols
          if (changing_var[i]!="\\\\"){
            cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[i], "')")))
            p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
          }else{
            p2=plot_smooth(x=get(model), view=values, cond = list(token.ord='\\\\'))
          }
          
          # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
          max_ul = max(p1$fv$ul, p2$fv$ul)
          max_fit = max(p1$fv$fit, p2$fv$fit)
          maximum=max_fit+((max_ul-max_fit)/2)

          # plot in polar coordinates
          p=plot_ly(type='scatterpolar', mode='lines') %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=constant_var1) %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[i]) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
            layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                              angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
                   title=paste0("GAM smooths ",constant_var1," vs ", changing_var[i]))
          Sys.sleep(0)
          print(p)
        }
      }
    }
  }
}

plotly_sig_diff_notes function (Matthias Heyne, 2019) -> with updated legend position

# plot smooths with shading to indicate significant differences (Matthias Heyne, 2019)
plotly_smooths_w_sig_diff <- function(model, condition, var1, var2, values, language, print=TRUE){
  # get intervals of significant differences by running plot_diff
  # unfortunately setting plot=FALSE doesn't work as intervals of significant difference are not displayed!
  # hardcoded condition
  # output = capture.output(plot_diff(get(model), view=values, 
  #                                   comp=list(tokenPooled.ord=c(var1, var2))))
  # output = capture.output(plot_diff(get(model), view=values, comp=list(langNoteInt.ord=c(paste0("Tongan.", note, ".", intensity), paste0("NZE.", note, ".", intensity)))))
  names_smooths=list()
  if (condition=="tokenPooled.ord" && length(language)==1){
    output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
    names_smooths[1]=var1
    names_smooths[2]=var2
    plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
  }else if (condition=="langNoteInt.ord" && length(language)==2){
    output_comp = capture.output(cat(paste0("list(", condition, "=c('", language[1], ".", var1, ".", var2, 
                                            "', '", language[2], ".", var1, ".", var2, "'))")))
    names_smooths[1]=paste0(language[1], ".", var1, ".", var2)
    names_smooths[2]=paste0(language[2], ".", var1, ".", var2)
    plot_title = paste0("GAM smooths ", language[1], ".", var1, ".", var2, " vs ", language[2], ".", var1, ".", var2)
  }else if (condition=="native_lg.ord"){
    output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
    names_smooths[1]=var1
    names_smooths[2]=var2
    plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
  }
  
  # output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
  output = capture.output(plot_diff(get(model), view=values, comp=eval(parse(text=output_comp))))
  # no significant difference
  if ((length(language)==1 && length(output)==7) | (length(language)==2 && length(output)==6)){
    cat(paste0("Smooths for ", var1, " & ", var2, " are not significantly different.\n"))
    dat1 = NA
    assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
    rm(dat1)
    # run plot_smooth to grab data for polar plots
    p1 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var1, tokenPooled.ord=var2))
    p2 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var2, tokenPooled.ord=var1))
    # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
    maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
    # plot in polar coordinates
    p=plot_ly(type='scatterpolar', mode='lines') %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
      layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                        angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
             title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
    Sys.sleep(0)
    print(p)
  # there are differences...
  }else{
    # grab intervals of significant differences from output
    if (length(language)==1 && length(output)>=8){
      sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
    }else if (length(language)==2 && length(output)>=7){
      sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[7], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[7], " "))[3]))
    }
    if (length(language)==1 && length(output)>=9){
      sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
    }else if (length(language)==2 && length(output)>=8){
      sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
    }
    if (length(language)==1 && length(output)>=10){
      sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
    }else if (length(language)==2 && length(output)>=9){
      sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
    }
    if (length(language)==1 && length(output)>=11){
      sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[11], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[11], " "))[3]))
    }else if (length(language)==2 && length(output)>=10){
      sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
    }
    # write intervals of significant difference to variable
    if ((length(language)==1 && length(output)>=11) | (length(language)==2 && length(output)>=10)){
      dat1 = c(sig_diff1, sig_diff2, sig_diff3, sig_diff4)
    }else if ((length(language)==1 && length(output)>=10) | (length(language)==2 && length(output)>=9)){
      dat1 = c(sig_diff1, sig_diff2, sig_diff3)
    }else if ((length(language)==1 && length(output)>=9) | (length(language)==2 && length(output)>=8)){
      dat1 = c(sig_diff1, sig_diff2)
    }else{
      dat1 = sig_diff1
    }
    assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
    rm(dat1, output)
    # run plot_smooth to grab data for polar plots
    if (condition=="tokenPooled.ord" && length(language)==1){
      cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
    }else if (condition=="langNoteInt.ord" && length(language)==2){
      cond_p1 = capture.output(cat(paste0("list(", condition, "='", language[1], ".", var1, ".", var2, 
                                "', ", condition, "='", language[2], ".", var1, ".", var2, "')")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "='", language[2], ".", var1, ".", var2, 
                                          "', ", condition, "='", language[1], ".", var1, ".", var2, "')")))
    }else if (condition=="native_lg.ord"){
      cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
    }
    # hardcoded condition
    p1 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p1)))
    p2 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p2)))
    # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
    maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
    # plot in polar coordinates
    if (exists("sig_diff4")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff4[1]*180/pi, sig_diff4[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else if (exists("sig_diff3")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else if (exists("sig_diff2")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else{
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }
  }
}

load in data

df <- read.csv("all_data_NZE_Tongan_w_context_checked_7_March_2019_not_cut_checked_27_Apr.csv", sep=',', stringsAsFactors = F)

# remove empty column
df$X = NULL

df$tokenPooled <- factor(df$tokenPooled)
df$subject <- factor(df$subject)
df$sex <- factor(df$sex)
df$native_lg <- factor(df$native_lg)

# updated 
df$playing_proficiency[df$playing_proficiency == "intermediate"] <- "amateur"
df$playing_proficiency <- factor(df$playing_proficiency, levels = c("amateur","semi-professional","professional"))

df$block <- factor(df$block)
df$theta_uncut_z <- as.numeric(df$theta_uncut_z)

# updated
df$note_intensity <- factor(df$note_intensity, levels = c("piano","mezzopiano","mezzoforte","forte"))

df$tokenPooled <- factor(df$tokenPooled)

predict average values for note productions NZE vs Tongan

Data manipulation

dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="speech") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))


dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)

dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)

dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")

dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
  dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}

dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2

dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"

dfSummary$langNoteInt <- interaction(dfSummary$native_lg, 
dfSummary$tokenPooled)

dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"

dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"

dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"

dfSummary = na.omit(dfSummary)


for(i in unique(dfSummary$subject))
{
  dfSubject = subset(dfSummary,dfSummary$subject == i)
  for(j in unique(dfSubject$tokenPooled))
  {
    dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <- 
      dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
      min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
  }
}

Model specification

if (run_models == TRUE){
  mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
   mdl.sys.time1
   # save model & model summary so they can be reloaded later
   saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
   capture.output(summary(VAR.gam.noAR.Mod1), 
                  file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_vowels.txt"))
}else{
  # reload model from output_dir
  VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
}

if (run_models == TRUE){
  # set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
  rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
  mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +  s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
  mdl.sys.time2
  # save model & model summary so they can be reloaded later
  saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
  capture.output(summary(VAR.gam.AR.Mod2), 
                 file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_vowels.txt"))
}else{
  # reload model from output_dir
  VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
}

Model inspection

Checking ACF

Full

acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)

By Token

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)

By Language

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)

Summary of the model

summary(VAR.gam.AR.Mod2)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) + 
##     s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) + 
##     s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
## 
## Parametric coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               7.936753   0.122926  64.565  < 2e-16 ***
## langNoteInt.ordNZE.ɐ     -0.875477   1.526103  -0.574   0.5662    
## langNoteInt.ordNZE.ɐː    -2.584383   1.701939  -1.518   0.1289    
## langNoteInt.ordNZE.ɒ     -2.326501   2.645874  -0.879   0.3793    
## langNoteInt.ordNZE.e      0.561595   1.532209   0.367   0.7140    
## langNoteInt.ordTongan.e  -2.138085   0.911078  -2.347   0.0190 *  
## langNoteInt.ordNZE.ə      0.001008   1.222929   0.001   0.9993    
## langNoteInt.ordNZE.ə#    -1.742856   0.385723  -4.518 6.29e-06 ***
## langNoteInt.ordNZE.ɛ     -1.091940   1.691811  -0.645   0.5187    
## langNoteInt.ordNZE.ɘ      0.122685   2.001099   0.061   0.9511    
## langNoteInt.ordTongan.i  -1.713343   1.283160  -1.335   0.1818    
## langNoteInt.ordNZE.iː   -27.797793  16.120495  -1.724   0.0847 .  
## langNoteInt.ordTongan.o  -0.624437   4.786433  -0.130   0.8962    
## langNoteInt.ordNZE.oː    -2.096078   1.592190  -1.316   0.1880    
## langNoteInt.ordNZE.ɵː    -1.449308   2.287160  -0.634   0.5263    
## langNoteInt.ordTongan.u  -5.943667  10.312811  -0.576   0.5644    
## langNoteInt.ordNZE.ʉː    -2.653813   3.419110  -0.776   0.4377    
## langNoteInt.ordNZE.ʊ     -1.505100   3.458559  -0.435   0.6634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                     edf  Ref.df      F
## s(theta_uncut_z)                                  8.848   8.928 55.747
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ             5.420   5.854  1.674
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː            6.106   6.560  3.215
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ             4.615   4.918  1.450
## s(theta_uncut_z):langNoteInt.ordNZE.e             6.837   7.138  3.215
## s(theta_uncut_z):langNoteInt.ordTongan.e          5.619   6.062  1.272
## s(theta_uncut_z):langNoteInt.ordNZE.ə             8.063   8.425  9.566
## s(theta_uncut_z):langNoteInt.ordNZE.ə#            8.464   8.750 18.808
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ             6.392   6.793  2.568
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ             7.243   7.595  4.282
## s(theta_uncut_z):langNoteInt.ordTongan.i          7.591   7.971  2.842
## s(theta_uncut_z):langNoteInt.ordNZE.iː            5.937   6.386  2.780
## s(theta_uncut_z):langNoteInt.ordTongan.o          5.905   6.288  1.986
## s(theta_uncut_z):langNoteInt.ordNZE.oː            6.221   6.622  3.074
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː            1.001   1.001  0.010
## s(theta_uncut_z):langNoteInt.ordTongan.u          6.314   6.770  1.986
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː            1.000   1.001  0.318
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ             1.000   1.000  0.073
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ    68.706 100.000  5.867
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː   67.797 100.000  6.526
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ    79.050 100.000 16.042
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e    65.030  99.000 10.504
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e 60.113  99.000  7.276
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə    57.296 100.000 12.035
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə#   45.015  99.000  2.521
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ    67.438 100.000  6.769
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ    70.237 100.000 11.450
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i 61.023  99.000 11.857
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː   79.066  99.000 39.596
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o 81.592 100.000 19.717
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː   67.050 100.000 11.473
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː   75.739 100.000 10.671
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u 83.577 100.000 37.279
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː   79.748 100.000 19.247
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ    77.589 100.000 25.613
##                                                   p-value    
## s(theta_uncut_z)                                  < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ             0.09929 .  
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː            0.00279 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ             0.26697    
## s(theta_uncut_z):langNoteInt.ordNZE.e             0.00248 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.e          0.24579    
## s(theta_uncut_z):langNoteInt.ordNZE.ə            1.56e-12 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ə#            < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ             0.02705 *  
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ            8.28e-05 ***
## s(theta_uncut_z):langNoteInt.ordTongan.i          0.00369 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.iː            0.00712 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.o          0.07179 .  
## s(theta_uncut_z):langNoteInt.ordNZE.oː            0.00379 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː            0.91955    
## s(theta_uncut_z):langNoteInt.ordTongan.u          0.05914 .  
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː            0.57295    
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ             0.78756    
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə#    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ     < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.863   Deviance explained = 87.7%
## fREML =  24523  Scale est. = 2.1482    n = 12704

Plotting model predictions

Overall variance differences

And finally overall variance differences by language

dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)

est = predict(VAR.gam.AR.Mod2,se=TRUE)
CI=1.98
printPDF=TRUE

dat1$predicted_values = est$fit
dat1$standard_error = est$se

# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]

# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))

smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))

smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

# 90%

CI=1.645

NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
  while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
  {
    j = j + 1
  }
  if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) & 
      (Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
  {
    if (sigFlag == FALSE)
    {
      if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i])
      {
        # Significant!
        sigFlag = TRUE
        hold[k,1] = Tongan_top_90$theta[i]
      }
    }
    else
    {
      if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
      {
        hold[k,2] = Tongan_top_90$theta[i]
        k = k + 1
        sigFlag = FALSE
      }
    }
  }
}

# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)

p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE vowels") %>%
  add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan vowels") %>%
  add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
    add_trace(theta=seq(as.numeric(hold[2,1]), as.numeric(hold[2,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
      add_trace(theta=seq(as.numeric(hold[3,1]), as.numeric(hold[3,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
  layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan vowel productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p
if(printPDF==TRUE)
{
  orca(p, "Figure6.pdf",format="pdf")
}
#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)

Music

dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="music") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))


dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)

dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)

dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")

dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
  dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}

dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2

dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"

dfSummary$langNoteInt <- interaction(dfSummary$native_lg, 
dfSummary$tokenPooled)

dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"

dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"

dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"

dfSummary = na.omit(dfSummary)


for(i in unique(dfSummary$subject))
{
  dfSubject = subset(dfSummary,dfSummary$subject == i)
  for(j in unique(dfSubject$tokenPooled))
  {
    dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <- 
      dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
      min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
  }
}

Model specification

if (run_models == TRUE){
  mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
   mdl.sys.time1
   # save model & model summary so they can be reloaded later
   saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
   capture.output(summary(VAR.gam.noAR.Mod1), 
                  file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_notes.txt"))
}else{
  # reload model from output_dir
  VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
}

if (run_models == TRUE){
  # set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
  rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
  mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +  s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
  mdl.sys.time2
  # save model & model summary so they can be reloaded later
  saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
  capture.output(summary(VAR.gam.AR.Mod2), 
                 file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_notes.txt"))
}else{
  # reload model from output_dir
  VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
}

Model inspection

Checking ACF

Full

acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)

By Token

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)

By Language

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)

Summary of the model

summary(VAR.gam.AR.Mod2)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) + 
##     s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) + 
##     s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
## 
## Parametric coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 7.8916     0.1406  56.142  < 2e-16 ***
## langNoteInt.ordTongan.Bb2  -0.7911     1.1702  -0.676 0.499030    
## langNoteInt.ordNZE.Bb3     -2.0280     1.6032  -1.265 0.205920    
## langNoteInt.ordTongan.Bb3  -1.8836     1.7785  -1.059 0.289599    
## langNoteInt.ordNZE.D4      -3.3837     1.5121  -2.238 0.025280 *  
## langNoteInt.ordTongan.D4   -4.1729     1.6716  -2.496 0.012578 *  
## langNoteInt.ordNZE.F3      -2.2278     1.2172  -1.830 0.067265 .  
## langNoteInt.ordTongan.F3   -1.8695     1.3557  -1.379 0.167956    
## langNoteInt.ordNZE.F4      -4.5910     1.2732  -3.606 0.000314 ***
## langNoteInt.ordTongan.F4   -7.2303     2.2926  -3.154 0.001620 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                       edf Ref.df      F
## s(theta_uncut_z)                                    8.289  8.652 31.982
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2          1.000  1.000  0.625
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3             2.746  2.958  2.107
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3          8.045  8.307  3.560
## s(theta_uncut_z):langNoteInt.ordNZE.D4              3.251  3.499  3.544
## s(theta_uncut_z):langNoteInt.ordTongan.D4           3.921  4.196  2.706
## s(theta_uncut_z):langNoteInt.ordNZE.F3              1.569  1.639  0.345
## s(theta_uncut_z):langNoteInt.ordTongan.F3           6.895  7.302  2.024
## s(theta_uncut_z):langNoteInt.ordNZE.F4              3.950  4.267  4.068
## s(theta_uncut_z):langNoteInt.ordTongan.F4           4.290  4.567  3.329
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 68.005 99.000 12.819
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3    64.506 99.000  8.818
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 67.080 99.000 15.000
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4     62.594 99.000  9.110
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4  66.440 99.000 16.881
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3     70.290 99.000  8.578
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3  65.267 99.000 13.840
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4     56.859 99.000  7.765
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4  65.349 99.000 17.684
##                                                    p-value    
## s(theta_uncut_z)                                   < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2         0.42912    
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3            0.10334    
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3         0.00017 ***
## s(theta_uncut_z):langNoteInt.ordNZE.D4             0.01028 *  
## s(theta_uncut_z):langNoteInt.ordTongan.D4          0.02488 *  
## s(theta_uncut_z):langNoteInt.ordNZE.F3             0.68530    
## s(theta_uncut_z):langNoteInt.ordTongan.F3          0.02581 *  
## s(theta_uncut_z):langNoteInt.ordNZE.F4             0.00226 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.F4          0.00647 ** 
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.817   Deviance explained = 83.5%
## fREML =  12394  Scale est. = 2.4004    n = 6446

Plotting model predictions

Overall variance differences

And finally overall variance differences by language

dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)

est = predict(VAR.gam.AR.Mod2,se=TRUE)
CI=1.98
printPDF=TRUE

dat1$predicted_values = est$fit
dat1$standard_error = est$se

# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]

# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))

smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))

smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

# 90%

CI=1.645

NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
  while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
  {
    j = j + 1
  }
  if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) & 
      (Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
  {
    if (sigFlag == FALSE)
    {
      if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i])
      {
        # Significant!
        sigFlag = TRUE
        hold[k,1] = Tongan_top_90$theta[i]
      }
    }
    else
    {
      if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
      {
        hold[k,2] = Tongan_top_90$theta[i]
        k = k + 1
        sigFlag = FALSE
      }
    }
  }
}

# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)

p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE notes") %>%
  add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan notes") %>%
  add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
  layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan note productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p
if(printPDF==TRUE)
{
  orca(p, "Figure6a.pdf",format="pdf")
}
## [16307:0920/163049.735749:ERROR:gles2_cmd_decoder.cc(17766)] [.RenderCompositor-0x7fe872021e00]GL ERROR :GL_INVALID_OPERATION : glBindTexImage2DCHROMIUM: no image found with the given ID
#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)