This notebook provides the analysis regarding Hypothesis 1, Prediction 1b for the article: Heyne, M., Derrick, D., and Al-Tamimi, J. (under review). “Native language influence on brass instrument performance: An application of generalized additive mixed models (GAMMs) to midsagittal ultrasound images of the tongue”. Frontiers Research Topic: Models and Theories of Speech Production. Ed. Adamantios Gafos & Pascal van Lieshout.

# specify directory to save models and summaries
output_dir = "updated_models"

# specify whether to run models -> if set to false script will attempt to load saved models from output_dir
run_models = FALSE
Sys.setenv('MAPBOX_TOKEN' = 'sk.eyJ1IjoiZGRlcnJpY2siLCJhIjoiY2p2MW1ndnNoMXczYTRkbXd6dzRuMTQ2aCJ9.UTai4wWcFFVaJAuOrAX7VQ')
printPDF = TRUE

Loading packages

load_packages = c("parallel","mgcv", "itsadug", "rlist", "ggplot2", "plotly", "dplyr","tidyr")
for(pkg in load_packages){
  eval(bquote(library(.(pkg))))
  if (paste0("package:", pkg) %in% search()){
    cat(paste0("Successfully loaded the ", pkg, " package.\n"))
  }else{
    install.packages(pkg)
    eval(bquote(library(.(pkg))))
    if (paste0("package:", pkg) %in% search()){
      cat(paste0("Successfully loaded the ", pkg, " package.\n"))
    }
  }
}

## Successfully loaded the parallel package.
## Successfully loaded the mgcv package.
## Successfully loaded the itsadug package.
## Successfully loaded the rlist package.
## Successfully loaded the ggplot2 package.
## Successfully loaded the plotly package.
## Successfully loaded the dplyr package.
## Successfully loaded the tidyr package.

rm(load_packages, pkg)

# detect number of cores available for model calculations
ncores = detectCores()
cat(paste0("Number of cores available for model calculations set to ", ncores, "."))

## Number of cores available for model calculations set to 12.

plotly_model_outputs function (Matthias Heyne, 2019)

# plot multiple GAM model outputs
plotly_model_outputs <- function(model, changing_cond, changing_var, constant_cond1, constant_var1, values, constant_cond2=NULL, constant_var2=NULL, print=TRUE){
  if(length(constant_var1)>1 | length(constant_var2)>1){
    print("Error: Constant variables can only have length 1.")
  }else{
    if(!is.null(constant_cond2) && !is.null(constant_var2) && length(changing_var)==2){
      # works for models Notes.gam...
      cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
      p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
      cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "', ", constant_cond2, "='", constant_var2, "')")))
      p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
      
      # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
      max_ul = max(p1$fv$ul, p2$fv$ul)
      max_fit = max(p1$fv$fit, p2$fv$fit)
      maximum=max_fit+((max_ul-max_fit)/2)
      
      # plot in polar coordinates
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
      p
    }else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)==2){
      # no specific case yet
      cond_p1 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[1], "', ", constant_cond1, "='", constant_var1, "')")))
      p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
      cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[2], "', ", constant_cond1, "='", constant_var1, "')")))
      p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
      
      # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
      max_ul = max(p1$fv$ul, p2$fv$ul)
      max_fit = max(p1$fv$fit, p2$fv$fit)
      maximum=max_fit+((max_ul-max_fit)/2)
      
      # plot in polar coordinates
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=changing_var[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=paste0("GAM smooths @", constant_var1, " & ", constant_var2))
      p
    }else if(is.null(constant_cond2) && is.null(constant_var2) && length(changing_var)>2){
      # works for models NZE.gam... or Tongan.gam...
      for (i in 1:length(changing_var)){
        if(changing_cond == constant_cond1){
          cond_p1 = capture.output(cat(paste0("list(", constant_cond1, "='", constant_var1, "')")))
          p1=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p1)))
          # exception for KIT (='\\\\') when not using IPA symbols
          if (changing_var[i]!="\\\\"){
            cond_p2 = capture.output(cat(paste0("list(", changing_cond, "='", changing_var[i], "')")))
            p2=plot_smooth(x=get(model), view=values, cond = eval(parse(text=cond_p2)))
          }else{
            p2=plot_smooth(x=get(model), view=values, cond = list(token.ord='\\\\'))
          }
          
          # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
          max_ul = max(p1$fv$ul, p2$fv$ul)
          max_fit = max(p1$fv$fit, p2$fv$fit)
          maximum=max_fit+((max_ul-max_fit)/2)

          # plot in polar coordinates
          p=plot_ly(type='scatterpolar', mode='lines') %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=constant_var1) %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=changing_var[i]) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
            add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
            layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                              angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
                   title=paste0("GAM smooths ",constant_var1," vs ", changing_var[i]))
          Sys.sleep(0)
          print(p)
        }
      }
    }
  }
}

plotly_sig_diff_notes function (Matthias Heyne, 2019) -> with updated legend position

# plot smooths with shading to indicate significant differences (Matthias Heyne, 2019)
plotly_smooths_w_sig_diff <- function(model, condition, var1, var2, values, language, print=TRUE){
  # get intervals of significant differences by running plot_diff
  # unfortunately setting plot=FALSE doesn't work as intervals of significant difference are not displayed!
  # hardcoded condition
  # output = capture.output(plot_diff(get(model), view=values, 
  #                                   comp=list(tokenPooled.ord=c(var1, var2))))
  # output = capture.output(plot_diff(get(model), view=values, comp=list(langNoteInt.ord=c(paste0("Tongan.", note, ".", intensity), paste0("NZE.", note, ".", intensity)))))
  names_smooths=list()
  if (condition=="tokenPooled.ord" && length(language)==1){
    output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
    names_smooths[1]=var1
    names_smooths[2]=var2
    plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
  }else if (condition=="langNoteInt.ord" && length(language)==2){
    output_comp = capture.output(cat(paste0("list(", condition, "=c('", language[1], ".", var1, ".", var2, 
                                            "', '", language[2], ".", var1, ".", var2, "'))")))
    names_smooths[1]=paste0(language[1], ".", var1, ".", var2)
    names_smooths[2]=paste0(language[2], ".", var1, ".", var2)
    plot_title = paste0("GAM smooths ", language[1], ".", var1, ".", var2, " vs ", language[2], ".", var1, ".", var2)
  }else if (condition=="native_lg.ord"){
    output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
    names_smooths[1]=var1
    names_smooths[2]=var2
    plot_title = paste0("GAM smooths ", language, " ", var1, " vs ", var2)
  }
  
  # output_comp = capture.output(cat(paste0("list(", condition, "=c(var1, var2))")))
  output = capture.output(plot_diff(get(model), view=values, comp=eval(parse(text=output_comp))))
  # no significant difference
  if ((length(language)==1 && length(output)==7) | (length(language)==2 && length(output)==6)){
    cat(paste0("Smooths for ", var1, " & ", var2, " are not significantly different.\n"))
    dat1 = NA
    assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
    rm(dat1)
    # run plot_smooth to grab data for polar plots
    p1 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var1, tokenPooled.ord=var2))
    p2 = plot_smooth(x=get(model), view=values, cond=list(tokenPooled.ord=var2, tokenPooled.ord=var1))
    # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
    maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
    # plot in polar coordinates
    p=plot_ly(type='scatterpolar', mode='lines') %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
      add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
      layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                        angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
             title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
    Sys.sleep(0)
    print(p)
  # there are differences...
  }else{
    # grab intervals of significant differences from output
    if (length(language)==1 && length(output)>=8){
      sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
    }else if (length(language)==2 && length(output)>=7){
      sig_diff1 = c(as.double(unlist(strsplit(unlist(strsplit(output[7], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[7], " "))[3]))
    }
    if (length(language)==1 && length(output)>=9){
      sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
    }else if (length(language)==2 && length(output)>=8){
      sig_diff2 = c(as.double(unlist(strsplit(unlist(strsplit(output[8], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[8], " "))[3]))
    }
    if (length(language)==1 && length(output)>=10){
      sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
    }else if (length(language)==2 && length(output)>=9){
      sig_diff3 = c(as.double(unlist(strsplit(unlist(strsplit(output[9], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[9], " "))[3]))
    }
    if (length(language)==1 && length(output)>=11){
      sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[11], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[11], " "))[3]))
    }else if (length(language)==2 && length(output)>=10){
      sig_diff4 = c(as.double(unlist(strsplit(unlist(strsplit(output[10], " "))[1], "\t"))[2]), as.double(unlist(strsplit(output[10], " "))[3]))
    }
    # write intervals of significant difference to variable
    if ((length(language)==1 && length(output)>=11) | (length(language)==2 && length(output)>=10)){
      dat1 = c(sig_diff1, sig_diff2, sig_diff3, sig_diff4)
    }else if ((length(language)==1 && length(output)>=10) | (length(language)==2 && length(output)>=9)){
      dat1 = c(sig_diff1, sig_diff2, sig_diff3)
    }else if ((length(language)==1 && length(output)>=9) | (length(language)==2 && length(output)>=8)){
      dat1 = c(sig_diff1, sig_diff2)
    }else{
      dat1 = sig_diff1
    }
    assign(paste0("int_sig_diff_", var1, "_", var2), dat1, envir = .GlobalEnv)
    rm(dat1, output)
    # run plot_smooth to grab data for polar plots
    if (condition=="tokenPooled.ord" && length(language)==1){
      cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
    }else if (condition=="langNoteInt.ord" && length(language)==2){
      cond_p1 = capture.output(cat(paste0("list(", condition, "='", language[1], ".", var1, ".", var2, 
                                "', ", condition, "='", language[2], ".", var1, ".", var2, "')")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "='", language[2], ".", var1, ".", var2, 
                                          "', ", condition, "='", language[1], ".", var1, ".", var2, "')")))
    }else if (condition=="native_lg.ord"){
      cond_p1 = capture.output(cat(paste0("list(", condition, "=var1)")))
      cond_p2 = capture.output(cat(paste0("list(", condition, "=var2)")))
    }
    # hardcoded condition
    p1 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p1)))
    p2 = plot_smooth(x=get(model), view=values, cond=eval(parse(text=cond_p2)))
    # set Rho max to the max of the fit + half the difference between max of the fit and the upper limit
    maximum=max(p1$fv$fit, p2$fv$fit)+((max(p1$fv$ul, p2$fv$ul)-max(p1$fv$fit, p2$fv$fit))/2)
    # plot in polar coordinates
    if (exists("sig_diff4")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff4[1]*180/pi, sig_diff4[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else if (exists("sig_diff3")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff3[1]*180/pi, sig_diff3[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else if (exists("sig_diff2")){
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff2[1]*180/pi, sig_diff2[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }else{
      p=plot_ly(type='scatterpolar', mode='lines') %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$fit, line=list(color="blue", width=2.5), name=names_smooths[1]) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ul, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p1$fv$theta_uncut_z*180/pi, r=p1$fv$ll, line=list(color="blue", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$fit, line=list(color="red", width=2.5), name=names_smooths[2]) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ul, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>% 
        add_trace(theta=p2$fv$theta_uncut_z*180/pi, r=p2$fv$ll, line=list(color="red", dash="dot", width=0.5), showlegend=FALSE) %>%
        add_trace(theta=seq(sig_diff1[1]*180/pi, sig_diff1[2]*180/pi, length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
        layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)),
                          angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), 
               title=plot_title, legend=list(orientation="h", xanchor="center", x=0.5))
      Sys.sleep(0)
      print(p)
    }
  }
}

load in data

df <- read.csv("all_data_NZE_Tongan_w_context_checked_7_March_2019_not_cut_checked_27_Apr.csv", sep=',', stringsAsFactors = F)

# remove empty column
df$X = NULL

df$tokenPooled <- factor(df$tokenPooled)
df$subject <- factor(df$subject)
df$sex <- factor(df$sex)
df$native_lg <- factor(df$native_lg)

# updated 
df$playing_proficiency[df$playing_proficiency == "intermediate"] <- "amateur"
df$playing_proficiency <- factor(df$playing_proficiency, levels = c("amateur","semi-professional","professional"))

df$block <- factor(df$block)
df$theta_uncut_z <- as.numeric(df$theta_uncut_z)

# updated
df$note_intensity <- factor(df$note_intensity, levels = c("piano","mezzopiano","mezzoforte","forte"))

df$tokenPooled <- factor(df$tokenPooled)

predict average values for note productions NZE vs Tongan

Data manipulation

dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="speech") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))


dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)

dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)

dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")

dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
  dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}

dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2

dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"

dfSummary$langNoteInt <- interaction(dfSummary$native_lg, 
dfSummary$tokenPooled)

dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"

dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"

dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"

dfSummary = na.omit(dfSummary)


for(i in unique(dfSummary$subject))
{
  dfSubject = subset(dfSummary,dfSummary$subject == i)
  for(j in unique(dfSubject$tokenPooled))
  {
    dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <- 
      dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
      min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
  }
}

Model specification

if (run_models == TRUE){
  mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
   mdl.sys.time1
   # save model & model summary so they can be reloaded later
   saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
   capture.output(summary(VAR.gam.noAR.Mod1), 
                  file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_vowels.txt"))
}else{
  # reload model from output_dir
  VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_vowels.rds"))
}

if (run_models == TRUE){
  # set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
  rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
  mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +  s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
  mdl.sys.time2
  # save model & model summary so they can be reloaded later
  saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
  capture.output(summary(VAR.gam.AR.Mod2), 
                 file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_vowels.txt"))
}else{
  # reload model from output_dir
  VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_vowels.rds"))
}

Model inspection

Checking ACF

Full

acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)

By Token

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)

By Language

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)

Summary of the model

summary(VAR.gam.AR.Mod2)

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) + 
##     s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) + 
##     s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
## 
## Parametric coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               7.936753   0.122926  64.565  < 2e-16 ***
## langNoteInt.ordNZE.ɐ     -0.875477   1.526103  -0.574   0.5662    
## langNoteInt.ordNZE.ɐː    -2.584383   1.701939  -1.518   0.1289    
## langNoteInt.ordNZE.ɒ     -2.326501   2.645874  -0.879   0.3793    
## langNoteInt.ordNZE.e      0.561595   1.532209   0.367   0.7140    
## langNoteInt.ordTongan.e  -2.138085   0.911078  -2.347   0.0190 *  
## langNoteInt.ordNZE.ə      0.001008   1.222929   0.001   0.9993    
## langNoteInt.ordNZE.ə#    -1.742856   0.385723  -4.518 6.29e-06 ***
## langNoteInt.ordNZE.ɛ     -1.091940   1.691811  -0.645   0.5187    
## langNoteInt.ordNZE.ɘ      0.122685   2.001099   0.061   0.9511    
## langNoteInt.ordTongan.i  -1.713343   1.283160  -1.335   0.1818    
## langNoteInt.ordNZE.iː   -27.797793  16.120495  -1.724   0.0847 .  
## langNoteInt.ordTongan.o  -0.624437   4.786433  -0.130   0.8962    
## langNoteInt.ordNZE.oː    -2.096078   1.592190  -1.316   0.1880    
## langNoteInt.ordNZE.ɵː    -1.449308   2.287160  -0.634   0.5263    
## langNoteInt.ordTongan.u  -5.943667  10.312811  -0.576   0.5644    
## langNoteInt.ordNZE.ʉː    -2.653813   3.419110  -0.776   0.4377    
## langNoteInt.ordNZE.ʊ     -1.505100   3.458559  -0.435   0.6634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                     edf  Ref.df      F
## s(theta_uncut_z)                                  8.848   8.928 55.747
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ             5.420   5.854  1.674
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː            6.106   6.560  3.215
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ             4.615   4.918  1.450
## s(theta_uncut_z):langNoteInt.ordNZE.e             6.837   7.138  3.215
## s(theta_uncut_z):langNoteInt.ordTongan.e          5.619   6.062  1.272
## s(theta_uncut_z):langNoteInt.ordNZE.ə             8.063   8.425  9.566
## s(theta_uncut_z):langNoteInt.ordNZE.ə#            8.464   8.750 18.808
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ             6.392   6.793  2.568
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ             7.243   7.595  4.282
## s(theta_uncut_z):langNoteInt.ordTongan.i          7.591   7.971  2.842
## s(theta_uncut_z):langNoteInt.ordNZE.iː            5.937   6.386  2.780
## s(theta_uncut_z):langNoteInt.ordTongan.o          5.905   6.288  1.986
## s(theta_uncut_z):langNoteInt.ordNZE.oː            6.221   6.622  3.074
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː            1.001   1.001  0.010
## s(theta_uncut_z):langNoteInt.ordTongan.u          6.314   6.770  1.986
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː            1.000   1.001  0.318
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ             1.000   1.000  0.073
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ    68.706 100.000  5.867
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː   67.797 100.000  6.526
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ    79.050 100.000 16.042
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e    65.030  99.000 10.504
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e 60.113  99.000  7.276
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə    57.296 100.000 12.035
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə#   45.015  99.000  2.521
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ    67.438 100.000  6.769
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ    70.237 100.000 11.450
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i 61.023  99.000 11.857
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː   79.066  99.000 39.596
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o 81.592 100.000 19.717
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː   67.050 100.000 11.473
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː   75.739 100.000 10.671
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u 83.577 100.000 37.279
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː   79.748 100.000 19.247
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ    77.589 100.000 25.613
##                                                   p-value    
## s(theta_uncut_z)                                  < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɐ             0.09929 .  
## s(theta_uncut_z):langNoteInt.ordNZE.ɐː            0.00279 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.ɒ             0.26697    
## s(theta_uncut_z):langNoteInt.ordNZE.e             0.00248 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.e          0.24579    
## s(theta_uncut_z):langNoteInt.ordNZE.ə            1.56e-12 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ə#            < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordNZE.ɛ             0.02705 *  
## s(theta_uncut_z):langNoteInt.ordNZE.ɘ            8.28e-05 ***
## s(theta_uncut_z):langNoteInt.ordTongan.i          0.00369 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.iː            0.00712 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.o          0.07179 .  
## s(theta_uncut_z):langNoteInt.ordNZE.oː            0.00379 ** 
## s(theta_uncut_z):langNoteInt.ordNZE.ɵː            0.91955    
## s(theta_uncut_z):langNoteInt.ordTongan.u          0.05914 .  
## s(theta_uncut_z):langNoteInt.ordNZE.ʉː            0.57295    
## s(theta_uncut_z):langNoteInt.ordNZE.ʊ             0.78756    
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɐː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɒ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.e     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.e  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ə#    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɛ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɘ     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.i  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.iː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.o  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.oː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ɵː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.u  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʉː    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.ʊ     < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.863   Deviance explained = 87.7%
## fREML =  24523  Scale est. = 2.1482    n = 12704

Plotting model predictions

Overall variance differences

And finally overall variance differences by language

dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)

est = predict(VAR.gam.AR.Mod2,se=TRUE)

CI=1.98
printPDF=TRUE

dat1$predicted_values = est$fit
dat1$standard_error = est$se

# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]

# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))

smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))

smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

# 90%

CI=1.645

NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
  while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
  {
    j = j + 1
  }
  if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) & 
      (Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
  {
    if (sigFlag == FALSE)
    {
      if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i])
      {
        # Significant!
        sigFlag = TRUE
        hold[k,1] = Tongan_top_90$theta[i]
      }
    }
    else
    {
      if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
      {
        hold[k,2] = Tongan_top_90$theta[i]
        k = k + 1
        sigFlag = FALSE
      }
    }
  }
}

# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)

p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE vowels") %>%
  add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan vowels") %>%
  add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
    add_trace(theta=seq(as.numeric(hold[2,1]), as.numeric(hold[2,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
      add_trace(theta=seq(as.numeric(hold[3,1]), as.numeric(hold[3,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
  layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan vowel productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p

if(printPDF==TRUE)
{
  orca(p, "Figure6.pdf",format="pdf")
}
#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)

Music

dfSummary <- group_by(df, subject, playing_proficiency, activity, native_lg, tokenPooled, theta_uncut_z_group = cut(theta_uncut_z,breaks=100)) %>% filter(activity=="music") %>% summarise(rhoVar = sd(rho_uncut_z,na.rm=TRUE))


dfSummary$theta_uncut_z = as.character(dfSummary$theta_uncut_z_group)

dfSummary$theta_uncut_z = gsub("\\[|\\]|\\(|\\)", "",dfSummary$theta_uncut_z)

dfSummary$theta_uncut_z = strsplit(dfSummary$theta_uncut_z,",")

dfSummary$theta_uncut_z2 = 1
for(i in c(1:nrow(dfSummary)))
{
  dfSummary$theta_uncut_z2[i] = mean(as.numeric(unlist(dfSummary$theta_uncut_z[i])))
}

dfSummary$theta_uncut_z=dfSummary$theta_uncut_z2

dfSummary$playing_proficiency.ord <- as.ordered(dfSummary$playing_proficiency)
contrasts(dfSummary$playing_proficiency.ord) <- "contr.treatment"

dfSummary$langNoteInt <- interaction(dfSummary$native_lg, 
dfSummary$tokenPooled)

dfSummary$langNoteInt.ord <- as.ordered(dfSummary$langNoteInt)
contrasts(dfSummary$langNoteInt.ord) <- "contr.treatment"

dfSummary$native_lg.ord <- as.ordered(dfSummary$native_lg)
contrasts(dfSummary$native_lg.ord) <- "contr.treatment"

dfSummary$tokenPooled.ord <- as.ordered(dfSummary$tokenPooled)
contrasts(dfSummary$tokenPooled.ord) <- "contr.treatment"

dfSummary = na.omit(dfSummary)


for(i in unique(dfSummary$subject))
{
  dfSubject = subset(dfSummary,dfSummary$subject == i)
  for(j in unique(dfSubject$tokenPooled))
  {
    dfSummary$start[dfSummary$subject == i & dfSummary$tokenPooled == j] <- 
      dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j] ==
      min(dfSummary$theta_uncut_z[dfSummary$subject == i & dfSummary$tokenPooled == j])
  }
}

Model specification

if (run_models == TRUE){
  mdl.sys.time1 <- system.time(VAR.gam.noAR.Mod1 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
   mdl.sys.time1
   # save model & model summary so they can be reloaded later
   saveRDS(VAR.gam.noAR.Mod1, paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
   capture.output(summary(VAR.gam.noAR.Mod1), 
                  file = paste0(output_dir,"/summary_VAR.gam.noAR.Mod1_notes.txt"))
}else{
  # reload model from output_dir
  VAR.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/VAR.gam.noAR.Mod1_notes.rds"))
}

if (run_models == TRUE){
  # set start_value_rho from VAR.gam.noAR.Mod1 as rho_est for the next model
  rho_est <- start_value_rho(VAR.gam.noAR.Mod1)
  mdl.sys.time2 <- system.time(VAR.gam.AR.Mod2 <- bam(rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs="cr", k=10) + s(theta_uncut_z, bs="cr", k=10, by=langNoteInt.ord) +  s(theta_uncut_z, subject, bs="fs", k=10, m=1, by=langNoteInt.ord), AR.start=dfSummary$start, rho=rho_est, data=dfSummary, discrete=TRUE, nthreads=ncores, method = "fREML"))
  mdl.sys.time2
  # save model & model summary so they can be reloaded later
  saveRDS(VAR.gam.AR.Mod2, paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
  capture.output(summary(VAR.gam.AR.Mod2), 
                 file = paste0(output_dir,"/summary_VAR.gam.AR.Mod2_notes.txt"))
}else{
  # reload model from output_dir
  VAR.gam.AR.Mod2 = readRDS(paste0(output_dir,"/VAR.gam.AR.Mod2_notes.rds"))
}

Model inspection

Checking ACF

Full

acf_resid(VAR.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)

By Token

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$tokenPooled),main = "Average ACF AR by tokenPooled",cex.lab=1.5,cex.axis=1.5)

By Language

acf_resid(VAR.gam.AR.Mod2,split_pred=list(dfSummary$native_lg),main = "Average ACF AR by native language",cex.lab=1.5,cex.axis=1.5)

Summary of the model

summary(VAR.gam.AR.Mod2)

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## rhoVar ~ langNoteInt.ord + s(theta_uncut_z, bs = "cr", k = 10) + 
##     s(theta_uncut_z, bs = "cr", k = 10, by = langNoteInt.ord) + 
##     s(theta_uncut_z, subject, bs = "fs", k = 10, m = 1, by = langNoteInt.ord)
## 
## Parametric coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 7.8916     0.1406  56.142  < 2e-16 ***
## langNoteInt.ordTongan.Bb2  -0.7911     1.1702  -0.676 0.499030    
## langNoteInt.ordNZE.Bb3     -2.0280     1.6032  -1.265 0.205920    
## langNoteInt.ordTongan.Bb3  -1.8836     1.7785  -1.059 0.289599    
## langNoteInt.ordNZE.D4      -3.3837     1.5121  -2.238 0.025280 *  
## langNoteInt.ordTongan.D4   -4.1729     1.6716  -2.496 0.012578 *  
## langNoteInt.ordNZE.F3      -2.2278     1.2172  -1.830 0.067265 .  
## langNoteInt.ordTongan.F3   -1.8695     1.3557  -1.379 0.167956    
## langNoteInt.ordNZE.F4      -4.5910     1.2732  -3.606 0.000314 ***
## langNoteInt.ordTongan.F4   -7.2303     2.2926  -3.154 0.001620 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                       edf Ref.df      F
## s(theta_uncut_z)                                    8.289  8.652 31.982
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2          1.000  1.000  0.625
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3             2.746  2.958  2.107
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3          8.045  8.307  3.560
## s(theta_uncut_z):langNoteInt.ordNZE.D4              3.251  3.499  3.544
## s(theta_uncut_z):langNoteInt.ordTongan.D4           3.921  4.196  2.706
## s(theta_uncut_z):langNoteInt.ordNZE.F3              1.569  1.639  0.345
## s(theta_uncut_z):langNoteInt.ordTongan.F3           6.895  7.302  2.024
## s(theta_uncut_z):langNoteInt.ordNZE.F4              3.950  4.267  4.068
## s(theta_uncut_z):langNoteInt.ordTongan.F4           4.290  4.567  3.329
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 68.005 99.000 12.819
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3    64.506 99.000  8.818
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 67.080 99.000 15.000
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4     62.594 99.000  9.110
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4  66.440 99.000 16.881
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3     70.290 99.000  8.578
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3  65.267 99.000 13.840
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4     56.859 99.000  7.765
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4  65.349 99.000 17.684
##                                                    p-value    
## s(theta_uncut_z)                                   < 2e-16 ***
## s(theta_uncut_z):langNoteInt.ordTongan.Bb2         0.42912    
## s(theta_uncut_z):langNoteInt.ordNZE.Bb3            0.10334    
## s(theta_uncut_z):langNoteInt.ordTongan.Bb3         0.00017 ***
## s(theta_uncut_z):langNoteInt.ordNZE.D4             0.01028 *  
## s(theta_uncut_z):langNoteInt.ordTongan.D4          0.02488 *  
## s(theta_uncut_z):langNoteInt.ordNZE.F3             0.68530    
## s(theta_uncut_z):langNoteInt.ordTongan.F3          0.02581 *  
## s(theta_uncut_z):langNoteInt.ordNZE.F4             0.00226 ** 
## s(theta_uncut_z):langNoteInt.ordTongan.F4          0.00647 ** 
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb2 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.Bb3    < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.Bb3 < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.D4     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.D4  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F3     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F3  < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordNZE.F4     < 2e-16 ***
## s(theta_uncut_z,subject):langNoteInt.ordTongan.F4  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.817   Deviance explained = 83.5%
## fREML =  12394  Scale est. = 2.4004    n = 6446

Plotting model predictions

Overall variance differences

And finally overall variance differences by language

dat1 = dfSummary
#dat1$predicted_values = predict(VAR.gam.AR.Mod2)

est = predict(VAR.gam.AR.Mod2,se=TRUE)

CI=1.98
printPDF=TRUE

dat1$predicted_values = est$fit
dat1$standard_error = est$se

# plot in polar coordinates using plotly
dat1_NZE = dat1[dat1$native_lg == "NZE",]
dat1_Tongan = dat1[dat1$native_lg == "Tongan",]

# estimate smooths using R's generic predict.smooth.spline function
smooth_NZE=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, dat1_NZE$predicted_values),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dash"))

smooth_NZE_top=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_NZE_bottom=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y, line=list(color="blue", dash="dot"))

smooth_Tongan=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, dat1_Tongan$predicted_values),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash=""))

smooth_Tongan_top=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

smooth_Tongan_bottom=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y, line=list(color="red", dash="dot"))

# 90%

CI=1.645

NZE_top_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values + (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

NZE_bottom_90=list(theta=seq(min(dat1_NZE$theta_uncut_z)*180/pi, max(dat1_NZE$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_NZE$theta_uncut_z, (dat1_NZE$predicted_values - (dat1_NZE$standard_error * CI))),
 seq(min(dat1_NZE$theta_uncut_z), max(dat1_NZE$theta_uncut_z), length=100))$y)

Tongan_top_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values + (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

Tongan_bottom_90=list(theta=seq(min(dat1_Tongan$theta_uncut_z)*180/pi, max(dat1_Tongan$theta_uncut_z)*180/pi, length=100), r=predict(smooth.spline(dat1_Tongan$theta_uncut_z, (dat1_Tongan$predicted_values - (dat1_Tongan$standard_error * CI))),
 seq(min(dat1_Tongan$theta_uncut_z), max(dat1_Tongan$theta_uncut_z), length=100))$y)

j = 1
k = 1
sigFlag = FALSE
hold = tribble(~thetaStart, ~thetaEnd)
for (i in c(1:99))
{
  while(Tongan_top_90$theta[i] > NZE_top_90$theta[j])
  {
    j = j + 1
  }
  if ((Tongan_top_90$theta[i] <= NZE_top_90$theta[j]) & 
      (Tongan_top_90$theta[i+1] > NZE_top_90$theta[j]))
  {
    if (sigFlag == FALSE)
    {
      if(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i])
      {
        # Significant!
        sigFlag = TRUE
        hold[k,1] = Tongan_top_90$theta[i]
      }
    }
    else
    {
      if(!(Tongan_top_90$r[i] < NZE_bottom_90$r[j] || 
         NZE_top_90$r[j] < Tongan_bottom_90$r[i]))
      {
        hold[k,2] = Tongan_top_90$theta[i]
        k = k + 1
        sigFlag = FALSE
      }
    }
  }
}

# set Rho max to the max of the predicted_values + 5
max_predictions = max(max(smooth_NZE_top$r), max(smooth_Tongan_top$r))
maximum=max_predictions+1
rm(max_predictions)

p = plot_ly(type='scatterpolar', mode='lines') %>%
add_trace(theta=smooth_NZE$theta, r=smooth_NZE$r, line=list(color=smooth_NZE$line$color[[1]], width=2.5, dash=smooth_NZE$line$dash[[1]]), name="NZE notes") %>%
  add_trace(theta=smooth_NZE_top$theta, r=smooth_NZE_top$r, line=list(color=smooth_NZE_top$line$color[[1]], width=0.5, dash=smooth_NZE_top$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_NZE_bottom$theta, r=smooth_NZE_bottom$r, line=list(color=smooth_NZE_bottom$line$color[[1]], width=0.5, dash=smooth_NZE_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  add_trace(theta=smooth_Tongan$theta, r=smooth_Tongan$r, line=list(color=smooth_Tongan$line$color[[1]], width=2.5, dash=smooth_Tongan$line$dash[[1]]), name="Tongan notes") %>%
  add_trace(theta=smooth_Tongan_top$theta, r=smooth_Tongan_top$r, line=list(color=smooth_Tongan_top$line$color[[1]], width=0.5, dash=smooth_Tongan_top$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=smooth_Tongan_bottom$theta, r=smooth_Tongan_bottom$r, line=list(color=smooth_Tongan_bottom$line$color[[1]], width=0.5, dash=smooth_Tongan_bottom$line$dash[[1]]), showlegend=FALSE) %>%
  
  add_trace(theta=seq(as.numeric(hold[1,1]), as.numeric(hold[1,2]), length.out=20), 
                  r=c(0, rep(maximum, 18), 0), line=list(color="black", width=0.5), fill="toself", fillcolor=rgb(0,0,0,max=255,alpha=25), showlegend=FALSE) %>%
  
  layout(polar=list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,maximum)), angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0)), title="Overall variability of NZE vs Tongan note productions", legend=list(orientation="h", xanchor="center", x=0.5,y=0.012))
p

if(printPDF==TRUE)
{
  orca(p, "Figure6a.pdf",format="pdf")
}

## [16307:0920/163049.735749:ERROR:gles2_cmd_decoder.cc(17766)] [.RenderCompositor-0x7fe872021e00]GL ERROR :GL_INVALID_OPERATION : glBindTexImage2DCHROMIUM: no image found with the given ID

#rm(dat1, dat1_NZE, dat1_Tongan, maximum, smooth_NZE, smooth_Tongan, est)

GAMMs analyses Trombone - Tongan vs English (NZE) -> Hypothesis 1, Prediction 1b

Jalal Al-Tamimi (Newcastle University)

Donald Derrick (University of Canterbury)

Matthias Heyne (Boston University)

20 September 2019

Loading packages

plotly_model_outputs function (Matthias Heyne, 2019)

plotly_sig_diff_notes function (Matthias Heyne, 2019) -> with updated legend position

load in data

predict average values for note productions NZE vs Tongan

Data manipulation

Model specification

Model inspection

Checking ACF

Full

By Token

By Language

Summary of the model

Plotting model predictions

Overall variance differences

Music

Model specification

Model inspection

Checking ACF

Full

By Token

By Language

Summary of the model

Plotting model predictions

Overall variance differences