This notebook provides the second half of the full analysis of the article: Heyne, M., Derrick, D., and Al-Tamimi, J. (under review). “Native language influence on brass instrument performance: An application of generalized additive mixed models (GAMMs) to midsagittal ultrasound images of the tongue”. Frontiers Research Topic: Models and Theories of Speech Production. Ed. Adamantios Gafos & Pascal van Lieshout.
# specify directory to save models and summaries
output_dir = "updated_models"
# specify whether to run models -> if set to false script will attempt to load saved models from output_dir
run_models = FALSE
load_packages = c("readr","knitr","ggplot2","mgcv","itsadug","parallel","dplyr","rlist","plotly")
# dplyr, rlist, and plotly are required by the custom plotting functions
for(pkg in load_packages){
eval(bquote(library(.(pkg))))
if (paste0("package:", pkg) %in% search()){
cat(paste0("Successfully loaded the ", pkg, " package.\n"))
}else{
install.packages(pkg)
eval(bquote(library(.(pkg))))
if (paste0("package:", pkg) %in% search()){
cat(paste0("Successfully loaded the ", pkg, " package.\n"))
}
}
}
Successfully loaded the readr package.
Successfully loaded the knitr package.
Successfully loaded the ggplot2 package.
Successfully loaded the mgcv package.
Successfully loaded the itsadug package.
Successfully loaded the parallel package.
Successfully loaded the dplyr package.
Successfully loaded the rlist package.
Successfully loaded the plotly package.
rm(load_packages, pkg)
# detect number of cores available for model calculations
ncores = detectCores()
cat(paste0("Number of cores available for model calculations set to ", ncores, "."))
Number of cores available for model calculations set to 8.
# save plots by using the option from the html widget created by markdown
# updated 13 April for conflated Tongan vowels
# This function plots multiple smoothing splines in the same window
plotly_scatterpolar_multiplot <- function(df, horizontal, vertical, cols2plot, print=TRUE){
if (length(cols2plot)>2){
print("ERROR: You specified more than 2 columns of values to plot.")
}else{
dat1=df
df_name=deparse(substitute(df))
# layout option 1
if (length(horizontal)==2 & length(vertical)==1){
# Note, Intensity, Language
hori1=nrow(unique(select(dat1, horizontal[1])))
hori2=nrow(unique(select(dat1, horizontal[2])))
hori=hori1*hori2
vert=nrow(unique(select(dat1, vertical[1])))
dat1=select(dat1, c(horizontal[1],horizontal[2],vertical[1],cols2plot[1],cols2plot[2]))
dat1=droplevels(dat1)
var_hori1=levels(dat1[,1])
var_hori2=levels(dat1[,2])
var_vert1=levels(dat1[,3])
# set up line types & colors
ltypes=list("","dash") # match length of hori1
colors=list("blue","green","orange","red") # match length of hori2
cat(paste0("Proceeding to assemble a ", hori, "x", vert, " multiplot.\n"))
cat(paste0("Your plot will show the columns/variables ",horizontal[1]," & ",horizontal[2]," in the horizontal direction and ",vertical[1]," in the vertical direction.\n"))
cat(paste0(horizontal[1], " will be plotted using the following linestyles: -> "))
for (n in 1:length(var_hori1)){
if (n<length(var_hori1)){
cat(paste0(var_hori1[n], ": ", ltypes[n], " - "))
}else{
cat(paste0(var_hori1[n], ": ", ltypes[n], "\n"))
}
}
cat(paste0(horizontal[2], " will be plotted using the following colors: -> "))
for (n in 1:length(var_hori2)){
if (n<length(var_hori2)){
cat(paste0(var_hori2[n], ": ", colors[n], " - "))
}else{
cat(paste0(var_hori2[n], ": ", colors[n], "\n"))
}
}
rm(n)
cat(paste0(vertical[1], " will be shown in the vertical direction from ", var_vert1[1], " (bottom) to ", var_vert1[length(var_vert1)], " (top).\n"))
# assemble layout options for all subplots
# plot_specs set as default
plot_specs = list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,max(dat1$rho_uncut_z)), tickfont=list(size=2)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0, tickfont=list(size=4)))
# set layout options for required number of subplots
for (i in 1:hori){
for (j in 1:vert){
specsX=list.append(plot_specs, domain=list(x=c((i-1)/hori+(1/hori*0.2), i/hori-1/hori*0.2),
y=c((j-1)/vert+(1/vert*0.1),j/vert-1/vert*0.1)))
assign(paste0("sub_plot",((j-1)*hori)+i), specsX)
}
}
rm(i, j, specsX)
# assemble smoothing splines for traces
for (j in 1:vert){
# subset data set by vertical
dat2=dat1[dat1[,3]==var_vert1[j],]
for (i1 in 1:hori1){
# subset data set by horizontal[1]
dat3=dat2[dat2[,1]==var_hori1[i1],]
for (i2 in 1:hori2){
# subset data set by horizontal[2]
dat4=dat3[dat3[,2]==var_hori2[i2],]
if (!nrow(dat4)==0){
if ((((j-1)*hori)+((i1-1)*hori2)+i2)==1){
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[i2], dash=ltypes[i1]))
assign(paste0("trace",((j-1)*hori)+((i1-1)*hori2)+i2),traceX)
}else if ((((j-1)*hori)+((i1-1)*hori2)+i2)<=hori){
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[i2], dash=ltypes[i1]), subplot=paste0("polar",((j-1)*hori)+((i1-1)*hori2)+i2))
assign(paste0("trace",((j-1)*hori)+((i1-1)*hori2)+i2),traceX)
}else{
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[i2], dash=ltypes[i1]), subplot=paste0("polar",((j-1)*hori)+((i1-1)*hori2)+i2), showlegend=FALSE)
assign(paste0("trace",((j-1)*hori)+((i1-1)*hori2)+i2),traceX)
}
}
}
}
}
rm(j, i1, i2, traceX, dat2, dat3, dat4)
# plot assembled traces with assembed layout specifications
p = plot_ly(type='scatterpolar', mode='lines')
dont_plot=c()
p = add_trace(p, theta=trace1$theta, r=trace1$r, line=list(color=trace1$line$color[[1]], dash=trace1$line$dash[[1]]))
for (k in 2:(hori*vert)){
if (exists(paste0("trace",k))){
p = add_trace(p, theta=get(paste0("trace",k))$theta, r=get(paste0("trace",k))$r,
subplot=get(paste0("trace",k))$subplot,
line=list(color=get(paste0("trace",k))$line$color[[1]], dash=get(paste0("trace",k))$line$dash[[1]]))
}else{
dont_plot=c(dont_plot,k)
}
}
# set layout
layout_comp = capture.output(
for (l in 1:(hori*vert)){
if (is.na(match(l, dont_plot))){
if (l==1){
cat(paste0("layout(p, polar=sub_plot",l,", "))
}else if (l<=hori){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else if (l<hori*vert){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else{
cat(paste0("polar",l,"=sub_plot",l,", showlegend=FALSE)"))
}
}
})
p; eval(parse(text=layout_comp))
# layout option 2
}else if (length(horizontal)==1 & length(vertical)==2){
# Subject, Note, Intensity
hori=nrow(unique(select(dat1, horizontal[1])))
vert1=nrow(unique(select(dat1, vertical[1])))
vert2=nrow(unique(select(dat1, vertical[2])))
vert=vert1*vert2
dat1=select(dat1, c(horizontal[1],vertical[1],vertical[2],cols2plot[1],cols2plot[2]))
# dat1[,1]=horizontal[1]; dat1[,2]=horizontal[2]; dat1[,3]=vertical[1];
dat1=droplevels(dat1)
var_hori1=levels(dat1[,1])
var_vert1=levels(dat1[,2])
var_vert2=levels(dat1[,3])
# set up line types & colors
colors=list("blue","green","orange","red","gray") # match length of vert1
ltypes=list("","dash","dashdot","dot") # match length of vert2
cat(paste0("Proceeding to assemble a ", hori, "x", vert, " multiplot.\n"))
cat(paste0("Your plot will show the columns/variables ",vertical[1]," & ",vertical[2]," in the vertical direction and ",horizontal[1]," in the horizontal direction.\n"))
cat(paste0(vertical[1], " will be plotted using the following colors: -> "))
for (n in 1:length(var_vert1)){
if (n<length(var_vert1)){
cat(paste0(var_vert1[n], ": ", colors[n], " - "))
}else{
cat(paste0(var_vert1[n], ": ", colors[n], "\n"))
}
}
cat(paste0(vertical[2], " will be plotted using the following linestyles: -> "))
for (n in 1:length(var_vert2)){
if (n<length(var_vert2)){
cat(paste0(var_vert2[n], ": ", ltypes[n], " - "))
}else{
cat(paste0(var_vert2[n], ": ", ltypes[n], "\n"))
}
}
rm(n)
cat(paste0(horizontal[1], " will be shown in the horizontal direction from ", var_hori1[1], " (left) to ", var_hori1[length(var_hori1)], " (right).\n"))
# assemble layout options for all subplots
# plot_specs set as default
plot_specs = list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,max(dat1$rho_uncut_z)), tickfont=list(size=2)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0, tickfont=list(size=4)))
# set layout options for required number of subplots
for (i in 1:hori){
for (j in 1:vert){
specsX=list.append(plot_specs, domain=list(x=c((i-1)/hori+(1/hori*0.2), i/hori-1/hori*0.2),
y=c((j-1)/vert+(1/vert*0.1),j/vert-1/vert*0.1)))
assign(paste0("sub_plot",((j-1)*hori)+i), specsX)
}
}
rm(i, j, specsX)
# assemble smoothing splines for traces
for (i in 1:hori){
# subset data set by horizontal
dat2=dat1[dat1[,1]==var_hori1[i],]
for (j1 in 1:vert1){
# subset data set by vertical[1]
dat3=dat2[dat2[,2]==var_vert1[j1],]
for (j2 in 1:vert2){
# subset data set by vertical[2]
dat4=dat3[dat3[,3]==var_vert2[j2],]
if (!nrow(dat4)==0){
if (i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)==1){
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[j1], dash=ltypes[j2]))
assign(paste0("trace", i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)), traceX)
}else if (i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)<=hori){
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[j1], dash=ltypes[j2]), subplot=paste0("polar",i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)))
assign(paste0("trace",i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)),traceX)
}else{
# assemble trace & assign number
traceX=list(theta=seq(min(dat4$theta_uncut_z)*180/pi,max(dat4$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat4$theta_uncut_z, dat4$rho_uncut_z),
seq(min(dat4$theta_uncut_z),max(dat4$theta_uncut_z), length=100))$y,
line=list(color=colors[j1], dash=ltypes[j2]), subplot=paste0("polar",i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)), showlegend=FALSE)
assign(paste0("trace",i+((j1-1)*vert)+((j2-1)*hori)+((j1-1)*vert)),traceX)
}
}
}
}
}
rm(i, j1, j2, traceX, dat2, dat3, dat4)
# plot assembled traces with assembed layout specifications
p = plot_ly(type='scatterpolar', mode='lines')
dont_plot=c()
p = add_trace(p, theta=trace1$theta, r=trace1$r, line=list(color=trace1$line$color[[1]], dash=trace1$line$dash[[1]]))
for (k in 2:(hori*vert)){
if (exists(paste0("trace",k))){
p = add_trace(p, theta=get(paste0("trace",k))$theta, r=get(paste0("trace",k))$r,
subplot=get(paste0("trace",k))$subplot,
line=list(color=get(paste0("trace",k))$line$color[[1]], dash=get(paste0("trace",k))$line$dash[[1]]))
}else{
dont_plot=c(dont_plot,k)
}
}
# set layout
layout_comp = capture.output(
for (l in 1:(hori*vert)){
if (is.na(match(l, dont_plot))){
if (l==1){
cat(paste0("layout(p, polar=sub_plot",l,", "))
}else if (l<=hori){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else if (l<hori*vert){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else{
cat(paste0("polar",l,"=sub_plot",l,", showlegend=FALSE)"))
}
}
})
p; eval(parse(text=layout_comp))
# layout option 3
}else if (length(horizontal)==1 & length(vertical)==1){
# Subject, tokenPooled
hori=nrow(unique(select(dat1, horizontal[1])))
vert=nrow(unique(select(dat1, vertical[1])))
dat1=select(dat1, c(horizontal[1],vertical[1],cols2plot[1],cols2plot[2]))
dat1=droplevels(dat1)
var_hori1=levels(dat1[,1])
var_vert1=levels(dat1[,2])
# set up line types & colors
if (unique(df$native_lg=="Tongan")){
# levels(dfTongan$token)
colors=list("#D50D0B","#D50D0B","#003380","#003380","#FF7B00","#FF7B00","#009737","#009737","#C20088","#C20088","#191919","#191919","#191919","#191919","#191919")
ltypes=list("","dash","","dash","","dash","","dash","","dash","","dash","dashdot","dot","dash")
}else if (unique(df$native_lg=="NZE")){
# levels(dfNZE$token)
colors=list("#D50D0B","#990000","#0075DC","#E082B4","#003380","#FF7B00","#009737","#00AFC3","#C20088","#8F48B7","#ACB500","#7B4937","#6C6C6C","#191919","#191919","#191919","#191919","#191919")
ltypes=list("","","","","","","","","","","","","","","dash","dashdot","dot","dash")
}
cat(paste0("Proceeding to assemble a ", hori, "x", vert, " multiplot.\n"))
cat(paste0("Your plot will show the columns/variables ",horizontal[1]," in the horizontal direction and ",vertical[1]," in the vertical direction.\n"))
cat(paste0(vertical[1], " will be shown in the vertical direction from ", var_vert1[1], " (bottom) to ", var_vert1[length(var_vert1)], " (top).\n"))
# assemble layout options for all subplots
# plot_specs set as default
plot_specs = list(sector=c(20,160), radialaxis=list(angle=90, range=c(0,max(dat1$rho_uncut_z)), tickfont=list(size=2)),
angularaxis=list(thetaunit='radians', direction="clockwise", rotation=0, tickfont=list(size=4)))
# set layout options for required number of subplots
for (i in 1:hori){
for (j in 1:vert){
specsX=list.append(plot_specs, domain=list(x=c((i-1)/hori+(1/hori*0.2), i/hori-1/hori*0.2),
y=c((j-1)/vert+(1/vert*0.1),j/vert-1/vert*0.1)))
assign(paste0("sub_plot",((j-1)*hori)+i), specsX)
}
}
rm(i, j, specsX)
# assemble smoothing splines for traces
for (i in 1:hori){
# subset data set by horizontal
dat2=dat1[dat1[,1]==var_hori1[i],]
for (j in 1:vert){
# subset data set by vertical[1]
dat3=dat2[dat2[,2]==var_vert1[j],]
if (!nrow(dat3)==0){
if (i+(j-1)*hori==1){
# assemble trace & assign number
traceX=list(theta=seq(min(dat3$theta_uncut_z)*180/pi,max(dat3$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat3$theta_uncut_z, dat3$rho_uncut_z),
seq(min(dat3$theta_uncut_z),max(dat3$theta_uncut_z), length=100))$y,
line=list(color=colors[j], dash=ltypes[j]))
assign(paste0("trace", i+(j-1)*hori), traceX)
}else if (i+(j-1)*hori<=hori){
# assemble trace & assign number
traceX=list(theta=seq(min(dat3$theta_uncut_z)*180/pi,max(dat3$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat3$theta_uncut_z, dat3$rho_uncut_z),
seq(min(dat3$theta_uncut_z),max(dat3$theta_uncut_z), length=100))$y,
line=list(color=colors[j], dash=ltypes[j]), subplot=paste0("polar",i+(j-1)*hori))
assign(paste0("trace", i+(j-1)*hori), traceX)
}else{
# assemble trace & assign number
traceX=list(theta=seq(min(dat3$theta_uncut_z)*180/pi,max(dat3$theta_uncut_z)*180/pi, length=100),
r=predict(smooth.spline(dat3$theta_uncut_z, dat3$rho_uncut_z),
seq(min(dat3$theta_uncut_z),max(dat3$theta_uncut_z), length=100))$y,
line=list(color=colors[j], dash=ltypes[j]), subplot=paste0("polar",i+(j-1)*hori), showlegend=FALSE)
assign(paste0("trace", i+(j-1)*hori), traceX)
}
}
}
}
rm(i, j, traceX, dat2, dat3)
# plot assembled traces with assembed layout specifications
p = plot_ly(type='scatterpolar', mode='lines')
dont_plot=c()
p = add_trace(p, theta=trace1$theta, r=trace1$r, line=list(color=trace1$line$color[[1]], dash=trace1$line$dash[[1]]))
for (k in 2:(hori*vert)){
if (exists(paste0("trace",k))){
p = add_trace(p, theta=get(paste0("trace",k))$theta, r=get(paste0("trace",k))$r,
subplot=get(paste0("trace",k))$subplot,
line=list(color=get(paste0("trace",k))$line$color[[1]], dash=get(paste0("trace",k))$line$dash[[1]]))
}else{
dont_plot=c(dont_plot,k)
}
}
# set layout
layout_comp = capture.output(
for (l in 1:(hori*vert)){
if (is.na(match(l, dont_plot))){
if (l==1){
cat(paste0("layout(p, polar=sub_plot",l,", "))
}else if (l<=hori){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else if (l<hori*vert){
cat(paste0("polar",l,"=sub_plot",l,", "))
}else{
cat(paste0("polar",l,"=sub_plot",l,", showlegend=FALSE)"))
}
}
})
p; eval(parse(text=layout_comp))
}else{
cat("Sorry, this layout is not yet implemented in the function. Currently the options are either 2 variables shown horizontally and 1 shown vertically or 1 horizontally and 2 vertically.\n")
cat("Usage: plotly_scatterpolar_multiplot(df, horizontal, vertical, cols2plot, print=TRUE) ->\n where df refers to the data.frame to plot, horizontal & vertical specify the column names to use as grouping variables,\n and cols2plot refers to the 2 columns of values to plot.\n")
cat("Use the c(x, y) notation to specify multiple colums for horizontal and/or vertical and for the cols2plot columns.\n")
}
}
}
df <- read.csv("GAMM_Trombone_data.csv", sep=',', stringsAsFactors = F)
# remove empty column
df$X = NULL
df$tokenPooled <- factor(df$tokenPooled)
df$subject <- factor(df$subject)
df$native_lg <- factor(df$native_lg)
# df$playing_proficiency[df$playing_proficiency == "intermediate"] <- "amateur"
df$playing_proficiency <- factor(df$playing_proficiency, levels = c("amateur","intermediate","semi-professional","professional"))
df$block <- factor(df$block)
df$point <- as.numeric(df$point)
df$note_intensity <- factor(df$note_intensity, levels = c("piano","mezzopiano","mezzoforte","forte"))
# remove fortissimo tokens
df = df[!(is.na(df$note_intensity) & df$activity=="music"),]
str(df)
'data.frame': 1968400 obs. of 28 variables:
$ subject : Factor w/ 20 levels "S1","S12","S14",..: 19 19 19 19 19 19 19 19 19 19 ...
$ native_lg : Factor w/ 2 levels "NZE","Tongan": 2 2 2 2 2 2 2 2 2 2 ...
$ playing_proficiency : Factor w/ 4 levels "amateur","intermediate",..: 1 1 1 1 1 1 1 1 1 1 ...
$ age_range : chr "40-45" "40-45" "40-45" "40-45" ...
$ activity : chr "speech" "speech" "speech" "speech" ...
$ block : Factor w/ 25 levels "english1","english10",..: 20 20 20 20 20 20 20 20 20 20 ...
$ label : chr "a:?-'a=" "a:?-'a=" "a:?-'a=" "a:?-'a=" ...
$ points : int 1 2 3 4 5 6 7 8 9 10 ...
$ token : chr "a" "a" "a" "a" ...
$ token_IPA : chr "a" "a" "a" "a" ...
$ tokenPooled : Factor w/ 22 levels "a","ɐ","ɐː","ɒ",..: 1 1 1 1 1 1 1 1 1 1 ...
$ x_00 : num -35.4 -33.2 -30.9 -28.7 -26.5 ...
$ y_00 : num 228 228 228 228 228 ...
$ theta_uncut_z : num -1.91 -1.9 -1.89 -1.89 -1.88 ...
$ rho_uncut_z : num 247 247 247 246 246 ...
$ note_intensity : Factor w/ 4 levels "piano","mezzopiano",..: NA NA NA NA NA NA NA NA NA NA ...
$ speech_preceding_sound : chr "glottal_stop" "glottal_stop" "glottal_stop" "glottal_stop" ...
$ speech_preceding_sound_IPA: chr "ʔ" "ʔ" "ʔ" "ʔ" ...
$ speech_following_sound : chr "NULL" "NULL" "NULL" "NULL" ...
$ speech_following_sound_IPA: chr "NULL" "NULL" "NULL" "NULL" ...
$ speech_prec_pooled : chr "glottals" "glottals" "glottals" "glottals" ...
$ speech_fol_pooled : chr "NULL" "NULL" "NULL" "NULL" ...
$ vowels_pooled : chr "low" "low" "low" "low" ...
$ x_orig : num 522 524 526 529 531 ...
$ y_orig : num 169 169 169 169 169 169 169 169 169 169 ...
$ theta_orig : num -1.72 -1.72 -1.71 -1.7 -1.69 ...
$ rho_orig : num 231 230 230 230 229 ...
$ point : num 1 2 3 4 5 6 7 8 9 10 ...
for NZE - note that we put the note intensity in place of preceeding and following context for notes. This makes the models run more effectively
# using columns with IPA symbols
dfNZE <- subset(df,df$native_lg=="NZE")
dfNZE$tokenPooled <- factor(dfNZE$tokenPooled, levels = c("ɐː","ɐ","ɛ","ɵː","e","iː","ʉː","ʊ","oː","ɒ","ɘ","ə","ə#","Bb2","F3","Bb3","D4","F4"))
dfNZE$playing_proficiency <- as.factor(dfNZE$playing_proficiency)
# change NAs to NULL
# checked that only NAs are for speech tokens
# added removal of fortissimo tokens above!
dfNZE$note_intensity[is.na(dfNZE$note_intensity)] = "NULL"
# we're using speech_prec_pooled & speech_fol_pooled to create interactions below
# neither include NAs and both have NULL for speech tokens where there were no preceding/following sounds and intensity for the note tokens
levels(dfNZE$tokenPooled)
[1] "ɐː" "ɐ" "ɛ" "ɵː" "e" "iː" "ʉː" "ʊ" "oː" "ɒ" "ɘ" "ə" "ə#" "Bb2" "F3"
[16] "Bb3" "D4" "F4"
levels(dfNZE$playing_proficiency)
[1] "amateur" "intermediate" "semi-professional" "professional"
levels(dfNZE$note_intensity)
[1] "piano" "mezzopiano" "mezzoforte" "forte"
dfTongan <- subset(df,df$native_lg=="Tongan")
dfTongan$tokenPooled <- factor(dfTongan$tokenPooled, levels = c("aː","a","eː","e","iː","i","uː","u","oː","o","Bb2","F3","Bb3","D4","F4"))
dfTongan$tokenPooled[dfTongan$tokenPooled == "aː"] = "a"
dfTongan$tokenPooled[dfTongan$tokenPooled == "eː"] = "e"
dfTongan$tokenPooled[dfTongan$tokenPooled == "iː"] = "i"
dfTongan$tokenPooled[dfTongan$tokenPooled == "uː"] = "u"
dfTongan$tokenPooled[dfTongan$tokenPooled == "oː"] = "o"
dfTongan$tokenPooled <- factor(dfTongan$tokenPooled)
dfTongan$playing_proficiency <- as.factor(dfTongan$playing_proficiency)
# we're using speech_prec_pooled & speech_fol_pooled to create interactions below
# neither include NAs and both have NULL for speech tokens where there were no preceding/following sounds and intensity for the note tokens
# speech_fol_pooled includes NAs that should be NULL
# checked that these NAs were only for speech tokens!
dfTongan$speech_prec_pooled[is.na(dfTongan$speech_prec_pooled)] = "NULL"
dfTongan$speech_fol_pooled[is.na(dfTongan$speech_fol_pooled)] = "NULL"
levels(dfTongan$tokenPooled)
[1] "a" "e" "i" "u" "o" "Bb2" "F3" "Bb3" "D4" "F4"
levels(dfTongan$playing_proficiency)
[1] "amateur" "intermediate" "semi-professional" "professional"
levels(dfTongan$note_intensity)
[1] "piano" "mezzopiano" "mezzoforte" "forte"
kable(table(dfNZE$tokenPooled,dfNZE$native_lg),format="html")
NZE | Tongan | |
---|---|---|
ɐː | 52400 | 0 |
ɐ | 61700 | 0 |
ɛ | 63200 | 0 |
ɵː | 53300 | 0 |
e | 53300 | 0 |
iː | 47500 | 0 |
ʉː | 43900 | 0 |
ʊ | 19400 | 0 |
oː | 31300 | 0 |
ɒ | 52300 | 0 |
ɘ | 72500 | 0 |
ə | 154800 | 0 |
ə# | 77800 | 0 |
Bb2 | 68700 | 0 |
F3 | 130300 | 0 |
Bb3 | 115000 | 0 |
D4 | 43000 | 0 |
F4 | 14500 | 0 |
kable(table(dfNZE$note_intensity,dfNZE$native_lg),format="html")
NZE | Tongan | |
---|---|---|
piano | 16100 | 0 |
mezzopiano | 14400 | 0 |
mezzoforte | 314300 | 0 |
forte | 26700 | 0 |
kable(table(dfNZE$playing_proficiency,dfNZE$native_lg),format="html")
NZE | Tongan | |
---|---|---|
amateur | 503400 | 0 |
intermediate | 249700 | 0 |
semi-professional | 132600 | 0 |
professional | 269200 | 0 |
kable(table(dfNZE$age_range,dfNZE$native_lg),format="html")
NZE | Tongan | |
---|---|---|
20-25 | 133300 | 0 |
25-30 | 232900 | 0 |
30-35 | 386700 | 0 |
35-40 | 22500 | 0 |
60-65 | 111800 | 0 |
65-70 | 267700 | 0 |
kable(table(dfTongan$tokenPooled,dfTongan$native_lg),format="html")
NZE | Tongan | |
---|---|---|
a | 0 | 121200 |
e | 0 | 71000 |
i | 0 | 102500 |
u | 0 | 61700 |
o | 0 | 85800 |
Bb2 | 0 | 62600 |
F3 | 0 | 132900 |
Bb3 | 0 | 116300 |
D4 | 0 | 44800 |
F4 | 0 | 14700 |
kable(table(dfTongan$note_intensity,dfTongan$native_lg),format="html")
NZE | Tongan | |
---|---|---|
piano | 0 | 10600 |
mezzopiano | 0 | 8100 |
mezzoforte | 0 | 330400 |
forte | 0 | 22200 |
kable(table(dfTongan$playing_proficiency,dfTongan$native_lg),format="html")
NZE | Tongan | |
---|---|---|
amateur | 0 | 462300 |
intermediate | 0 | 0 |
semi-professional | 0 | 261500 |
professional | 0 | 89700 |
kable(table(dfTongan$age_range,dfTongan$native_lg),format="html")
NZE | Tongan | |
---|---|---|
15-20 | 0 | 244600 |
20-25 | 0 | 82200 |
25-30 | 0 | 173000 |
30-35 | 0 | 265100 |
40-45 | 0 | 48600 |
Before running anything, we start by visualising the data
Let’s start with the NZE data. We see that speakers are variable in how they are producing the vowels (which is normal).
plotly_scatterpolar_multiplot(df=dfNZE, horizontal="subject", vertical="tokenPooled", cols2plot=c("theta_uncut_z","rho_uncut_z"))
Proceeding to assemble a 10x18 multiplot.
Your plot will show the columns/variables subject in the horizontal direction and tokenPooled in the vertical direction.
tokenPooled will be shown in the vertical direction from ɐː (bottom) to F4 (top).
Moving on to the Tongan data, we see again that speakers are variable in how they are producing vowels (which is normal).
plotly_scatterpolar_multiplot(df=dfTongan, horizontal="subject", vertical="tokenPooled", cols2plot=c("theta_uncut_z","rho_uncut_z"))
Proceeding to assemble a 10x10 multiplot.
Your plot will show the columns/variables subject in the horizontal direction and tokenPooled in the vertical direction.
tokenPooled will be shown in the vertical direction from a (bottom) to F4 (top).
We will create two variables, one that combines token and preceeding context (either sound or note intensity), another that combines token and following context. This will allow us later on to use these instead of subject only to model the within subject variation with respect to the two other predictors (note and intensity)
dfNZE$subVowelInt <- interaction(dfNZE$subject, dfNZE$tokenPooled)
dfNZE$precSoundVowelInt <- interaction(dfNZE$speech_prec_pooled, dfNZE$tokenPooled)
dfNZE$follSoundVowelInt <- interaction(dfNZE$speech_fol_pooled, dfNZE$tokenPooled)
cat("\nNZE data\n")
NZE data
levels(dfNZE$subVowelInt)
[1] "S1.ɐː" "S12.ɐː" "S14.ɐː" "S15.ɐː" "S16.ɐː" "S17.ɐː" "S18.ɐː" "S19.ɐː" "S20.ɐː"
[10] "S21.ɐː" "S22.ɐː" "S24.ɐː" "S25.ɐː" "S26.ɐː" "S27.ɐː" "S29.ɐː" "S3.ɐː" "S30.ɐː"
[19] "S4.ɐː" "S5.ɐː" "S1.ɐ" "S12.ɐ" "S14.ɐ" "S15.ɐ" "S16.ɐ" "S17.ɐ" "S18.ɐ"
[28] "S19.ɐ" "S20.ɐ" "S21.ɐ" "S22.ɐ" "S24.ɐ" "S25.ɐ" "S26.ɐ" "S27.ɐ" "S29.ɐ"
[37] "S3.ɐ" "S30.ɐ" "S4.ɐ" "S5.ɐ" "S1.ɛ" "S12.ɛ" "S14.ɛ" "S15.ɛ" "S16.ɛ"
[46] "S17.ɛ" "S18.ɛ" "S19.ɛ" "S20.ɛ" "S21.ɛ" "S22.ɛ" "S24.ɛ" "S25.ɛ" "S26.ɛ"
[55] "S27.ɛ" "S29.ɛ" "S3.ɛ" "S30.ɛ" "S4.ɛ" "S5.ɛ" "S1.ɵː" "S12.ɵː" "S14.ɵː"
[64] "S15.ɵː" "S16.ɵː" "S17.ɵː" "S18.ɵː" "S19.ɵː" "S20.ɵː" "S21.ɵː" "S22.ɵː" "S24.ɵː"
[73] "S25.ɵː" "S26.ɵː" "S27.ɵː" "S29.ɵː" "S3.ɵː" "S30.ɵː" "S4.ɵː" "S5.ɵː" "S1.e"
[82] "S12.e" "S14.e" "S15.e" "S16.e" "S17.e" "S18.e" "S19.e" "S20.e" "S21.e"
[91] "S22.e" "S24.e" "S25.e" "S26.e" "S27.e" "S29.e" "S3.e" "S30.e" "S4.e"
[100] "S5.e" "S1.iː" "S12.iː" "S14.iː" "S15.iː" "S16.iː" "S17.iː" "S18.iː" "S19.iː"
[109] "S20.iː" "S21.iː" "S22.iː" "S24.iː" "S25.iː" "S26.iː" "S27.iː" "S29.iː" "S3.iː"
[118] "S30.iː" "S4.iː" "S5.iː" "S1.ʉː" "S12.ʉː" "S14.ʉː" "S15.ʉː" "S16.ʉː" "S17.ʉː"
[127] "S18.ʉː" "S19.ʉː" "S20.ʉː" "S21.ʉː" "S22.ʉː" "S24.ʉː" "S25.ʉː" "S26.ʉː" "S27.ʉː"
[136] "S29.ʉː" "S3.ʉː" "S30.ʉː" "S4.ʉː" "S5.ʉː" "S1.ʊ" "S12.ʊ" "S14.ʊ" "S15.ʊ"
[145] "S16.ʊ" "S17.ʊ" "S18.ʊ" "S19.ʊ" "S20.ʊ" "S21.ʊ" "S22.ʊ" "S24.ʊ" "S25.ʊ"
[154] "S26.ʊ" "S27.ʊ" "S29.ʊ" "S3.ʊ" "S30.ʊ" "S4.ʊ" "S5.ʊ" "S1.oː" "S12.oː"
[163] "S14.oː" "S15.oː" "S16.oː" "S17.oː" "S18.oː" "S19.oː" "S20.oː" "S21.oː" "S22.oː"
[172] "S24.oː" "S25.oː" "S26.oː" "S27.oː" "S29.oː" "S3.oː" "S30.oː" "S4.oː" "S5.oː"
[181] "S1.ɒ" "S12.ɒ" "S14.ɒ" "S15.ɒ" "S16.ɒ" "S17.ɒ" "S18.ɒ" "S19.ɒ" "S20.ɒ"
[190] "S21.ɒ" "S22.ɒ" "S24.ɒ" "S25.ɒ" "S26.ɒ" "S27.ɒ" "S29.ɒ" "S3.ɒ" "S30.ɒ"
[199] "S4.ɒ" "S5.ɒ" "S1.ɘ" "S12.ɘ" "S14.ɘ" "S15.ɘ" "S16.ɘ" "S17.ɘ" "S18.ɘ"
[208] "S19.ɘ" "S20.ɘ" "S21.ɘ" "S22.ɘ" "S24.ɘ" "S25.ɘ" "S26.ɘ" "S27.ɘ" "S29.ɘ"
[217] "S3.ɘ" "S30.ɘ" "S4.ɘ" "S5.ɘ" "S1.ə" "S12.ə" "S14.ə" "S15.ə" "S16.ə"
[226] "S17.ə" "S18.ə" "S19.ə" "S20.ə" "S21.ə" "S22.ə" "S24.ə" "S25.ə" "S26.ə"
[235] "S27.ə" "S29.ə" "S3.ə" "S30.ə" "S4.ə" "S5.ə" "S1.ə#" "S12.ə#" "S14.ə#"
[244] "S15.ə#" "S16.ə#" "S17.ə#" "S18.ə#" "S19.ə#" "S20.ə#" "S21.ə#" "S22.ə#" "S24.ə#"
[253] "S25.ə#" "S26.ə#" "S27.ə#" "S29.ə#" "S3.ə#" "S30.ə#" "S4.ə#" "S5.ə#" "S1.Bb2"
[262] "S12.Bb2" "S14.Bb2" "S15.Bb2" "S16.Bb2" "S17.Bb2" "S18.Bb2" "S19.Bb2" "S20.Bb2" "S21.Bb2"
[271] "S22.Bb2" "S24.Bb2" "S25.Bb2" "S26.Bb2" "S27.Bb2" "S29.Bb2" "S3.Bb2" "S30.Bb2" "S4.Bb2"
[280] "S5.Bb2" "S1.F3" "S12.F3" "S14.F3" "S15.F3" "S16.F3" "S17.F3" "S18.F3" "S19.F3"
[289] "S20.F3" "S21.F3" "S22.F3" "S24.F3" "S25.F3" "S26.F3" "S27.F3" "S29.F3" "S3.F3"
[298] "S30.F3" "S4.F3" "S5.F3" "S1.Bb3" "S12.Bb3" "S14.Bb3" "S15.Bb3" "S16.Bb3" "S17.Bb3"
[307] "S18.Bb3" "S19.Bb3" "S20.Bb3" "S21.Bb3" "S22.Bb3" "S24.Bb3" "S25.Bb3" "S26.Bb3" "S27.Bb3"
[316] "S29.Bb3" "S3.Bb3" "S30.Bb3" "S4.Bb3" "S5.Bb3" "S1.D4" "S12.D4" "S14.D4" "S15.D4"
[325] "S16.D4" "S17.D4" "S18.D4" "S19.D4" "S20.D4" "S21.D4" "S22.D4" "S24.D4" "S25.D4"
[334] "S26.D4" "S27.D4" "S29.D4" "S3.D4" "S30.D4" "S4.D4" "S5.D4" "S1.F4" "S12.F4"
[343] "S14.F4" "S15.F4" "S16.F4" "S17.F4" "S18.F4" "S19.F4" "S20.F4" "S21.F4" "S22.F4"
[352] "S24.F4" "S25.F4" "S26.F4" "S27.F4" "S29.F4" "S3.F4" "S30.F4" "S4.F4" "S5.F4"
str(dfNZE$subVowelInt)
Factor w/ 360 levels "S1.ɐː","S12.ɐː",..: 81 81 81 81 81 81 81 81 81 81 ...
levels(dfNZE$precSoundVowelInt)
[1] "coronals.ɐː" "forte.ɐː" "glottals.ɐː" "labials.ɐː" "mezzoforte.ɐː"
[6] "mezzopiano.ɐː" "NULL.ɐː" "piano.ɐː" "velars.ɐː" "vowels.ɐː"
[11] "coronals.ɐ" "forte.ɐ" "glottals.ɐ" "labials.ɐ" "mezzoforte.ɐ"
[16] "mezzopiano.ɐ" "NULL.ɐ" "piano.ɐ" "velars.ɐ" "vowels.ɐ"
[21] "coronals.ɛ" "forte.ɛ" "glottals.ɛ" "labials.ɛ" "mezzoforte.ɛ"
[26] "mezzopiano.ɛ" "NULL.ɛ" "piano.ɛ" "velars.ɛ" "vowels.ɛ"
[31] "coronals.ɵː" "forte.ɵː" "glottals.ɵː" "labials.ɵː" "mezzoforte.ɵː"
[36] "mezzopiano.ɵː" "NULL.ɵː" "piano.ɵː" "velars.ɵː" "vowels.ɵː"
[41] "coronals.e" "forte.e" "glottals.e" "labials.e" "mezzoforte.e"
[46] "mezzopiano.e" "NULL.e" "piano.e" "velars.e" "vowels.e"
[51] "coronals.iː" "forte.iː" "glottals.iː" "labials.iː" "mezzoforte.iː"
[56] "mezzopiano.iː" "NULL.iː" "piano.iː" "velars.iː" "vowels.iː"
[61] "coronals.ʉː" "forte.ʉː" "glottals.ʉː" "labials.ʉː" "mezzoforte.ʉː"
[66] "mezzopiano.ʉː" "NULL.ʉː" "piano.ʉː" "velars.ʉː" "vowels.ʉː"
[71] "coronals.ʊ" "forte.ʊ" "glottals.ʊ" "labials.ʊ" "mezzoforte.ʊ"
[76] "mezzopiano.ʊ" "NULL.ʊ" "piano.ʊ" "velars.ʊ" "vowels.ʊ"
[81] "coronals.oː" "forte.oː" "glottals.oː" "labials.oː" "mezzoforte.oː"
[86] "mezzopiano.oː" "NULL.oː" "piano.oː" "velars.oː" "vowels.oː"
[91] "coronals.ɒ" "forte.ɒ" "glottals.ɒ" "labials.ɒ" "mezzoforte.ɒ"
[96] "mezzopiano.ɒ" "NULL.ɒ" "piano.ɒ" "velars.ɒ" "vowels.ɒ"
[101] "coronals.ɘ" "forte.ɘ" "glottals.ɘ" "labials.ɘ" "mezzoforte.ɘ"
[106] "mezzopiano.ɘ" "NULL.ɘ" "piano.ɘ" "velars.ɘ" "vowels.ɘ"
[111] "coronals.ə" "forte.ə" "glottals.ə" "labials.ə" "mezzoforte.ə"
[116] "mezzopiano.ə" "NULL.ə" "piano.ə" "velars.ə" "vowels.ə"
[121] "coronals.ə#" "forte.ə#" "glottals.ə#" "labials.ə#" "mezzoforte.ə#"
[126] "mezzopiano.ə#" "NULL.ə#" "piano.ə#" "velars.ə#" "vowels.ə#"
[131] "coronals.Bb2" "forte.Bb2" "glottals.Bb2" "labials.Bb2" "mezzoforte.Bb2"
[136] "mezzopiano.Bb2" "NULL.Bb2" "piano.Bb2" "velars.Bb2" "vowels.Bb2"
[141] "coronals.F3" "forte.F3" "glottals.F3" "labials.F3" "mezzoforte.F3"
[146] "mezzopiano.F3" "NULL.F3" "piano.F3" "velars.F3" "vowels.F3"
[151] "coronals.Bb3" "forte.Bb3" "glottals.Bb3" "labials.Bb3" "mezzoforte.Bb3"
[156] "mezzopiano.Bb3" "NULL.Bb3" "piano.Bb3" "velars.Bb3" "vowels.Bb3"
[161] "coronals.D4" "forte.D4" "glottals.D4" "labials.D4" "mezzoforte.D4"
[166] "mezzopiano.D4" "NULL.D4" "piano.D4" "velars.D4" "vowels.D4"
[171] "coronals.F4" "forte.F4" "glottals.F4" "labials.F4" "mezzoforte.F4"
[176] "mezzopiano.F4" "NULL.F4" "piano.F4" "velars.F4" "vowels.F4"
str(dfNZE$precSoundVowelInt)
Factor w/ 180 levels "coronals.ɐː",..: 44 44 44 44 44 44 44 44 44 44 ...
levels(dfNZE$follSoundVowelInt)
[1] "coronals.ɐː" "forte.ɐː" "glottals.ɐː" "labials.ɐː" "mezzoforte.ɐː"
[6] "mezzopiano.ɐː" "NULL.ɐː" "piano.ɐː" "velars.ɐː" "vowels.ɐː"
[11] "coronals.ɐ" "forte.ɐ" "glottals.ɐ" "labials.ɐ" "mezzoforte.ɐ"
[16] "mezzopiano.ɐ" "NULL.ɐ" "piano.ɐ" "velars.ɐ" "vowels.ɐ"
[21] "coronals.ɛ" "forte.ɛ" "glottals.ɛ" "labials.ɛ" "mezzoforte.ɛ"
[26] "mezzopiano.ɛ" "NULL.ɛ" "piano.ɛ" "velars.ɛ" "vowels.ɛ"
[31] "coronals.ɵː" "forte.ɵː" "glottals.ɵː" "labials.ɵː" "mezzoforte.ɵː"
[36] "mezzopiano.ɵː" "NULL.ɵː" "piano.ɵː" "velars.ɵː" "vowels.ɵː"
[41] "coronals.e" "forte.e" "glottals.e" "labials.e" "mezzoforte.e"
[46] "mezzopiano.e" "NULL.e" "piano.e" "velars.e" "vowels.e"
[51] "coronals.iː" "forte.iː" "glottals.iː" "labials.iː" "mezzoforte.iː"
[56] "mezzopiano.iː" "NULL.iː" "piano.iː" "velars.iː" "vowels.iː"
[61] "coronals.ʉː" "forte.ʉː" "glottals.ʉː" "labials.ʉː" "mezzoforte.ʉː"
[66] "mezzopiano.ʉː" "NULL.ʉː" "piano.ʉː" "velars.ʉː" "vowels.ʉː"
[71] "coronals.ʊ" "forte.ʊ" "glottals.ʊ" "labials.ʊ" "mezzoforte.ʊ"
[76] "mezzopiano.ʊ" "NULL.ʊ" "piano.ʊ" "velars.ʊ" "vowels.ʊ"
[81] "coronals.oː" "forte.oː" "glottals.oː" "labials.oː" "mezzoforte.oː"
[86] "mezzopiano.oː" "NULL.oː" "piano.oː" "velars.oː" "vowels.oː"
[91] "coronals.ɒ" "forte.ɒ" "glottals.ɒ" "labials.ɒ" "mezzoforte.ɒ"
[96] "mezzopiano.ɒ" "NULL.ɒ" "piano.ɒ" "velars.ɒ" "vowels.ɒ"
[101] "coronals.ɘ" "forte.ɘ" "glottals.ɘ" "labials.ɘ" "mezzoforte.ɘ"
[106] "mezzopiano.ɘ" "NULL.ɘ" "piano.ɘ" "velars.ɘ" "vowels.ɘ"
[111] "coronals.ə" "forte.ə" "glottals.ə" "labials.ə" "mezzoforte.ə"
[116] "mezzopiano.ə" "NULL.ə" "piano.ə" "velars.ə" "vowels.ə"
[121] "coronals.ə#" "forte.ə#" "glottals.ə#" "labials.ə#" "mezzoforte.ə#"
[126] "mezzopiano.ə#" "NULL.ə#" "piano.ə#" "velars.ə#" "vowels.ə#"
[131] "coronals.Bb2" "forte.Bb2" "glottals.Bb2" "labials.Bb2" "mezzoforte.Bb2"
[136] "mezzopiano.Bb2" "NULL.Bb2" "piano.Bb2" "velars.Bb2" "vowels.Bb2"
[141] "coronals.F3" "forte.F3" "glottals.F3" "labials.F3" "mezzoforte.F3"
[146] "mezzopiano.F3" "NULL.F3" "piano.F3" "velars.F3" "vowels.F3"
[151] "coronals.Bb3" "forte.Bb3" "glottals.Bb3" "labials.Bb3" "mezzoforte.Bb3"
[156] "mezzopiano.Bb3" "NULL.Bb3" "piano.Bb3" "velars.Bb3" "vowels.Bb3"
[161] "coronals.D4" "forte.D4" "glottals.D4" "labials.D4" "mezzoforte.D4"
[166] "mezzopiano.D4" "NULL.D4" "piano.D4" "velars.D4" "vowels.D4"
[171] "coronals.F4" "forte.F4" "glottals.F4" "labials.F4" "mezzoforte.F4"
[176] "mezzopiano.F4" "NULL.F4" "piano.F4" "velars.F4" "vowels.F4"
str(dfNZE$follSoundVowelInt)
Factor w/ 180 levels "coronals.ɐː",..: 41 41 41 41 41 41 41 41 41 41 ...
dfTongan$subVowelInt <- interaction(dfTongan$subject, dfTongan$tokenPooled)
dfTongan$precSoundVowelInt <- interaction(dfTongan$speech_prec_pooled, dfTongan$tokenPooled)
dfTongan$follSoundVowelInt <- interaction(dfTongan$speech_fol_pooled, dfTongan$tokenPooled)
cat("\nTongan data\n")
Tongan data
levels(dfTongan$subVowelInt)
[1] "S1.a" "S12.a" "S14.a" "S15.a" "S16.a" "S17.a" "S18.a" "S19.a" "S20.a"
[10] "S21.a" "S22.a" "S24.a" "S25.a" "S26.a" "S27.a" "S29.a" "S3.a" "S30.a"
[19] "S4.a" "S5.a" "S1.e" "S12.e" "S14.e" "S15.e" "S16.e" "S17.e" "S18.e"
[28] "S19.e" "S20.e" "S21.e" "S22.e" "S24.e" "S25.e" "S26.e" "S27.e" "S29.e"
[37] "S3.e" "S30.e" "S4.e" "S5.e" "S1.i" "S12.i" "S14.i" "S15.i" "S16.i"
[46] "S17.i" "S18.i" "S19.i" "S20.i" "S21.i" "S22.i" "S24.i" "S25.i" "S26.i"
[55] "S27.i" "S29.i" "S3.i" "S30.i" "S4.i" "S5.i" "S1.u" "S12.u" "S14.u"
[64] "S15.u" "S16.u" "S17.u" "S18.u" "S19.u" "S20.u" "S21.u" "S22.u" "S24.u"
[73] "S25.u" "S26.u" "S27.u" "S29.u" "S3.u" "S30.u" "S4.u" "S5.u" "S1.o"
[82] "S12.o" "S14.o" "S15.o" "S16.o" "S17.o" "S18.o" "S19.o" "S20.o" "S21.o"
[91] "S22.o" "S24.o" "S25.o" "S26.o" "S27.o" "S29.o" "S3.o" "S30.o" "S4.o"
[100] "S5.o" "S1.Bb2" "S12.Bb2" "S14.Bb2" "S15.Bb2" "S16.Bb2" "S17.Bb2" "S18.Bb2" "S19.Bb2"
[109] "S20.Bb2" "S21.Bb2" "S22.Bb2" "S24.Bb2" "S25.Bb2" "S26.Bb2" "S27.Bb2" "S29.Bb2" "S3.Bb2"
[118] "S30.Bb2" "S4.Bb2" "S5.Bb2" "S1.F3" "S12.F3" "S14.F3" "S15.F3" "S16.F3" "S17.F3"
[127] "S18.F3" "S19.F3" "S20.F3" "S21.F3" "S22.F3" "S24.F3" "S25.F3" "S26.F3" "S27.F3"
[136] "S29.F3" "S3.F3" "S30.F3" "S4.F3" "S5.F3" "S1.Bb3" "S12.Bb3" "S14.Bb3" "S15.Bb3"
[145] "S16.Bb3" "S17.Bb3" "S18.Bb3" "S19.Bb3" "S20.Bb3" "S21.Bb3" "S22.Bb3" "S24.Bb3" "S25.Bb3"
[154] "S26.Bb3" "S27.Bb3" "S29.Bb3" "S3.Bb3" "S30.Bb3" "S4.Bb3" "S5.Bb3" "S1.D4" "S12.D4"
[163] "S14.D4" "S15.D4" "S16.D4" "S17.D4" "S18.D4" "S19.D4" "S20.D4" "S21.D4" "S22.D4"
[172] "S24.D4" "S25.D4" "S26.D4" "S27.D4" "S29.D4" "S3.D4" "S30.D4" "S4.D4" "S5.D4"
[181] "S1.F4" "S12.F4" "S14.F4" "S15.F4" "S16.F4" "S17.F4" "S18.F4" "S19.F4" "S20.F4"
[190] "S21.F4" "S22.F4" "S24.F4" "S25.F4" "S26.F4" "S27.F4" "S29.F4" "S3.F4" "S30.F4"
[199] "S4.F4" "S5.F4"
str(dfTongan$subVowelInt)
Factor w/ 200 levels "S1.a","S12.a",..: 19 19 19 19 19 19 19 19 19 19 ...
levels(dfTongan$precSoundVowelInt)
[1] "coronals.a" "forte.a" "glottals.a" "labials.a" "mezzoforte.a"
[6] "mezzopiano.a" "NULL.a" "piano.a" "velars.a" "vowels.a"
[11] "coronals.e" "forte.e" "glottals.e" "labials.e" "mezzoforte.e"
[16] "mezzopiano.e" "NULL.e" "piano.e" "velars.e" "vowels.e"
[21] "coronals.i" "forte.i" "glottals.i" "labials.i" "mezzoforte.i"
[26] "mezzopiano.i" "NULL.i" "piano.i" "velars.i" "vowels.i"
[31] "coronals.u" "forte.u" "glottals.u" "labials.u" "mezzoforte.u"
[36] "mezzopiano.u" "NULL.u" "piano.u" "velars.u" "vowels.u"
[41] "coronals.o" "forte.o" "glottals.o" "labials.o" "mezzoforte.o"
[46] "mezzopiano.o" "NULL.o" "piano.o" "velars.o" "vowels.o"
[51] "coronals.Bb2" "forte.Bb2" "glottals.Bb2" "labials.Bb2" "mezzoforte.Bb2"
[56] "mezzopiano.Bb2" "NULL.Bb2" "piano.Bb2" "velars.Bb2" "vowels.Bb2"
[61] "coronals.F3" "forte.F3" "glottals.F3" "labials.F3" "mezzoforte.F3"
[66] "mezzopiano.F3" "NULL.F3" "piano.F3" "velars.F3" "vowels.F3"
[71] "coronals.Bb3" "forte.Bb3" "glottals.Bb3" "labials.Bb3" "mezzoforte.Bb3"
[76] "mezzopiano.Bb3" "NULL.Bb3" "piano.Bb3" "velars.Bb3" "vowels.Bb3"
[81] "coronals.D4" "forte.D4" "glottals.D4" "labials.D4" "mezzoforte.D4"
[86] "mezzopiano.D4" "NULL.D4" "piano.D4" "velars.D4" "vowels.D4"
[91] "coronals.F4" "forte.F4" "glottals.F4" "labials.F4" "mezzoforte.F4"
[96] "mezzopiano.F4" "NULL.F4" "piano.F4" "velars.F4" "vowels.F4"
str(dfTongan$precSoundVowelInt)
Factor w/ 100 levels "coronals.a","forte.a",..: 3 3 3 3 3 3 3 3 3 3 ...
levels(dfTongan$follSoundVowelInt)
[1] "coronals.a" "forte.a" "glottals.a" "labials.a" "mezzoforte.a"
[6] "mezzopiano.a" "NULL.a" "piano.a" "velars.a" "vowels.a"
[11] "coronals.e" "forte.e" "glottals.e" "labials.e" "mezzoforte.e"
[16] "mezzopiano.e" "NULL.e" "piano.e" "velars.e" "vowels.e"
[21] "coronals.i" "forte.i" "glottals.i" "labials.i" "mezzoforte.i"
[26] "mezzopiano.i" "NULL.i" "piano.i" "velars.i" "vowels.i"
[31] "coronals.u" "forte.u" "glottals.u" "labials.u" "mezzoforte.u"
[36] "mezzopiano.u" "NULL.u" "piano.u" "velars.u" "vowels.u"
[41] "coronals.o" "forte.o" "glottals.o" "labials.o" "mezzoforte.o"
[46] "mezzopiano.o" "NULL.o" "piano.o" "velars.o" "vowels.o"
[51] "coronals.Bb2" "forte.Bb2" "glottals.Bb2" "labials.Bb2" "mezzoforte.Bb2"
[56] "mezzopiano.Bb2" "NULL.Bb2" "piano.Bb2" "velars.Bb2" "vowels.Bb2"
[61] "coronals.F3" "forte.F3" "glottals.F3" "labials.F3" "mezzoforte.F3"
[66] "mezzopiano.F3" "NULL.F3" "piano.F3" "velars.F3" "vowels.F3"
[71] "coronals.Bb3" "forte.Bb3" "glottals.Bb3" "labials.Bb3" "mezzoforte.Bb3"
[76] "mezzopiano.Bb3" "NULL.Bb3" "piano.Bb3" "velars.Bb3" "vowels.Bb3"
[81] "coronals.D4" "forte.D4" "glottals.D4" "labials.D4" "mezzoforte.D4"
[86] "mezzopiano.D4" "NULL.D4" "piano.D4" "velars.D4" "vowels.D4"
[91] "coronals.F4" "forte.F4" "glottals.F4" "labials.F4" "mezzoforte.F4"
[96] "mezzopiano.F4" "NULL.F4" "piano.F4" "velars.F4" "vowels.F4"
str(dfTongan$follSoundVowelInt)
Factor w/ 100 levels "coronals.a","forte.a",..: 7 7 7 7 7 7 7 7 7 7 ...
We are intersted in the tongue position of musical notes in relation to the native language vowels. We create three new predictors. (*** Not sure if we should keep these yet)
dfNZE$tokenPooled.ord <- as.ordered(dfNZE$tokenPooled)
contrasts(dfNZE$tokenPooled.ord) <- "contr.treatment"
dfNZE$vowels_pooled.ord <- as.ordered(dfNZE$vowels_pooled)
contrasts(dfNZE$vowels_pooled.ord) <- "contr.treatment"
dfNZE$playing_proficiency.ord <- as.ordered(dfNZE$playing_proficiency)
contrasts(dfNZE$playing_proficiency.ord) <- "contr.treatment"
dfTongan$tokenPooled.ord <- as.ordered(dfTongan$tokenPooled)
contrasts(dfTongan$tokenPooled.ord) <- "contr.treatment"
dfTongan$vowels_pooled.ord <- as.ordered(dfTongan$vowels_pooled)
contrasts(dfTongan$vowels_pooled.ord) <- "contr.treatment"
dfTongan$playing_proficiency.ord <- as.ordered(dfTongan$playing_proficiency)
contrasts(dfTongan$playing_proficiency.ord) <- "contr.treatment"
We create a new variable (start) when Point of tongue == 1. Our dataset is already ordered by speaker, by token, by preceeding and following context, and by points of measurements.
dfNZE$start <- dfNZE$points==1
dfTongan$start <- dfTongan$points==1
We start by running a model with no random effects. Just to evaluate structure
if (run_models == TRUE){
mdl.sys.time1 <- system.time(NZE.gam.noAR.noRandom <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr", by=tokenPooled.ord),
data=dfNZE, discrete=TRUE, nthreads=ncores))
mdl.sys.time1
# save model & model summary so they can be reloaded later
saveRDS(NZE.gam.noAR.noRandom, paste0(output_dir,"/NZE.gam.noAR.noRandom.rds"))
capture.output(summary(NZE.gam.noAR.noRandom),
file = paste0(output_dir,"/summary_NZE.gam.noAR.noRandom.txt"))
}else{
# reload model from output_dir
NZE.gam.noAR.noRandom = readRDS(paste0(output_dir,"/NZE.gam.noAR.noRandom.rds"))
}
summary(NZE.gam.noAR.noRandom)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 213.7217 0.1732 1233.836 < 2e-16 ***
tokenPooled.ordɐ 2.3944 0.2040 11.738 < 2e-16 ***
tokenPooled.ordɛ 0.8608 0.5584 1.542 0.12318
tokenPooled.ordɵː -1.6984 0.3847 -4.415 1.01e-05 ***
tokenPooled.orde 1.6694 1.2695 1.315 0.18853
tokenPooled.ordiː -0.4550 1.6776 -0.271 0.78620
tokenPooled.ordʉː 2.1494 0.4688 4.585 4.54e-06 ***
tokenPooled.ordʊ 1.6377 0.5841 2.804 0.00505 **
tokenPooled.ordoː -5.6004 0.5535 -10.117 < 2e-16 ***
tokenPooled.ordɒ 2.3988 0.3322 7.222 5.14e-13 ***
tokenPooled.ordɘ 4.4296 0.2758 16.063 < 2e-16 ***
tokenPooled.ordə 7.3881 0.2278 32.427 < 2e-16 ***
tokenPooled.ordə# 3.1585 0.2204 14.328 < 2e-16 ***
tokenPooled.ordBb2 1.9419 0.3342 5.811 6.23e-09 ***
tokenPooled.ordF3 5.3504 0.2430 22.017 < 2e-16 ***
tokenPooled.ordBb3 5.0044 0.2536 19.734 < 2e-16 ***
tokenPooled.ordD4 6.8152 0.4044 16.854 < 2e-16 ***
tokenPooled.ordF4 4.7946 0.5677 8.445 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.888 8.951 4067.6 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordɐ 6.504 7.379 195.1 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordɛ 8.683 8.916 6789.4 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordɵː 8.605 8.903 5302.6 <2e-16 ***
s(theta_uncut_z):tokenPooled.orde 8.122 8.324 9796.2 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordiː 8.193 8.413 10018.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordʉː 8.799 8.972 6747.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordʊ 8.181 8.668 627.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordoː 8.515 8.877 5112.2 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordɒ 8.746 8.954 1152.3 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordɘ 8.545 8.867 3707.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordə 8.564 8.861 3757.7 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordə# 8.423 8.808 937.1 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordBb2 8.518 8.861 361.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordF3 8.676 8.917 227.7 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordBb3 8.660 8.915 249.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordD4 8.831 8.980 357.2 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordF4 8.539 8.893 203.0 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.671 Deviance explained = 67.1%
fREML = 4.5679e+06 Scale est. = 159.48 n = 1154900
Our second model includes random effects for subject.
if (run_models == TRUE){
mdl.sys.time2 <- system.time(NZE.gam.noAR.Mod1 <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr", by=tokenPooled.ord) +
## Factor smooths by subject, note and intensity
s(theta_uncut_z, subVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by preceding sound adjusted by vowel
s(theta_uncut_z, precSoundVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by following sound adjusted by vowel
s(theta_uncut_z, follSoundVowelInt, bs="fs", k=10, m=1),
data=dfNZE, discrete=TRUE, nthreads=ncores))
mdl.sys.time2
# save model & model summary so they can be reloaded later
saveRDS(NZE.gam.noAR.Mod1, paste0(output_dir,"/NZE.gam.noAR.Mod1.rds"))
capture.output(summary(NZE.gam.noAR.Mod1),
file = paste0(output_dir,"/summary_NZE.gam.noAR.Mod1.txt"))
}else{
# reload model from output_dir
NZE.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/NZE.gam.noAR.Mod1.rds"))
}
k
gam.check(NZE.gam.noAR.Mod1)
Method: fREML Optimizer: perf chol
$grad
[1] 1.654837e-05 2.971235e-06 -1.165087e-05 -1.724376e-05 -9.274196e-06 -7.802588e-07
[7] -6.860490e-06 4.258896e-06 1.056341e-05 1.364637e-05 2.512391e-03 -1.151481e-05
[13] -6.494795e-04 7.556761e-03 1.416157e-05 6.800191e-06 9.014313e-06 -2.995809e-04
[19] -3.369618e-05 -7.643223e-05 -2.461416e-05 6.025140e-06 -3.050454e-05 4.224951e-06
[25] 4.293448e-04
$hess
[,1] [,2] [,3] [,4] [,5] [,6]
3.793112e+00 -3.374011e-03 3.949937e-02 5.385995e-02 4.902094e-02 -5.200200e-03
-3.374011e-03 4.543842e-01 -1.432551e-02 -9.419005e-03 -8.380941e-03 -3.060018e-03
3.949937e-02 -1.432551e-02 2.841025e+00 -5.846016e-02 -3.755060e-02 -7.622698e-03
5.385995e-02 -9.419005e-03 -5.846016e-02 2.323383e+00 -4.409260e-02 -6.302219e-03
4.902094e-02 -8.380941e-03 -3.755060e-02 -4.409260e-02 3.262854e+00 -3.278951e-03
-5.200200e-03 -3.060018e-03 -7.622698e-03 -6.302219e-03 -3.278951e-03 2.549975e+00
7.603801e-02 -2.986798e-03 -2.383397e-02 -3.983796e-02 -2.832536e-02 3.106524e-03
6.749594e-03 2.635858e-02 1.762480e-02 3.918605e-02 1.577620e-02 2.055870e-03
1.591732e-03 1.389086e-02 3.313733e-02 6.470510e-02 2.195498e-02 3.323862e-03
2.874597e-02 -2.349285e-02 2.110905e-02 2.309455e-02 1.367747e-02 2.491797e-03
-3.752626e-06 1.919640e-04 3.096877e-06 -5.293282e-05 -3.125918e-06 -1.429002e-07
4.446021e-02 3.361426e-02 1.435262e-02 -1.135008e-02 -8.001568e-03 1.276264e-02
-3.634290e-05 1.446352e-06 5.321751e-06 2.203601e-07 7.394776e-06 -1.416174e-06
5.048975e-04 9.172439e-04 -1.372698e-04 -2.443225e-04 -1.642332e-04 -3.879372e-07
-1.826424e-02 1.382816e-02 1.941254e-02 2.946470e-02 2.284172e-02 -3.347233e-03
2.134514e-03 1.508574e-02 6.621866e-02 5.419218e-02 3.556640e-02 1.850402e-02
7.208082e-03 -2.419015e-02 1.802459e-02 2.231900e-02 1.540263e-02 2.926692e-03
3.577549e-04 1.241299e-02 1.480191e-02 2.459174e-02 1.068628e-02 1.538305e-03
-4.618590e-01 -1.231274e-01 -3.897899e-01 -5.469263e-02 -3.992132e-01 -4.906983e-01
-6.363975e-02 2.251932e-02 -5.043218e-02 3.852647e-03 -5.643217e-02 2.185808e-02
-6.041235e-02 -5.717302e-02 -2.248594e-02 1.312402e-01 -1.245246e-02 5.036908e-03
6.887623e-03 -7.304122e-03 -1.246713e-02 1.611486e-02 1.858246e-02 -3.579513e-03
-9.148925e-02 -8.985507e-03 -4.038175e-02 -3.737819e-02 -1.248678e-01 -6.633447e-02
1.845525e-02 -5.850593e-02 1.016921e-03 -6.233048e-03 -2.923505e-03 1.235273e-01
d -3.869134e+00 -9.085465e-01 -3.422456e+00 -3.015135e+00 -3.398392e+00 -3.873705e+00
[,7] [,8] [,9] [,10] [,11] [,12]
7.603801e-02 6.749594e-03 1.591732e-03 2.874597e-02 -3.752626e-06 4.446021e-02
-2.986798e-03 2.635858e-02 1.389086e-02 -2.349285e-02 1.919640e-04 3.361426e-02
-2.383397e-02 1.762480e-02 3.313733e-02 2.110905e-02 3.096877e-06 1.435262e-02
-3.983796e-02 3.918605e-02 6.470510e-02 2.309455e-02 -5.293282e-05 -1.135008e-02
-2.832536e-02 1.577620e-02 2.195498e-02 1.367747e-02 -3.125918e-06 -8.001568e-03
3.106524e-03 2.055870e-03 3.323862e-03 2.491797e-03 -1.429002e-07 1.276264e-02
2.506946e+00 1.789762e-02 2.037555e-02 -2.089395e-03 -5.066482e-06 -3.396548e-02
1.789762e-02 1.446085e+00 -5.553757e-02 -1.884523e-02 -5.280523e-05 1.443099e-02
2.037555e-02 -5.553757e-02 2.036636e+00 -3.226667e-02 1.204207e-05 1.422890e-02
-2.089395e-03 -1.884523e-02 -3.226667e-02 1.678824e+00 9.913579e-05 6.048736e-02
-5.066482e-06 -5.280523e-05 1.204207e-05 9.913579e-05 -2.509292e-03 -1.334008e-04
-3.396548e-02 1.443099e-02 1.422890e-02 6.048736e-02 -1.334008e-04 6.038836e-01
1.274123e-05 8.797865e-07 4.758266e-06 3.750339e-06 -7.089752e-08 -2.667055e-06
-2.118933e-04 -4.262020e-04 -1.960567e-04 3.344769e-04 -3.809291e-06 -4.886729e-04
2.302373e-02 -4.181667e-02 -3.743639e-02 -6.969201e-02 -3.801839e-05 7.601662e-02
9.225654e-04 -1.584306e-02 -3.224802e-02 -3.099862e-02 -1.922254e-05 -7.323664e-02
7.700171e-03 -1.092671e-02 -1.482194e-02 -5.305533e-02 1.128576e-04 3.164509e-02
6.018090e-03 -2.482943e-02 -3.056282e-02 -2.430940e-02 -1.845099e-05 1.630589e-02
-8.145317e-01 1.964162e-01 4.705430e-01 -9.095706e-03 1.861523e-03 1.504170e-02
-4.179941e-02 1.764149e-02 -4.685687e-02 1.421397e-02 8.312716e-05 3.118750e-02
1.947939e-02 1.170647e-01 1.981764e-01 -1.865185e-02 5.549024e-04 -8.554403e-02
1.873149e-01 -5.904869e-03 -1.991708e-02 -9.543811e-03 4.846690e-05 8.282968e-03
7.930917e-02 -9.767626e-02 1.525045e-01 6.385740e-02 7.356678e-04 -5.041947e-02
-1.934628e-02 -2.133338e-03 -1.812350e-04 3.070091e-02 -6.738370e-06 3.473085e-02
d -3.473479e+00 -2.016873e+00 -2.575827e+00 -2.337246e+00 -3.421944e-03 -1.752163e+00
[,13] [,14] [,15] [,16] [,17] [,18]
-3.634290e-05 5.048975e-04 -1.826424e-02 2.134514e-03 7.208082e-03 3.577549e-04
1.446352e-06 9.172439e-04 1.382816e-02 1.508574e-02 -2.419015e-02 1.241299e-02
5.321751e-06 -1.372698e-04 1.941254e-02 6.621866e-02 1.802459e-02 1.480191e-02
2.203601e-07 -2.443225e-04 2.946470e-02 5.419218e-02 2.231900e-02 2.459174e-02
7.394776e-06 -1.642332e-04 2.284172e-02 3.556640e-02 1.540263e-02 1.068628e-02
-1.416174e-06 -3.879372e-07 -3.347233e-03 1.850402e-02 2.926692e-03 1.538305e-03
1.274123e-05 -2.118933e-04 2.302373e-02 9.225654e-04 7.700171e-03 6.018090e-03
8.797865e-07 -4.262020e-04 -4.181667e-02 -1.584306e-02 -1.092671e-02 -2.482943e-02
4.758266e-06 -1.960567e-04 -3.743639e-02 -3.224802e-02 -1.482194e-02 -3.056282e-02
3.750339e-06 3.344769e-04 -6.969201e-02 -3.099862e-02 -5.305533e-02 -2.430940e-02
-7.089752e-08 -3.809291e-06 -3.801839e-05 -1.922254e-05 1.128576e-04 -1.845099e-05
-2.667055e-06 -4.886729e-04 7.601662e-02 -7.323664e-02 3.164509e-02 1.630589e-02
6.491377e-04 -1.151219e-07 -8.780726e-06 -6.674875e-06 2.594757e-06 1.555496e-06
-1.151219e-07 -7.469015e-03 -3.095259e-04 1.525778e-04 5.401537e-04 -2.436027e-04
-8.780726e-06 -3.095259e-04 5.939864e-01 5.035505e-03 -3.203383e-02 -3.549869e-02
-6.674875e-06 1.525778e-04 5.035505e-03 6.364166e-01 -2.835539e-02 -8.908986e-03
2.594757e-06 5.401537e-04 -3.203383e-02 -2.835539e-02 3.469184e-01 -8.750922e-03
1.555496e-06 -2.436027e-04 -3.549869e-02 -8.908986e-03 -8.750922e-03 1.340264e-01
-1.917568e-04 8.838707e-03 -6.989032e-02 -1.883764e-01 2.433038e-01 3.014825e-01
-8.480467e-06 9.050074e-04 -1.221429e-01 -5.731121e-02 -1.415313e-02 1.963488e-01
-5.148845e-05 3.506498e-03 7.464517e-02 -3.675596e-02 5.256951e-02 1.308073e-01
-5.218884e-06 -3.715319e-04 -9.413380e-03 -5.522610e-03 9.357937e-03 -7.976850e-03
-1.625675e-04 4.375338e-03 1.314263e-01 -2.544651e-02 7.069881e-02 1.679347e-01
2.742712e-06 6.072829e-05 7.529114e-03 3.747101e-03 7.925557e-04 4.333952e-03
d -1.886865e-04 -1.196740e-02 -1.265600e+00 -2.233783e+00 -1.047626e+00 -8.153047e-01
[,19] [,20] [,21] [,22] [,23] [,24]
-4.618590e-01 -6.363975e-02 -6.041235e-02 6.887623e-03 -9.148925e-02 1.845525e-02
-1.231274e-01 2.251932e-02 -5.717302e-02 -7.304122e-03 -8.985507e-03 -5.850593e-02
-3.897899e-01 -5.043218e-02 -2.248594e-02 -1.246713e-02 -4.038175e-02 1.016921e-03
-5.469263e-02 3.852647e-03 1.312402e-01 1.611486e-02 -3.737819e-02 -6.233048e-03
-3.992132e-01 -5.643217e-02 -1.245246e-02 1.858246e-02 -1.248678e-01 -2.923505e-03
-4.906983e-01 2.185808e-02 5.036908e-03 -3.579513e-03 -6.633447e-02 1.235273e-01
-8.145317e-01 -4.179941e-02 1.947939e-02 1.873149e-01 7.930917e-02 -1.934628e-02
1.964162e-01 1.764149e-02 1.170647e-01 -5.904869e-03 -9.767626e-02 -2.133338e-03
4.705430e-01 -4.685687e-02 1.981764e-01 -1.991708e-02 1.525045e-01 -1.812350e-04
-9.095706e-03 1.421397e-02 -1.865185e-02 -9.543811e-03 6.385740e-02 3.070091e-02
1.861523e-03 8.312716e-05 5.549024e-04 4.846690e-05 7.356678e-04 -6.738370e-06
1.504170e-02 3.118750e-02 -8.554403e-02 8.282968e-03 -5.041947e-02 3.473085e-02
-1.917568e-04 -8.480467e-06 -5.148845e-05 -5.218884e-06 -1.625675e-04 2.742712e-06
8.838707e-03 9.050074e-04 3.506498e-03 -3.715319e-04 4.375338e-03 6.072829e-05
-6.989032e-02 -1.221429e-01 7.464517e-02 -9.413380e-03 1.314263e-01 7.529114e-03
-1.883764e-01 -5.731121e-02 -3.675596e-02 -5.522610e-03 -2.544651e-02 3.747101e-03
2.433038e-01 -1.415313e-02 5.256951e-02 9.357937e-03 7.069881e-02 7.925557e-04
3.014825e-01 1.963488e-01 1.308073e-01 -7.976850e-03 1.679347e-01 4.333952e-03
4.831369e+02 -2.271212e+01 1.588184e+00 -1.564959e-01 3.201561e+00 -2.704389e-01
-2.271212e+01 5.776723e+01 2.402152e-01 -9.514371e-03 3.899191e-01 -5.974779e-02
1.588184e+00 2.402152e-01 1.549689e+02 -1.654194e+00 1.338302e+00 4.720372e-02
-1.564959e-01 -9.514371e-03 -1.654194e+00 1.357742e+01 -1.351696e-01 -3.027312e-01
3.201561e+00 3.899191e-01 1.338302e+00 -1.351696e-01 1.288099e+02 -5.848947e+00
-2.704389e-01 -5.974779e-02 4.720372e-02 -3.027312e-01 -5.848947e+00 1.591238e+01
d -6.426462e+02 -6.458785e+01 -2.158640e+02 -2.054095e+01 -1.862798e+02 -1.909768e+01
[,25]
-3.869134e+00
-9.085465e-01
-3.422456e+00
-3.015135e+00
-3.398392e+00
-3.873705e+00
-3.473479e+00
-2.016873e+00
-2.575827e+00
-2.337246e+00
-3.421944e-03
-1.752163e+00
-1.886865e-04
-1.196740e-02
-1.265600e+00
-2.233783e+00
-1.047626e+00
-8.153047e-01
-6.426462e+02
-6.458785e+01
-2.158640e+02
-2.054095e+01
-1.862798e+02
-1.909768e+01
d 5.774320e+05
Model rank = 3540 / 3540
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(theta_uncut_z) 9.00 8.74 0.99 0.24
s(theta_uncut_z):tokenPooled.ordɐ 9.00 2.82 0.99 0.20
s(theta_uncut_z):tokenPooled.ordɛ 9.00 7.84 0.99 0.23
s(theta_uncut_z):tokenPooled.ordɵː 9.00 7.03 0.99 0.27
s(theta_uncut_z):tokenPooled.orde 9.00 7.80 0.99 0.21
s(theta_uncut_z):tokenPooled.ordiː 9.00 8.75 0.99 0.24
s(theta_uncut_z):tokenPooled.ordʉː 9.00 7.95 0.99 0.23
s(theta_uncut_z):tokenPooled.ordʊ 9.00 5.03 0.99 0.26
s(theta_uncut_z):tokenPooled.ordoː 9.00 6.15 0.99 0.21
s(theta_uncut_z):tokenPooled.ordɒ 9.00 5.67 0.99 0.20
s(theta_uncut_z):tokenPooled.ordɘ 9.00 1.00 0.99 0.23
s(theta_uncut_z):tokenPooled.ordə 9.00 4.50 0.99 0.20
s(theta_uncut_z):tokenPooled.ordə# 9.00 1.00 0.99 0.23
s(theta_uncut_z):tokenPooled.ordBb2 9.00 1.01 0.99 0.26
s(theta_uncut_z):tokenPooled.ordF3 9.00 3.53 0.99 0.23
s(theta_uncut_z):tokenPooled.ordBb3 9.00 5.47 0.99 0.27
s(theta_uncut_z):tokenPooled.ordD4 9.00 3.10 0.99 0.23
s(theta_uncut_z):tokenPooled.ordF4 9.00 2.63 0.99 0.29
s(theta_uncut_z,subVowelInt) 1800.00 1414.47 0.99 0.17
s(theta_uncut_z,precSoundVowelInt) 800.00 472.81 0.99 0.21
s(theta_uncut_z,follSoundVowelInt) 760.00 410.76 0.99 0.24
summary(NZE.gam.noAR.Mod1)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord) +
s(theta_uncut_z, subVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, precSoundVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, follSoundVowelInt, bs = "fs", k = 10, m = 1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 220.5606 5.6894 38.767 < 2e-16 ***
tokenPooled.ordɐ -2.2033 8.2819 -0.266 0.79021
tokenPooled.ordɛ -2.0080 8.7174 -0.230 0.81783
tokenPooled.ordɵː -10.0385 8.6289 -1.163 0.24468
tokenPooled.orde -1.5428 8.5005 -0.181 0.85598
tokenPooled.ordiː -23.8614 8.9149 -2.677 0.00744 **
tokenPooled.ordʉː -9.9912 8.4445 -1.183 0.23675
tokenPooled.ordʊ -7.0168 8.8143 -0.796 0.42599
tokenPooled.ordoː -13.7476 8.3639 -1.644 0.10024
tokenPooled.ordɒ -2.2441 8.4721 -0.265 0.79110
tokenPooled.ordɘ -4.1643 7.8985 -0.527 0.59803
tokenPooled.ordə -4.3322 8.2799 -0.523 0.60082
tokenPooled.ordə# -1.0692 9.0310 -0.118 0.90576
tokenPooled.ordBb2 -10.8830 8.1880 -1.329 0.18380
tokenPooled.ordF3 -5.4316 8.4482 -0.643 0.52027
tokenPooled.ordBb3 0.4549 8.5637 0.053 0.95763
tokenPooled.ordD4 2.7940 8.5964 0.325 0.74517
tokenPooled.ordF4 -3.5499 9.0236 -0.393 0.69402
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.738 8.805 49.107 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordɐ 2.817 3.214 0.958 0.409209
s(theta_uncut_z):tokenPooled.ordɛ 7.845 8.125 7.027 1.08e-09 ***
s(theta_uncut_z):tokenPooled.ordɵː 7.030 7.476 5.518 1.38e-06 ***
s(theta_uncut_z):tokenPooled.orde 7.797 8.063 16.128 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordiː 8.747 8.878 29.107 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordʉː 7.947 8.233 9.354 3.05e-13 ***
s(theta_uncut_z):tokenPooled.ordʊ 5.034 5.485 4.467 0.000347 ***
s(theta_uncut_z):tokenPooled.ordoː 6.152 6.608 7.085 5.46e-08 ***
s(theta_uncut_z):tokenPooled.ordɒ 5.674 6.243 2.636 0.011222 *
s(theta_uncut_z):tokenPooled.ordɘ 1.002 1.002 1.202 0.273595
s(theta_uncut_z):tokenPooled.ordə 4.504 5.167 0.544 0.738413
s(theta_uncut_z):tokenPooled.ordə# 1.002 1.002 0.228 0.633806
s(theta_uncut_z):tokenPooled.ordBb2 1.009 1.011 2.732 0.095650 .
s(theta_uncut_z):tokenPooled.ordF3 3.531 4.089 0.977 0.468897
s(theta_uncut_z):tokenPooled.ordBb3 5.468 6.129 0.687 0.719137
s(theta_uncut_z):tokenPooled.ordD4 3.095 3.464 1.022 0.377872
s(theta_uncut_z):tokenPooled.ordF4 2.631 2.837 1.010 0.440744
s(theta_uncut_z,subVowelInt) 1414.468 1790.000 735.968 < 2e-16 ***
s(theta_uncut_z,precSoundVowelInt) 472.810 789.000 113.570 < 2e-16 ***
s(theta_uncut_z,follSoundVowelInt) 410.755 755.000 83.984 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.856 Deviance explained = 85.7%
fREML = 4.0957e+06 Scale est. = 69.676 n = 1154900
So far, our second model that takes into account the random effect structure of by speaker, by note and by intensity accounted for 87% of the variance in the data. It showed some differences between the two languages in terms of how tongue contours are different depending on the note and its intensity. We next need to check the autocorrelation in the residuals and acocunt for these.
As we see below, the autocorrelation in the residuals is massive. We need to check whether this is on all predictors or on specific ones.
acf_resid(NZE.gam.noAR.Mod1, main = "Average ACF No.AR", cex.lab=1.5, cex.axis=1.5)
theta_uncut_z
There are some correlations between successive theta_uncut_z values that need to be taken into account (or not)
acf_resid(NZE.gam.noAR.Mod1, split_pred=list(dfNZE$theta_uncut_z), main = "Average ACF No.AR by theta_uncut_z", cex.lab=1.5, cex.axis=1.5)
token
There is massive correlations in the tokens that needs to be taken into account
acf_resid(NZE.gam.noAR.Mod1, split_pred=list(dfNZE$tokenPooled), main = "Average ACF No.AR by note", cex.lab=1.5, cex.axis=1.5)
This model takes into account the autocorrelations in the residuals
Rho
We use the following to get an estimate of the rho
to be included later on in our model
rho_est <- start_value_rho(NZE.gam.noAR.Mod1)
rho_est
[1] 0.9816646
if (run_models == TRUE){
mdl.sys.time3 <- system.time(NZE.gam.AR.Mod2 <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr", by=tokenPooled.ord) +
## Factor smooths by subject, note and intensity
s(theta_uncut_z, subVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by preceding sound adjusted by vowel
s(theta_uncut_z, precSoundVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by following sound adjusted by vowel
s(theta_uncut_z, follSoundVowelInt, bs="fs", k=10, m=1),
data=dfNZE,
AR.start=dfNZE$start, rho=rho_est,
discrete=TRUE, nthreads=ncores))
mdl.sys.time3
# save model & model summary so they can be reloaded later
saveRDS(NZE.gam.AR.Mod2, paste0(output_dir,"/NZE.gam.AR.Mod2.rds"))
capture.output(summary(NZE.gam.AR.Mod2),
file = paste0(output_dir,"/summary_NZE.gam.AR.Mod2.txt"))
}else{
# reload model from output_dir
NZE.gam.AR.Mod2 = readRDS(paste0(output_dir,"/NZE.gam.AR.Mod2.rds"))
}
acf_resid(NZE.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)
token
acf_resid(NZE.gam.AR.Mod2, split_pred=list(dfNZE$tokenPooled), main = "Average ACF AR by note", cex.lab=1.5, cex.axis=1.5)
summary(NZE.gam.AR.Mod2)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord) +
s(theta_uncut_z, subVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, precSoundVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, follSoundVowelInt, bs = "fs", k = 10, m = 1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 219.4975 6.4489 34.036 <2e-16 ***
tokenPooled.ordɐ 2.4522 9.0716 0.270 0.7869
tokenPooled.ordɛ -0.6698 9.7937 -0.068 0.9455
tokenPooled.ordɵː -12.0457 9.7007 -1.242 0.2143
tokenPooled.orde -3.4341 9.5064 -0.361 0.7179
tokenPooled.ordiː -14.6382 9.7174 -1.506 0.1320
tokenPooled.ordʉː -11.3554 9.5532 -1.189 0.2346
tokenPooled.ordʊ -8.0553 9.9145 -0.812 0.4165
tokenPooled.ordoː -16.3151 9.4803 -1.721 0.0853 .
tokenPooled.ordɒ -4.6904 9.5583 -0.491 0.6236
tokenPooled.ordɘ 0.3295 9.2818 0.036 0.9717
tokenPooled.ordə -9.0175 8.9919 -1.003 0.3159
tokenPooled.ordə# -3.5295 10.6120 -0.333 0.7394
tokenPooled.ordBb2 -8.6730 9.5329 -0.910 0.3629
tokenPooled.ordF3 -4.2528 9.5876 -0.444 0.6573
tokenPooled.ordBb3 5.4863 9.3918 0.584 0.5591
tokenPooled.ordD4 2.7435 9.5766 0.286 0.7745
tokenPooled.ordF4 -5.2434 9.9441 -0.527 0.5980
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.848 8.912 71.239 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordɐ 1.202 1.341 0.076 0.866266
s(theta_uncut_z):tokenPooled.ordɛ 7.845 8.281 10.982 3.17e-16 ***
s(theta_uncut_z):tokenPooled.ordɵː 7.983 8.378 10.994 2.34e-15 ***
s(theta_uncut_z):tokenPooled.orde 8.374 8.628 20.567 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordiː 8.625 8.789 41.432 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordʉː 8.286 8.572 12.210 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordʊ 6.243 6.929 4.071 0.000344 ***
s(theta_uncut_z):tokenPooled.ordoː 8.437 8.664 24.825 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordɒ 7.847 8.276 11.559 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordɘ 5.015 5.845 1.347 0.231386
s(theta_uncut_z):tokenPooled.ordə 1.003 1.005 3.817 0.050103 .
s(theta_uncut_z):tokenPooled.ordə# 5.567 6.345 1.279 0.208213
s(theta_uncut_z):tokenPooled.ordBb2 4.789 5.641 3.525 0.002560 **
s(theta_uncut_z):tokenPooled.ordF3 7.342 7.912 7.666 1.15e-08 ***
s(theta_uncut_z):tokenPooled.ordBb3 4.207 5.077 1.835 0.096271 .
s(theta_uncut_z):tokenPooled.ordD4 3.927 4.764 2.596 0.026484 *
s(theta_uncut_z):tokenPooled.ordF4 2.132 2.563 3.631 0.016174 *
s(theta_uncut_z,subVowelInt) 1566.993 1790.000 468.943 < 2e-16 ***
s(theta_uncut_z,precSoundVowelInt) 517.725 785.000 77.278 < 2e-16 ***
s(theta_uncut_z,follSoundVowelInt) 481.245 745.000 50.975 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.852 Deviance explained = 85.1%
fREML = 9.239e+05 Scale est. = 7.6199 n = 1154900
This, in comparison, is the Tongan data.
if (run_models == TRUE){
mdl.sys.time4 <- system.time(Tongan.gam.noAR.noRandom <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr",
by=tokenPooled.ord), data=dfTongan,
discrete=TRUE, nthreads=ncores))
mdl.sys.time4
# save model & model summary so they can be reloaded later
saveRDS(Tongan.gam.noAR.noRandom, paste0(output_dir,"/Tongan.gam.noAR.noRandom.rds"))
capture.output(summary(Tongan.gam.noAR.noRandom),
file = paste0(output_dir,"/summary_Tongan.gam.noAR.noRandom.txt"))
}else{
# reload model from output_dir
Tongan.gam.noAR.noRandom = readRDS(paste0(output_dir,"/Tongan.gam.noAR.noRandom.rds"))
}
summary(Tongan.gam.noAR.noRandom)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 218.5297 0.1533 1425.058 < 2e-16 ***
tokenPooled.orde -5.4635 0.4771 -11.452 < 2e-16 ***
tokenPooled.ordi -8.6799 0.5615 -15.457 < 2e-16 ***
tokenPooled.ordu -6.2339 0.4126 -15.108 < 2e-16 ***
tokenPooled.ordo -2.6835 0.2539 -10.570 < 2e-16 ***
tokenPooled.ordBb2 2.2519 0.8319 2.707 0.00679 **
tokenPooled.ordF3 3.9731 0.3307 12.015 < 2e-16 ***
tokenPooled.ordBb3 -0.6387 0.6410 -0.996 0.31905
tokenPooled.ordD4 1.1504 1.0303 1.117 0.26418
tokenPooled.ordF4 -0.7389 1.7391 -0.425 0.67092
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.883 8.982 12643.7 <2e-16 ***
s(theta_uncut_z):tokenPooled.orde 8.785 8.971 12453.2 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordi 8.856 8.986 29287.8 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordu 8.832 8.984 2989.5 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordo 8.846 8.984 1288.5 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordBb2 7.933 8.167 1383.8 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordF3 8.740 8.957 1909.6 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordBb3 8.667 8.912 1699.0 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordD4 7.870 8.124 1425.1 <2e-16 ***
s(theta_uncut_z):tokenPooled.ordF4 7.702 7.977 350.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.708 Deviance explained = 70.8%
fREML = 3.4371e+06 Scale est. = 273.62 n = 813500
Our second model includes random effects for subject.
if (run_models == TRUE){
mdl.sys.time5 <- system.time(Tongan.gam.noAR.Mod1 <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr", by=tokenPooled.ord) +
## Factor smooths by subject, note and intensity
s(theta_uncut_z, subVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by preceding sound adjusted by vowel
s(theta_uncut_z, precSoundVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by following sound adjusted by vowel
s(theta_uncut_z, follSoundVowelInt, bs="fs", k=10, m=1),
data=dfTongan, discrete=TRUE, nthreads=ncores))
mdl.sys.time5
# save model & model summary so they can be reloaded later
saveRDS(Tongan.gam.noAR.Mod1, paste0(output_dir,"/Tongan.gam.noAR.Mod1.rds"))
capture.output(summary(Tongan.gam.noAR.Mod1),
file = paste0(output_dir,"/summary_Tongan.gam.noAR.Mod1.txt"))
}else{
# reload model from output_dir
Tongan.gam.noAR.Mod1 = readRDS(paste0(output_dir,"/Tongan.gam.noAR.Mod1.rds"))
}
k
gam.check(Tongan.gam.noAR.Mod1)
Method: fREML Optimizer: perf chol
$grad
[1] -0.0037299727 0.0008094535 0.0005760527 0.0016178286 0.0013381010 -0.0075482728
[7] 0.0034881667 0.0046833614 0.0021633155 -0.0073101876 -0.0206029611 0.0007975225
[13] 0.0001617048 0.0001330463 -0.0012637833 0.0001134548 0.0261423946
$hess
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
3.674824214 -0.0168129926 1.459397e-03 0.1025344547 0.1552790434 0.0029836723 0.0803821662
-0.016812993 3.4606661235 -4.759258e-02 0.0173048738 0.0564581558 -0.0006753256 0.0117019083
0.001459397 -0.0475925835 3.593145e+00 0.0261883794 0.0158067422 -0.0004705847 -0.0052459965
0.102534455 0.0173048738 2.618838e-02 2.7046687801 -0.0410711741 -0.0009878523 0.0446467989
0.155279043 0.0564581558 1.580674e-02 -0.0410711741 2.2354389171 -0.0013468657 -0.0684899601
0.002983672 -0.0006753256 -4.705847e-04 -0.0009878523 -0.0013468657 0.0080327453 -0.0035792088
0.080382166 0.0117019083 -5.245997e-03 0.0446467989 -0.0684899601 -0.0035792088 0.4202367167
0.037080642 -0.0140631738 -2.625464e-02 0.0297963546 -0.0348950695 -0.0046763130 -0.0606897235
-0.033342534 -0.0471310466 -1.930816e-02 -0.0400261167 0.0564029428 -0.0018752732 0.0556532215
0.001525335 -0.0002528857 -2.066501e-04 -0.0007975903 -0.0003454773 -0.0001461937 -0.0009166797
0.042629766 -0.5059312574 -4.542025e-01 -0.1380005367 -0.4639657730 0.0288245834 -0.0259674125
0.074737919 -0.0029286834 -2.919048e-03 -0.0350880206 0.0146564216 -0.0007688201 0.0923776662
-0.005620049 -0.0304123943 -2.682336e-02 -0.0039641295 -0.0608409312 -0.0002811752 -0.0265845457
-0.008617918 0.0004205656 2.700101e-05 0.0413527224 0.0664884217 -0.0003558693 0.0009696407
0.023561548 -0.0466596851 -5.386878e-02 -0.0271838948 -0.0433940581 0.0013828394 -0.0245791163
0.009056123 0.0006566096 1.544831e-02 -0.0052431038 -0.0358107160 -0.0004177834 -0.0071837261
d -3.749725331 -3.5099848763 -3.692835e+00 -3.1792014796 -3.1550878355 -0.0264730465 -1.2622694751
[,8] [,9] [,10] [,11] [,12] [,13]
0.0370806424 -0.0333425337 1.525335e-03 4.262977e-02 7.473792e-02 -5.620049e-03
-0.0140631738 -0.0471310466 -2.528857e-04 -5.059313e-01 -2.928683e-03 -3.041239e-02
-0.0262546426 -0.0193081648 -2.066501e-04 -4.542025e-01 -2.919048e-03 -2.682336e-02
0.0297963546 -0.0400261167 -7.975903e-04 -1.380005e-01 -3.508802e-02 -3.964129e-03
-0.0348950695 0.0564029428 -3.454773e-04 -4.639658e-01 1.465642e-02 -6.084093e-02
-0.0046763130 -0.0018752732 -1.461937e-04 2.882458e-02 -7.688201e-04 -2.811752e-04
-0.0606897235 0.0556532215 -9.166797e-04 -2.596741e-02 9.237767e-02 -2.658455e-02
0.4505497816 0.0316236309 -1.273688e-03 1.332836e-01 9.615375e-02 7.193811e-04
0.0316236309 1.0248365524 -6.592622e-04 -2.112164e-01 4.456296e-02 -3.879999e-02
-0.0012736880 -0.0006592622 7.342660e-03 8.065494e-04 -2.111415e-04 8.764130e-05
0.1332835849 -0.2112164203 8.065494e-04 2.629130e+02 -1.329773e+01 2.189716e-01
0.0961537504 0.0445629650 -2.111415e-04 -1.329773e+01 3.052433e+01 -6.108662e-02
0.0007193811 -0.0387999878 8.764130e-05 2.189716e-01 -6.108662e-02 9.628986e+01
0.0190177947 -0.0214407156 8.388489e-05 1.340576e-01 5.983849e-02 -2.064554e+00
0.0206881696 -0.0617537246 3.249645e-04 1.190249e+00 -7.389997e-02 3.370610e+00
0.0293527988 -0.0327118652 1.406867e-04 7.061089e-02 -6.700397e-03 -1.371948e-01
d -1.0487372310 -1.8896365089 -5.860768e-03 -3.791200e+02 -3.517497e+01 -1.180238e+02
[,14] [,15] [,16] [,17]
-8.617918e-03 2.356155e-02 9.056123e-03 -3.749725e+00
4.205656e-04 -4.665969e-02 6.566096e-04 -3.509985e+00
2.700101e-05 -5.386878e-02 1.544831e-02 -3.692835e+00
4.135272e-02 -2.718389e-02 -5.243104e-03 -3.179201e+00
6.648842e-02 -4.339406e-02 -3.581072e-02 -3.155088e+00
-3.558693e-04 1.382839e-03 -4.177834e-04 -2.647305e-02
9.696407e-04 -2.457912e-02 -7.183726e-03 -1.262269e+00
1.901779e-02 2.068817e-02 2.935280e-02 -1.048737e+00
-2.144072e-02 -6.175372e-02 -3.271187e-02 -1.889637e+00
8.388489e-05 3.249645e-04 1.406867e-04 -5.860768e-03
1.340576e-01 1.190249e+00 7.061089e-02 -3.791200e+02
5.983849e-02 -7.389997e-02 -6.700397e-03 -3.517497e+01
-2.064554e+00 3.370610e+00 -1.371948e-01 -1.180238e+02
5.571024e+00 1.572429e-02 1.848103e-01 -9.381030e+00
1.572429e-02 1.157892e+02 -2.919043e+00 -1.360659e+02
1.848103e-01 -2.919043e+00 7.044041e+00 -1.016346e+01
d -9.381030e+00 -1.360659e+02 -1.016346e+01 4.067400e+05
Model rank = 2100 / 2100
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(theta_uncut_z) 9.00 8.51 1.01 0.79
s(theta_uncut_z):tokenPooled.orde 9.00 8.02 1.01 0.83
s(theta_uncut_z):tokenPooled.ordi 9.00 8.38 1.01 0.86
s(theta_uncut_z):tokenPooled.ordu 9.00 7.36 1.01 0.83
s(theta_uncut_z):tokenPooled.ordo 9.00 7.31 1.01 0.81
s(theta_uncut_z):tokenPooled.ordBb2 9.00 1.07 1.01 0.80
s(theta_uncut_z):tokenPooled.ordF3 9.00 3.52 1.01 0.86
s(theta_uncut_z):tokenPooled.ordBb3 9.00 3.09 1.01 0.85
s(theta_uncut_z):tokenPooled.ordD4 9.00 4.77 1.01 0.81
s(theta_uncut_z):tokenPooled.ordF4 9.00 1.03 1.01 0.81
s(theta_uncut_z,subVowelInt) 1000.00 828.63 1.01 0.84
s(theta_uncut_z,precSoundVowelInt) 500.00 254.81 1.01 0.84
s(theta_uncut_z,follSoundVowelInt) 500.00 292.46 1.01 0.77
summary(Tongan.gam.noAR.Mod1)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord) +
s(theta_uncut_z, subVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, precSoundVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, follSoundVowelInt, bs = "fs", k = 10, m = 1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 225.513 5.791 38.939 <2e-16 ***
tokenPooled.orde -16.700 8.728 -1.913 0.0557 .
tokenPooled.ordi -19.958 8.777 -2.274 0.0230 *
tokenPooled.ordu -14.072 8.680 -1.621 0.1050
tokenPooled.ordo -7.126 8.613 -0.827 0.4080
tokenPooled.ordBb2 -10.324 8.309 -1.243 0.2140
tokenPooled.ordF3 -7.949 8.566 -0.928 0.3534
tokenPooled.ordBb3 -13.285 8.613 -1.542 0.1230
tokenPooled.ordD4 -9.310 8.886 -1.048 0.2948
tokenPooled.ordF4 -13.530 8.900 -1.520 0.1285
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.507 8.639 34.686 < 2e-16 ***
s(theta_uncut_z):tokenPooled.orde 8.018 8.277 14.869 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordi 8.385 8.574 26.315 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordu 7.355 7.730 6.415 3.38e-08 ***
s(theta_uncut_z):tokenPooled.ordo 7.307 7.694 4.260 5.91e-05 ***
s(theta_uncut_z):tokenPooled.ordBb2 1.068 1.085 1.236 0.2783
s(theta_uncut_z):tokenPooled.ordF3 3.518 4.077 0.539 0.7141
s(theta_uncut_z):tokenPooled.ordBb3 3.088 3.527 1.839 0.1245
s(theta_uncut_z):tokenPooled.ordD4 4.775 5.249 1.676 0.1950
s(theta_uncut_z):tokenPooled.ordF4 1.026 1.030 5.772 0.0159 *
s(theta_uncut_z,subVowelInt) 828.630 992.000 1480.283 < 2e-16 ***
s(theta_uncut_z,precSoundVowelInt) 254.809 491.000 56.092 < 2e-16 ***
s(theta_uncut_z,follSoundVowelInt) 292.461 492.000 202.043 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.903 Deviance explained = 90.3%
fREML = 2.9942e+06 Scale est. = 91.252 n = 813500
So far, our second model that takes into account the random effect structure of by speaker, by note and by intensity accounted for 90% of the variance in the data. It showed some differences between the two languages in terms of how tongue contours are different depending on the note and its intensity. We next need to check the autocorrelation in the residuals and acocunt for these.
As we see below, the autocorrelation in the residuals is massive. We need to check whether this is on all predictors or on specific ones.
acf_resid(Tongan.gam.noAR.Mod1, main = "Average ACF No.AR", cex.lab=1.5, cex.axis=1.5)
theta_uncut_z
There is some correlations between successive theta_uncut_z that needs to be taken into account (or not)
acf_resid(Tongan.gam.noAR.Mod1, split_pred=list(dfTongan$theta_uncut_z),main = "Average ACF No.AR by theta_uncut_z", cex.lab=1.5, cex.axis=1.5)
token
There is massive correlations in the notes that needs to be taken into account
acf_resid(Tongan.gam.noAR.Mod1, split_pred=list(dfTongan$tokenPooled), main = "Average ACF No.AR by note", cex.lab=1.5, cex.axis=1.5)
This model takes into account the autocorrelations in the residuals
Rho
We use the following to get an estimate of the rho
to be included later on in our model
rho_est <- start_value_rho(Tongan.gam.noAR.Mod1)
rho_est
[1] 0.9786894
if (run_models == TRUE){
mdl.sys.time6 <- system.time(Tongan.gam.AR.Mod2 <- bam(rho_uncut_z ~ tokenPooled.ord +
## 1d smooths
s(theta_uncut_z, bs="cr", k=10) +
## 1d smooths * factors
s(theta_uncut_z, k=10, bs="cr", by=tokenPooled.ord) +
## Factor smooths by subject, note and intensity
s(theta_uncut_z, subVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by preceding sound and vowel adjusted by language
s(theta_uncut_z, precSoundVowelInt, bs="fs", k=10, m=1)+
## Factor smooths by following sound and vowel adjusted by language
s(theta_uncut_z, follSoundVowelInt, bs="fs", k=10, m=1),
data=dfTongan, AR.start=dfTongan$start, rho=rho_est,
discrete=TRUE, nthreads=ncores))
mdl.sys.time6
# save model & model summary so they can be reloaded later
saveRDS(Tongan.gam.AR.Mod2, paste0(output_dir,"/Tongan.gam.AR.Mod2.rds"))
capture.output(summary(Tongan.gam.AR.Mod2),
file = paste0(output_dir,"/summary_Tongan.gam.AR.Mod2.txt"))
}else{
# reload model from output_dir
Tongan.gam.AR.Mod2 = readRDS(paste0(output_dir,"/Tongan.gam.AR.Mod2.rds"))
}
acf_resid(Tongan.gam.AR.Mod2, main = "Average ACF AR", cex.lab=1.5, cex.axis=1.5)
token
acf_resid(Tongan.gam.AR.Mod2, split_pred=list(dfTongan$tokenPooled), main = "Average ACF AR by token", cex.lab=1.5, cex.axis=1.5)
summary(Tongan.gam.AR.Mod2)
Family: gaussian
Link function: identity
Formula:
rho_uncut_z ~ tokenPooled.ord + s(theta_uncut_z, bs = "cr", k = 10) +
s(theta_uncut_z, k = 10, bs = "cr", by = tokenPooled.ord) +
s(theta_uncut_z, subVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, precSoundVowelInt, bs = "fs", k = 10, m = 1) +
s(theta_uncut_z, follSoundVowelInt, bs = "fs", k = 10, m = 1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 228.095 6.966 32.743 < 2e-16 ***
tokenPooled.orde -19.295 10.399 -1.855 0.06355 .
tokenPooled.ordi -28.433 10.441 -2.723 0.00647 **
tokenPooled.ordu -26.009 10.302 -2.525 0.01158 *
tokenPooled.ordo -20.967 10.119 -2.072 0.03826 *
tokenPooled.ordBb2 -16.207 10.393 -1.559 0.11889
tokenPooled.ordF3 -18.870 10.257 -1.840 0.06582 .
tokenPooled.ordBb3 -12.953 10.313 -1.256 0.20913
tokenPooled.ordD4 -12.090 10.513 -1.150 0.25015
tokenPooled.ordF4 -19.974 10.248 -1.949 0.05128 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(theta_uncut_z) 8.752 8.845 50.452 < 2e-16 ***
s(theta_uncut_z):tokenPooled.orde 8.834 8.918 107.823 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordi 8.934 8.969 268.104 < 2e-16 ***
s(theta_uncut_z):tokenPooled.ordu 7.908 8.315 7.966 2.54e-11 ***
s(theta_uncut_z):tokenPooled.ordo 7.678 8.144 7.534 6.06e-09 ***
s(theta_uncut_z):tokenPooled.ordBb2 5.893 6.562 3.783 0.001534 **
s(theta_uncut_z):tokenPooled.ordF3 6.709 7.384 4.869 0.000199 ***
s(theta_uncut_z):tokenPooled.ordBb3 5.406 6.216 2.383 0.020325 *
s(theta_uncut_z):tokenPooled.ordD4 5.678 6.361 2.130 0.025229 *
s(theta_uncut_z):tokenPooled.ordF4 1.012 1.016 11.952 0.000526 ***
s(theta_uncut_z,subVowelInt) 898.693 998.000 752.116 < 2e-16 ***
s(theta_uncut_z,precSoundVowelInt) 278.146 498.000 38.325 < 2e-16 ***
s(theta_uncut_z,follSoundVowelInt) 330.752 498.000 127.646 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.898 Deviance explained = 89.8%
fREML = 8.5305e+05 Scale est. = 10.831 n = 813500