### R code used for the analysis presented in
### Ambrazaitis, G., & House, D. (2022). Probing effects of lexical prosody on
### speech-gesture integration in prominence production by Swedish news
### presenters. Laboratory Phonology.

#####################
#### Introduction ###
#####################

## This file contains all code essential to reproduce our analysis from
## Section 2.3 in the paper and onwards (incl. illustrations in the main text
## and in the supplementary materials).
## The data file is also included in the supplementary materials.
## This script also contains a few additional explanations of the data file
## that are not included in the paper; these explanations are not relevant for
## the study as such, but important to know about when using the data file.

############################
#### Some pre-processing ###
############################

## Load required libraries:
library(tidyverse)
library(lme4)
library(MuMIn)

## Set the working directory (NOTE: change the path to match the working
## directory on your computer):
setwd("/Users/XXX/")

## Read the datafile (tab-separated, first row holds the column names):
rawdata <- read.table(
  'Ambrazaitis_House_Appendix_C_data.txt',
  sep = '\t',
  header = TRUE
)

## We prefer to work with a tibble, instead of a traditional data frame:
rawdata <- as_tibble(rawdata)

## The raw data file contains columns that we won't use in this analysis
## (among others, the points in time from which the fo measures were taken in
## the original sound/video-files. Here, we only use the fo-values.)
## With the following command we extract only those columns that we need for
## this analysis; they are saved in a new tibble/data frame called "data".
data <- select(
  rawdata,
  Praat.file, Speaker, Sex, Story, Topic, Topic_descr, Word,
  MMP.cluster_num, WA, Lex, H_freq, La_freq, Lb_freq, FH_freq
)

#############################################
## Some comments on the selected columns: ###
#############################################

# "Praat.file" is a sequential number according to which the audio files were
# processed in Praat using ProsodyPro. It is not central to this analysis, but
# it was handy to have during data exploration, as it enabled us to easily
# access the original sound file/annotation when checking extreme values.
# (More on this below.)

# Story/ Topic: see explanation in the paper. (In "Topic", topics are
# distinguished using arbitrary letters as category names (A, B, C,...), while
# in "Topic_descr", real keywords are used as a rough description of the
# topics, such as "incident" or "society"; to see all topic labels used, use
# the following command:)
unique(data$Topic_descr)

# "MMP.cluster_num" codes the MMP constellation (see paper for explanation)
# numerically, where "1" = "BA", "2" = "BA+HB", etc. (We will, below, transfer
# this column into a factor with levels "BA", "BA+HB", "BA+HB+EB"). NOTE: In
# this column, five different categories are coded, although we only use three
# in this study; we will remove the unused below. (The data file contains more
# conditions than those used in the study.)

# "WA" codes each word as having Accent 1 or Accent 2.

# "Lex" codes each word as having 1 lexical stress (simplex words) or
# 2 lexical stresses (compounds)

# Finally, there are four columns containing fo-values: H_freq, La_freq,
# Lb_freq, FH_freq. According to the paper, we have labelled only three
# landmarks: H, L, FH. In fact, we have labelled 4 landmarks in some cases
# (namely, two different L-landmarks - La and Lb - where there was a longer
# stretch between the two H-landmarks).
# However, in this study, we have decided to simplify the procedure and only
# use the lowest L-landmark; a new variable containing only the lowest L-value
# is defined below.

#################################
#### Some MORE pre-processing ###
#################################

## Here, we transform our two L-measures into a single one, only containing
## the lowest possible value:
## NOTE(review): the inner ifelse() is RECONSTRUCTED. In this copy of the
## script the text between a "<" and a later ">" has been lost, so the
## original call is cut off right after `ifelse(La_freq`. The comparison used
## here is inferred from the comment above ("lowest possible value") --
## verify against the published supplementary script.
data <- mutate(
  data,
  L_freq = ifelse(
    is.na(Lb_freq),
    La_freq,                                      # only one L-landmark labelled
    ifelse(La_freq < Lb_freq, La_freq, Lb_freq)   # keep the lower of the two
  )
)

## MISSING CODE -- TODO restore from the published supplementary script.
## The lost span sat between `ifelse(La_freq` and the fragment ` 0)`. Judging
## from the rest of this script it must define the columns Fall, Rise, MMP,
## Lexpros, Lp_fac and Sp_fac, which are used by every section below;
## presumably it also removes the unused MMP categories, as announced in the
## column notes. The exact definitions cannot be recovered from this copy and
## are deliberately NOT guessed here.
## Fail early with a clear message instead of an obscure "object not found"
## further down:
required_cols <- c("Fall", "Rise", "MMP", "Lexpros", "Lp_fac", "Sp_fac")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Pre-processing is incomplete; `data` lacks the column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

data
## 543 tokens remaining for the analysis.

##########################################################################################
### DESCRIPTIVE STATISTICS ###############################################################
##########################################################################################

##########################################################################################
### Sample sizes ###

## Sample sizes per MMP (applies to Rise, but not to Fall, since Fall has
## missing cases):
# (see Fig. 3 & Tab. 3 in paper)
# Expected: BA = 280 cases, BA+HB = 182 cases, BA+HB+EB = 81 cases
data %>%
  count(MMP)

# The following commands provide sample sizes for Fall:
# (see Fig. 3 & Tab. 3 in paper)
# Expected: 125 missing cases for the fall
data %>%
  filter(is.na(Fall)) %>%
  nrow()

# Expected: BA = 224 cases, BA+HB = 135 cases, BA+HB+EB = 59 cases
data %>%
  filter(!is.na(Fall)) %>%
  count(MMP)

## Sample sizes per MMP and LexPros
# (see Tab.
# 4 in paper)

# Fall (only tokens for which a fall could be measured)
# Expected cases (BA / BA+HB / BA+HB+EB):
#   simp_A1: 95 / 49 / 17
#   simp_A2: 52 / 35 / 22
#   comp_A2: 77 / 51 / 20
data %>%
  filter(!is.na(Fall)) %>%
  count(Lexpros, MMP)

# Rise
# Expected cases (BA / BA+HB / BA+HB+EB):
#   simp_A1: 150 / 95 / 39
#   simp_A2:  52 / 36 / 22
#   comp_A2:  78 / 51 / 20
data %>%
  count(Lexpros, MMP)

## Sample sizes per MMP and Speaker
# (see Tab.
# 2 in paper)
# (no samples sizes per Speaker for the fall are provided in the paper)
# Expected cases (BA / BA+HB / BA+HB+EB):
#   Katarina:  60 / 29 / 16
#   Sofia:     36 / 36 /  6
#   Alexander: 60 / 71 / 34
#   Filip:     75 / 12 /  2
#   Pelle:     49 / 34 / 23
data %>%
  count(Speaker, MMP)

#############################################################################################
### Means and sd ###

## per MMP
# (see Tab. 3 in paper)

# Fall (rows without a Fall value are dropped first, so mean()/sd() are not NA)
data %>%
  filter(!is.na(Fall)) %>%
  group_by(MMP) %>%
  summarise(mean = mean(Fall), sd = sd(Fall))

# Rise
data %>%
  group_by(MMP) %>%
  summarise(mean = mean(Rise), sd = sd(Rise))

## per MMP and LexPros
# (see Tab.
# 4 in paper)

# Fall
data %>%
  filter(!is.na(Fall)) %>%
  group_by(MMP, Lp_fac) %>%
  summarise(mean = mean(Fall), sd = sd(Fall), .groups = "drop")

# Rise
data %>%
  group_by(MMP, Lp_fac) %>%
  summarise(mean = mean(Rise), sd = sd(Rise), .groups = "drop")

## per LexPros only
# (see Tab. 4 in paper)

# Fall
data %>%
  filter(!is.na(Fall)) %>%
  group_by(Lp_fac) %>%
  summarise(mean = mean(Fall), sd = sd(Fall))

# Rise
data %>%
  group_by(Lp_fac) %>%
  summarise(mean = mean(Rise), sd = sd(Rise))

###################################################################################################
## Boxplots ##

## Overview: overall effect of MMP on Fall and Rise
# (Fig. 3 in paper)

# Text sizes shared by both panels of the figure.
fig3_text_sizes <- theme(
  plot.title = element_text(size = 24),
  axis.title.x = element_text(size = 16),
  axis.text.x = element_text(size = 12),
  axis.title.y = element_text(size = 16),
  axis.text.y = element_text(size = 12),
  legend.title = element_text(size = 16),
  legend.text = element_text(size = 16)
)

ggplot(data, aes(x = MMP, y = Fall, fill = MMP)) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_brewer(palette = "Oranges") +
  labs(
    title = "(a) Accentual fall",
    x = "Multimodal prominence cluster (MMP)",
    y = "fo movement (semitones)"
  ) +
  fig3_text_sizes

ggplot(data, aes(x = MMP, y = Rise, fill = MMP)) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_brewer(palette = "Oranges") +
  labs(
    title = "(b) Big-accent rise",
    x = "Multimodal prominence cluster (MMP)",
    y = "fo movement (semitones)"
  ) +
  fig3_text_sizes

## MMP per LexPros
# (Fig.
# 4 in paper)

# Helper for all boxplots in this section: one box per level of `x`, split and
# coloured by MMP, with the text sizes used throughout the paper's figures.
#   df    - data frame/tibble to plot (must contain MMP and the x/y columns)
#   x, y  - unquoted column names for the x and y axis
#   title - plot title
#   xlab  - x-axis label
# Returns the ggplot object (auto-printed when called at top level).
mmp_boxplot <- function(df, x, y, title, xlab) {
  ggplot(df, aes(x = {{ x }}, y = {{ y }}, fill = MMP)) +
    geom_boxplot() +
    theme_minimal() +
    scale_fill_brewer(palette = "Oranges") +
    labs(title = title, x = xlab, y = "fo movement (semitones)") +
    theme(
      plot.title = element_text(size = 24),
      axis.title.x = element_text(size = 16),
      axis.text.x = element_text(size = 12),
      axis.title.y = element_text(size = 16),
      axis.text.y = element_text(size = 12),
      legend.title = element_text(size = 16),
      legend.text = element_text(size = 16)
    )
}

mmp_boxplot(data, Lp_fac, Fall, "(a) Accentual fall", "Lexical-prosodic type")
mmp_boxplot(data, Lp_fac, Rise, "(b) Big-accent rise", "Lexical-prosodic type")

## MMP per speaker
# (Fig. 5 in paper)
mmp_boxplot(data, Sp_fac, Fall, "(a) Accentual fall", "Speaker")
mmp_boxplot(data, Sp_fac, Rise, "(b) Big-accent rise", "Speaker")

## MMP per Lexpros AND Speaker
# NOTE: The following five illustrations are NOT included in the main text,
# but only in the supplementary materials.
#
# NOTE: For reasons of space, we have only included boxplots for the Rise
# (per LexPros for each individual speaker), but not for the Fall
katar <- filter(data, Speaker == "Katarina")
mmp_boxplot(
  katar, Lp_fac, Rise,
  "(a) Big-accent rise, Katarina", "Lexical-prosodic type"
)

sofia <- filter(data, Speaker == "Sofia")
mmp_boxplot(
  sofia, Lp_fac, Rise,
  "(b) Big-accent rise, Sofia", "Lexical-prosodic type"
)

pelle <- filter(data, Speaker == "Pelle")
mmp_boxplot(
  pelle, Lp_fac, Rise,
  "(c) Big-accent rise, Pelle", "Lexical-prosodic type"
)

filip <- filter(data, Speaker == "Filip")
mmp_boxplot(
  filip, Lp_fac, Rise,
  "(d) Big-accent rise, Filip", "Lexical-prosodic type"
)

alexa <- filter(data, Speaker == "Alexander")
mmp_boxplot(
  alexa, Lp_fac, Rise,
  "(e) Big-accent rise, Alexander", "Lexical-prosodic type"
)

## Exploring effects of Sex and Topic
# NOTE: The following four illustrations are NOT included in the main text,
# but only in the supplementary materials.
# Helper for the Sex/Topic overview plots: a plain (unfilled) boxplot of `y`
# per level of `x`, drawn from `data`, with the text sizes used in the other
# figures.
#   x, y  - unquoted column names of `data` for the x and y axis
#   title - plot title
#   xlab  - x-axis label
# Returns the ggplot object (auto-printed when called at top level).
covariate_boxplot <- function(x, y, title, xlab) {
  ggplot(data, aes(x = {{ x }}, y = {{ y }})) +
    geom_boxplot() +
    theme_minimal() +
    labs(title = title, x = xlab, y = "fo movement (semitones)") +
    theme(
      plot.title = element_text(size = 24),
      axis.title.x = element_text(size = 16),
      axis.text.x = element_text(size = 12),
      axis.title.y = element_text(size = 16),
      axis.text.y = element_text(size = 12),
      legend.title = element_text(size = 16),
      legend.text = element_text(size = 16)
    )
}

# Sex
covariate_boxplot(Sex, Fall, "(a) Accentual fall", "Speaker sex")
covariate_boxplot(Sex, Rise, "(b) Big-accent rise", "Speaker sex")

# Topic
covariate_boxplot(Topic, Fall, "(a) Accentual fall", "Topics")
covariate_boxplot(Topic, Rise, "(b) Big-accent rise", "Topics")

##########################################################################################
### MODELING #############################################################################
##########################################################################################

## Note: Cf. the paper for explanations!
###########################################################################################
## Note: the following code builds the models and calculates R2-values
# (Tab. 5 in paper)
# All models are fitted with maximum likelihood (REML = FALSE) and share the
# same random intercepts for Speaker and Topic.

### Models for Fall

# Full model (with interaction MMP * Lexpros)
Fall_mdl <- lmer(
  Fall ~ MMP * Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Fall_mdl)

# Reduced model 1 (no interaction)
Fall_no_inter <- lmer(
  Fall ~ MMP + Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Fall_no_inter)

# Reduced model 2 (no MMP)
Fall_null1 <- lmer(
  Fall ~ Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Fall_null1)

# Reduced model 3 (no Lexpros)
Fall_null2 <- lmer(
  Fall ~ MMP + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Fall_null2)

# Residual plots for the Fall (Full model)
res <- residuals(Fall_mdl)
old_par <- par(mfrow = c(1, 3))  # three panels side by side
hist(res)
qqnorm(res)
qqline(res)
## Residuals seem to be roughly normally distributed, but:
plot(fitted(Fall_mdl), res)
## looks as if homoscedasticity might be violated.
par(old_par)  # restore the previous panel layout

### Models for Rise:

# Full model (with interaction MMP * Lexpros)
Rise_mdl <- lmer(
  Rise ~ MMP * Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Rise_mdl)

# Reduced model 1 (no interaction)
Rise_no_inter <- lmer(
  Rise ~ MMP + Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Rise_no_inter)

# Reduced model 2 (no MMP)
Rise_null1 <- lmer(
  Rise ~ Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Rise_null1)

# Reduced model 3 (no Lexpros)
Rise_null2 <- lmer(
  Rise ~ MMP + Sex + (1 | Speaker) + (1 | Topic),
  data = data, REML = FALSE
)
r.squaredGLMM(Rise_null2)

# Residual plots for the Rise (Full model)
res <- residuals(Rise_mdl)
old_par <- par(mfrow = c(1, 3))  # three panels side by side
hist(res)
qqnorm(res)
qqline(res)
## Residuals seem to be roughly normally distributed, and
plot(fitted(Rise_mdl), res)
## looks as if homoscedasticity is given.
par(old_par)  # restore the previous panel layout
###########################################################################################
## Likelihood ratio tests
# (Tab. 6 in paper)
# Each call compares a reduced model (first argument) with the model that
# adds one term back (second argument).

## Comparisons for Fall
anova(Fall_no_inter, Fall_mdl)       # interaction MMP:Lexpros
anova(Fall_null1, Fall_no_inter)     # MMP
anova(Fall_null2, Fall_no_inter)     # Lexpros

## Comparisons for Rise
anova(Rise_no_inter, Rise_mdl)       # interaction MMP:Lexpros
anova(Rise_null1, Rise_no_inter)     # MMP
anova(Rise_null2, Rise_no_inter)     # Lexpros

###########################################################################################
## Alternative analysis referred to in Footnote 4 ##

### A model for Fall lacking A1-words
data_noA1 <- filter(data, WA == 2)

## Check if the data look as expected (yes):
ggplot(data_noA1, aes(x = Lp_fac, y = Fall, fill = MMP)) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_brewer(palette = "Oranges")

## Models:
## (Stored under their own "_noA1" names so that the models of the main
## analysis above are not overwritten.)
Fall_mdl_noA1 <- lmer(
  Fall ~ MMP * Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data_noA1, REML = FALSE
)
r.squaredGLMM(Fall_mdl_noA1)

Fall_no_inter_noA1 <- lmer(
  Fall ~ MMP + Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data_noA1, REML = FALSE
)
r.squaredGLMM(Fall_no_inter_noA1)

Fall_null1_noA1 <- lmer(
  Fall ~ Lexpros + Sex + (1 | Speaker) + (1 | Topic),
  data = data_noA1, REML = FALSE
)
r.squaredGLMM(Fall_null1_noA1)

Fall_null2_noA1 <- lmer(
  Fall ~ MMP + Sex + (1 | Speaker) + (1 | Topic),
  data = data_noA1, REML = FALSE
)
r.squaredGLMM(Fall_null2_noA1)

## Model comparisons:
anova(Fall_no_inter_noA1, Fall_mdl_noA1)
anova(Fall_null1_noA1, Fall_no_inter_noA1)
anova(Fall_null2_noA1, Fall_no_inter_noA1)

################
### The end. ###
################