# Kruskal-Wallis rank sum test for voicing differences between raiser types
#
# vds (voicing differences, in Hz) were calculated as each talker's mean F1
# maxima in pre-voiced obstruent position minus their mean F1 maxima in
# pre-voiceless obstruent position; any flaps were entirely excluded from
# these calculations.
#
# names, types and vds are parallel vectors: element i of each describes the
# same talker.
names <- c(
  "Christian", "JuAllison", "Molly", "Roger", "Rosalee", "Sara", "Savannah",
  "SugarMagnolia", "Chris", "Dave", "Ed",
  "Rayne", "Acilie", "Bella", "Margaret"
)
types <- c(
  "Phonological", "Phonological", "Phonological", "Phonological",
  "Phonological", "Phonological", "Phonological", "Phonological",
  "Phonological", "Phonological", "Phonological",
  "Phonetic", "Phonetic", "Phonetic", "Phonetic"
)
# Ed's voicing difference is 77, not 177: 77 is the value assigned to Ed in
# Chalmette_all_f1long_aw2$vd, and it is the only value that reproduces the
# Phonological summary statistics used for the raiser type plot (mean 109.91,
# sd 31.05). Both 77 and 177 rank above every Phonetic talker, so the
# Kruskal-Wallis result reported below is the same either way.
vds <- c(
  95, 78, 116, 69, 85, 125, 163, 121, 138, 142, 77,
  67, 63, 60, 69
)

kwdf <- data.frame(names, types, vds)
# kruskal.test() needs the grouping variable as a factor.
kwdf$types <- as.factor(kwdf$types)
kruskal.test(vds ~ types, data = kwdf)
# Kruskal-Wallis chi-squared = 7.89, df = 1, p = 0.005

# Plot code
# Sample F1 trajectory plot for the talker Mary. Any other talker can be
# plotted by replacing every instance of "Mary" (in the subset condition and
# in the title) with the new talker's name and updating the talker details
# in the title. This code assumes that the data frame holding all the data is
# named Chalmette_all_f1long_aw2.
library(ggplot2)
ggplot(
  data = subset(Chalmette_all_f1long_aw2, Subject == "Mary"),
  aes(x = timepoint, y = f1_long)
) +
  # One GAM smooth per Wordclass, distinguished by linetype; confidence bands
  # are switched off. FALSE is spelled out because F can be reassigned.
  geom_smooth(
    method = "gam",
    formula = y ~ s(x),
    se = FALSE,
    aes(linetype = Wordclass)
  ) +
  labs(
    x = "Timepoint",
    y = "F1 mean (Hz)",
    title = "F1 trajectories by following environment for Mary (76, F)",
    linetype = "Following environment"
  ) +
  theme_bw(base_size = 24) +
  # NOTE(review): no colour aesthetic is mapped in this plot, so this scale
  # has nothing to act on -- confirm whether grey lines were intended.
  scale_color_grey()

# Voicing difference by raiser type plot
# First label every row of Chalmette_all_f1long_aw2 with its talker's raiser
# type. Talkers that appear in none of the three lists below (and rows with a
# missing Subject) get NA.
phonological_talkers <- c(
  "Christian", "JuAllison", "Molly", "Roger", "Rosalee", "Sara", "Savannah",
  "SugarMagnolia", "Chris", "Dave", "Ed"
)
phonetic_talkers <- c("Rayne", "Acilie", "Bella", "Margaret")
nonraising_talkers <- c(
  "Ellie", "Haylie", "Lance", "MommaB", "Peaches", "Buckaroo", "Chocolate",
  "Dayle", "Frank", "Katherine", "KillaB", "Mary", "MrB", "Parrain", "Paul",
  "Pauly", "Ronda", "Rosie", "Sam", "Super", "Victor"
)

# %in% replaces the long chains of == comparisons; case_when() is called
# through its namespace so this works whether or not dplyr is attached.
Chalmette_all_f1long_aw2$raisertype <- dplyr::case_when(
  Chalmette_all_f1long_aw2$Subject %in% phonological_talkers ~ "Phonological",
  Chalmette_all_f1long_aw2$Subject %in% phonetic_talkers ~ "Phonetic",
  Chalmette_all_f1long_aw2$Subject %in% nonraising_talkers ~ "None"
)

# Summary of the voicing differences (Hz) for the two raiser groups.
# The vectors are parallel: element i of each describes the same group.
raisertypehz_type <- c("Phonological", "Phonetic")
raisertypehz_mean <- c(109.91, 64.75)
raisertypehz_sd <- c(31.05, 4.03)
raisertypehz_se <- c(9.362842, 2.015564)

# The standard errors are stored in the data frame as well, so the error bars
# can be mapped from the same rows as the bars they belong to.
raisertypehz <- data.frame(
  raisertypehz_type,
  raisertypehz_mean,
  raisertypehz_sd,
  raisertypehz_se
)

# Bar chart of the group means with +/- 1 SE error bars. The data frame only
# contains the Phonetic and Phonological rows, so no subsetting is needed.
ggplot(raisertypehz, aes(raisertypehz_type, raisertypehz_mean)) +
  geom_bar(stat = "identity", aes(fill = raisertypehz_type)) +
  # ymin/ymax are mapped inside aes() so each error bar is computed from its
  # own row instead of relying on the row order of free-floating vectors.
  geom_errorbar(
    aes(
      ymin = raisertypehz_mean - raisertypehz_se,
      ymax = raisertypehz_mean + raisertypehz_se
    ),
    width = .5
  ) +
  labs(
    x = "Raiser type",
    y = "Mean F1 difference by voicing (Hz)",
    fill = "Raising pattern",
    title = "Mean voicing difference for phonetic and phonological raisers"
  ) +
  ylim(0, 150) +
  scale_fill_grey() +
  theme_bw()

# Mean age by raiser type plot
# The vectors are parallel: element i of each describes the same group.
agecats <- c("Phonological", "Phonetic", "None")
# We calculated the mean age of talkers in each raiser type group using mean()
agemean <- c(35.09, 54.25, 51.24)
# We calculated the standard error for each raiser type group using se()
# NOTE(review): se() is not a base R function -- presumably a user-defined or
# package helper; confirm where it comes from.
agese <- c(3.831578, 3.75, 4.086508)

agedf <- data.frame(agecats, agemean, agese)

# Bar chart of mean age per raiser type with +/- 1 SE error bars.
ggplot(data = agedf, aes(agecats, agemean)) +
  geom_bar(stat = "identity", aes(fill = agecats)) +
  # ymin/ymax are mapped inside aes() so each error bar is computed from its
  # own row of agedf instead of relying on the row order of global vectors.
  geom_errorbar(
    aes(ymin = agemean - agese, ymax = agemean + agese),
    width = .2
  ) +
  theme_bw() +
  ylim(0, 60) +
  labs(
    x = "Raising pattern",
    y = "Age (years)",
    title = "Mean age by raiser type",
    fill = "Raising pattern"
  ) +
  scale_fill_brewer(palette = "Greys")

# Attach each raiser's voicing difference (vd) to every one of their rows in
# Chalmette_all_f1long_aw2. Talkers not listed here get NA.
vd_by_talker <- c(
  Christian = 95,
  JuAllison = 78,
  Molly = 116,
  Roger = 69,
  Rosalee = 85,
  Sara = 125,
  Savannah = 163,
  SugarMagnolia = 121,
  Chris = 138,
  Dave = 142,
  Ed = 77,
  Rayne = 67,
  Acilie = 63,
  Bella = 60,
  Margaret = 69
)
# match() returns NA for subjects missing from the lookup table, which gives
# the same result as an unmatched case_when(); unname() drops the talker names
# so the new column is a plain numeric vector.
Chalmette_all_f1long_aw2$vd <- unname(
  vd_by_talker[match(Chalmette_all_f1long_aw2$Subject, names(vd_by_talker))]
)