<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20120330//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd">
<!--<?xml-stylesheet type="text/xsl" href="article.xsl"?>-->
<article article-type="research-article" dtd-version="1.2" xml:lang="en"
    xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <front>
        <journal-meta>
            <journal-id journal-id-type="issn">1868-6354</journal-id>
            <journal-title-group>
                <journal-title>Laboratory Phonology: Journal of the Association for Laboratory
                    Phonology</journal-title>
            </journal-title-group>
            <issn pub-type="epub">1868-6354</issn>
            <publisher>
                <publisher-name>Ubiquity Press</publisher-name>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.5334/labphon.248</article-id>
            <article-categories>
                <subj-group>
                    <subject>Journal article</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Tapping into linguistic rhythm</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Rathcke</surname>
                        <given-names>Tamara</given-names>
                    </name>
                    <email>tamara.rathcke@uni-konstanz.de</email>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                    <xref ref-type="aff" rid="aff-2">2</xref>
                    <xref ref-type="aff" rid="aff-3">3</xref>
                </contrib>
                <contrib contrib-type="author">
                    <name>
                        <surname>Lin</surname>
                        <given-names>Chia-Yuan</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-3">3</xref>
                </contrib>
                <contrib contrib-type="author">
                    <name>
                        <surname>Falk</surname>
                        <given-names>Simone</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-4">4</xref>
                    <xref ref-type="aff" rid="aff-5">5</xref>
                </contrib>
                <contrib contrib-type="author">
                    <name>
                        <surname>Dalla Bella</surname>
                        <given-names>Simone</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-5">5</xref>
                    <xref ref-type="aff" rid="aff-6">6</xref>
                    <xref ref-type="aff" rid="aff-7">7</xref>
                    <xref ref-type="aff" rid="aff-8">8</xref>
                </contrib>
            </contrib-group>
            <aff id="aff-1"><label>1</label>Department of Linguistics, University of Konstanz,
                DE</aff>
            <aff id="aff-2"><label>2</label>MARCS Institute for Brain, Behavior and Development,
                Western Sydney University, AU</aff>
            <aff id="aff-3"><label>3</label>English Language and Linguistics, University of Kent,
                UK</aff>
            <aff id="aff-4"><label>4</label>Department of Linguistics and Translation, University of
                Montreal, CA</aff>
            <aff id="aff-5"><label>5</label>International Laboratory for Brain, Music and Sound Research (BRAMS), Canada</aff>
            <aff id="aff-6"><label>6</label>Department of Psychology, University of Montreal, Canada</aff>
            <aff id="aff-7"><label>7</label>Centre for Research on Brain, Language and Music, Montreal, Canada</aff>
            <aff id="aff-8"><label>8</label>Department of Cognitive Psychology, University of Economics and Human Sciences in Warsaw, Warsaw, Poland </aff>
            <pub-date publication-format="electronic" date-type="pub" iso-8601-date="2021-05-28">
                <day>28</day>
                <month>05</month>
                <year>2021</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2021</year>
            </pub-date>
            <volume>12</volume>
            <issue>1</issue>
            <elocation-id>11</elocation-id>
            <history>
                <date date-type="received" iso-8601-date="2020-11-22">
                    <day>22</day>
                    <month>11</month>
                    <year>2020</year>
                </date>
                <date date-type="accepted" iso-8601-date="2021-04-12">
                    <day>12</day>
                    <month>04</month>
                    <year>2021</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00A9; 2021 The Author(s)</copyright-statement>
                <copyright-year>2021</copyright-year>
                <license license-type="open-access"
                    xlink:href="http://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open-access article distributed under the terms of the
                        Creative Commons Attribution 4.0 International License (CC-BY 4.0), which
                        permits unrestricted use, distribution, and reproduction in any medium,
                        provided the original author and source are credited. See <uri
                            xlink:href="http://creativecommons.org/licenses/by/4.0/"
                            >http://creativecommons.org/licenses/by/4.0/</uri>.</license-p>
                </license>
            </permissions>
            <self-uri xlink:href="http://www.journal-labphon.org/articles/10.5334/labphon.248/"/>
            <abstract>
                <p>Rhythmic properties of speech and language have been a matter of long-standing
                    debates, with both traditional production and perception studies delivering
                    controversial findings. The present study examines the possibility of
                    investigating linguistic rhythm using movement-based paradigms. Informed by the
                    theory and methods of sensorimotor synchronization, we developed two
                    finger-tapping tasks (synchronization and reproduction), and tested them with
                    English participants. The synchronization task required participants to tap
                    along with the beat of a looped sentence while the reproduction task asked them
                    to tap out the perceived beat patterns after listening to a sentence loop. The
                    results showed that both tasks engaged participants in period tracking of a
                    beat-like structure in the linguistic stimuli, though synchronization did so to
                    a greater extent. Patterns obtained in the reproduction task tended to converge
                    toward participants&#8217; spontaneous tapping rates and showed a degree of
                    regularization. Data collected in the synchronization task displayed a
                    consistent anchoring of taps with the vowel onsets. Overall, synchronization
                    performance with language resembled many well-established findings of
                    sensorimotor synchronization with metronome and music. We conclude that our
                    setting of the sensorimotor synchronization paradigm&#8212;finger tapping along
                    with looped spoken phrases&#8212;is a valid experimentation tool for studying
                    rhythm perception in language.</p>
            </abstract>
            <kwd-group>
                <kwd>Speech rhythm</kwd>
                <kwd>sensorimotor synchronization</kwd>
                <kwd>motor reproduction</kwd>
                <kwd>vowel onset</kwd>
                <kwd>p-centre</kwd>
            </kwd-group>
        </article-meta>
    </front>
    <body>
        <sec>
            <title>1. Introduction</title>
            <p>Is language rhythmic? For decades, this seemingly simple but profoundly important
                question that connects language with other aspects of human cognition has been
                controversially debated (<xref ref-type="bibr" rid="B20">Cummins, 2012</xref>; <xref
                    ref-type="bibr" rid="B97">Roach, 1982</xref>). Early accounts of linguistic
                rhythm suggest that it relies on some acoustic isochrony in spoken
                language&#8212;either at the level of syllables or inter-stress intervals (<xref
                    ref-type="bibr" rid="B1">Abercrombie, 1967</xref>). However, this idea did not
                stand up to acoustic measurement as no temporal analyses of speech have ever
                provided any evidence for an isochrony-based rhythm (<xref ref-type="bibr" rid="B24"
                    >Dauer, 1983</xref>; <xref ref-type="bibr" rid="B38">Fowler &amp; Tassinary,
                    1981</xref>; <xref ref-type="bibr" rid="B79">Pointon, 1980</xref>; <xref
                    ref-type="bibr" rid="B97">Roach, 1982</xref>; <xref ref-type="bibr" rid="B109"
                    >Uldall, 1971</xref>; <xref ref-type="bibr" rid="B111">van Santen &amp; Shih,
                    2000</xref>). The failure to find isochrony in speech has led to the development
                of alternative approaches. One of the most prominent proposals has suggested that
                linguistic rhythm may be ascribed to the durational variability present in
                consonantal and vocalic intervals, with languages being more or less variable and
                thus sounding rhythmically different (<xref ref-type="bibr" rid="B26">Dellwo &amp;
                    Wagner, 2003</xref>; <xref ref-type="bibr" rid="B27">Deterding, 2001</xref>;
                    <xref ref-type="bibr" rid="B42">Grabe &amp; Low, 2002</xref>; <xref
                    ref-type="bibr" rid="B60">ling Low, Grabe, &amp; Nolan, 2000</xref>; <xref
                    ref-type="bibr" rid="B85">Ramus, Nespor, &amp; Mehler, 1999</xref>; <xref
                    ref-type="bibr" rid="B117">White &amp; Mattys, 2007</xref>). Initially appealing
                and fuelling much research into the cross-linguistic study of rhythm, this approach
                has been recently critiqued as empirically inadequate and misrepresenting the issue
                at heart (<xref ref-type="bibr" rid="B4">Arvaniti, 2009</xref>, <xref
                    ref-type="bibr" rid="B5">2012</xref>; <xref ref-type="bibr" rid="B6">Arvaniti
                    &amp; Rodriquez, 2013</xref>; <xref ref-type="bibr" rid="B10">Barry, Andreeva,
                    &amp; Koreman, 2009</xref>; <xref ref-type="bibr" rid="B51">Kohler, 2009</xref>;
                    <xref ref-type="bibr" rid="B89">Rathcke &amp; Smith, 2015a</xref>, <xref
                    ref-type="bibr" rid="B90">2015b</xref>; <xref ref-type="bibr" rid="B121">Wiget
                    et al., 2010</xref>). The latest attempts at capturing rhythmic properties of
                spoken language involve analyses of the prosodic hierarchy and its temporal
                signatures, i.e., durational implementation of the hierarchical structure of
                linguistic utterances (<xref ref-type="bibr" rid="B89">Rathcke &amp; Smith,
                    2015a</xref>; <xref ref-type="bibr" rid="B119">White, Payne, &amp; Mattys,
                    2009</xref>), though this proposal has also seen some counterevidence (<xref
                    ref-type="bibr" rid="B64">Mairano, Santiago, &amp; Romano, 2015</xref>). Other
                recent approaches are driven less by theory than by acoustics, relying on
                signal analyses, such as properties of the amplitude envelope, to capture some
                rhythmic properties in language (<xref ref-type="bibr" rid="B41">Goswami et al.,
                    2002</xref>; <xref ref-type="bibr" rid="B82">Port, Cummins, &amp; Gasser,
                    1995</xref>; <xref ref-type="bibr" rid="B103">&#352;turm &amp; Vol&#237;n,
                    2016</xref>; <xref ref-type="bibr" rid="B107">Tilsen &amp; Arvaniti,
                2013</xref>). Following on from these diverse and controversial accounts of rhythm,
                ideas have been put forward that language has a scale of rhythmicity (<xref
                    ref-type="bibr" rid="B51">Kohler, 2009</xref>), is only occasionally rhythmic
                    (<xref ref-type="bibr" rid="B118">White, Mattys, &amp; Wiget, 2012</xref>), or
                even anti-rhythmic (<xref ref-type="bibr" rid="B74">Nolan &amp; Jeon,
                2014</xref>).</p>
            <p>However, it has also been noted that previous production and perception studies of
                linguistic rhythm do not capture one of the core features of rhythm&#8212;its
                ability to entrain movement (<xref ref-type="bibr" rid="B19">Cummins, 2009</xref>,
                    <xref ref-type="bibr" rid="B20">2012</xref>). The idea that rhythm perception
                and movement are closely interconnected looks back on a long history (<xref
                    ref-type="bibr" rid="B12">Bolton, 1894</xref>), and is supported by a growing
                body of evidence showing that beat and rhythm perception involve motor regions of
                the brain (e.g., the basal ganglia and premotor cortex) and their connections to
                auditory regions (e.g., <xref ref-type="bibr" rid="B15">Brett &amp; Grahn,
                    2007</xref>; <xref ref-type="bibr" rid="B43">Grahn &amp; Rowe, 2009</xref>;
                    <xref ref-type="bibr" rid="B76">Patel &amp; Iversen, 2014</xref>; <xref
                    ref-type="bibr" rid="B124">Zatorre, Chen, &amp; Penhune, 2007</xref>).
                Behavioural research has exploited the potential of external, rhythmically
                structured events to entrain movement, with the goal of gaining a better understanding
                of rhythmic mechanisms and their underpinnings. The sensorimotor synchronization
                (SMS) paradigm has been developed and successfully utilized to study rhythm
                perception and the properties of the human timing system by observing how a motor
                action is temporally coordinated with an external auditory event (<xref
                    ref-type="bibr" rid="B7">Aschersleben, 2002</xref>; <xref ref-type="bibr"
                    rid="B94">Repp, 2005</xref>; <xref ref-type="bibr" rid="B96">Repp &amp; Su,
                    2013</xref>). Such coordination of a perceived rhythm and a motor action gives
                insights into mechanisms underlying our capacity to achieve complex coordination in
                time when we dance, jointly sing, or chant, such as perceptual beat tracking and the
                generation of temporal expectancies (<xref ref-type="bibr" rid="B94">Repp,
                    2005</xref>). In the present study, we test the potential of SMS to provide new
                insights into the rhythmic organization of language.</p>
            <p>The simplest way to measure SMS is to record finger tapping in time with an auditory
                stimulus, such as repeated sounds of a metronome or more complex musical sequences
                    (<xref ref-type="bibr" rid="B7">Aschersleben, 2002</xref>; <xref ref-type="bibr"
                    rid="B94">Repp, 2005</xref>; <xref ref-type="bibr" rid="B96">Repp &amp; Su,
                    2013</xref>; for batteries of tests involving SMS see <xref ref-type="bibr"
                    rid="B23">Dalla Bella et al., 2017</xref>; <xref ref-type="bibr" rid="B45"
                    >Iversen &amp; Patel, 2008</xref>). Typically, the task consists in
                synchronizing finger taps produced with the dominant hand to the beat perceived in
                the auditory signal. Measures of the temporal asynchrony between the stimulus and
                the tap, the duration, and the variability of inter-tap intervals quantify the
                synchronization performance and motor stability during the task.</p>
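            <p>For illustration, the following Matlab sketch shows how these measures can be
                computed from a vector of tap times and a vector of stimulus beat times; the
                values and variable names are hypothetical and do not come from the present
                data.</p>
            <preformat>% Illustrative sketch (hypothetical values): quantifying SMS performance
% from tap times and stimulus beat times, both in seconds.
tapTimes  = [0.51 1.02 1.49 2.03 2.55];    % example tap onsets
beatTimes = [0.50 1.00 1.50 2.00 2.50];    % example stimulus beats

% Signed asynchrony: each tap minus its nearest stimulus beat
% (negative values mean that the tap preceded the beat).
asynchrony = zeros(size(tapTimes));
for k = 1:numel(tapTimes)
    [~, idx] = min(abs(beatTimes - tapTimes(k)));
    asynchrony(k) = tapTimes(k) - beatTimes(idx);
end

% Inter-tap intervals (ITIs) and their coefficient of variation (CV).
iti     = diff(tapTimes);
meanITI = mean(iti);
cvITI   = std(iti) / meanITI;              % motor stability during the task

fprintf('mean asynchrony = %.3f s, mean ITI = %.3f s, CV = %.1f%%\n', ...
    mean(asynchrony), meanITI, 100 * cvITI)</preformat>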
            <p>According to this research, the most stable patterns of synchronization arise when
                participants tap at 1:1 or other multiple integer ratios in-phase with the beat
                whereas more complex ratios and anti-phase tapping are generally more difficult
                    (<xref ref-type="bibr" rid="B14">Bouvet, Varlet, Dalla Bella, Keller, &amp;
                    Bardy, 2019</xref>; <xref ref-type="bibr" rid="B94">Repp, 2005</xref>).
                Synchronized tapping on temporal scales longer than 800 ms is often more difficult
                and breaks down completely when inter-onset intervals (IOIs) exceed 1.8&#8211;2 seconds
                    (<xref ref-type="bibr" rid="B32">Engstr&#246;m, Kelso, &amp; Holroyd,
                    1996</xref>; <xref ref-type="bibr" rid="B67">Mates, M&#252;ller, Radil, &amp;
                    P&#246;ppel, 1994</xref>). The fastest tapping rates occur at the inter-onset
                intervals of 150&#8211;200 ms (<xref ref-type="bibr" rid="B94">Repp, 2005</xref>;
                    <xref ref-type="bibr" rid="B108">Truman &amp; Hammond, 1990</xref>), and are
                faster in musicians than non-musicians (<xref ref-type="bibr" rid="B92">Repp,
                    2003</xref>). In addition to motor constraints, upper and lower rate limits
                reveal general cognitive constraints on temporal processing (<xref ref-type="bibr"
                    rid="B94">Repp, 2005</xref>) and have been related to the working memory
                capacity (<xref ref-type="bibr" rid="B81">P&#246;ppel, 1997</xref>).<xref
                    ref-type="fn" rid="n1">1</xref> Importantly, both upper and lower SMS limits of
                manual synchronization with a pacing signal have meaningful temporal counterparts in
                speech acoustics. Lower limits correspond to the average duration of a vowel or a
                syllable, and upper limits can correspond to the duration of larger units such as a
                prosodic or a syntactic phrase. Existing evidence further demonstrates that SMS can
                take place even in syncopated signals (<xref ref-type="bibr" rid="B54">Large &amp;
                    Palmer, 2002</xref>) and in auditory stimuli with complex metrical structures
                    (<xref ref-type="bibr" rid="B63">Madison, 2014</xref>). That is, SMS responds to
                those features that bear the closest resemblance to language. Unlike traditional
                speech perception experiments that rely on listeners&#8217; metalinguistic
                conceptualization of rhythm and have delivered inconclusive results in the past (<xref
                    ref-type="bibr" rid="B69">Miller, 1984</xref>; <xref ref-type="bibr" rid="B90"
                    >Rathcke &amp; Smith, 2015b</xref>), SMS appeals as more intuitive to
                non-specialists and taps into the motor routines that are known to sharpen sensory
                rhythmic representations in music (<xref ref-type="bibr" rid="B71">Morillon,
                    Schroeder, &amp; Wyart, 2014</xref>; <xref ref-type="bibr" rid="B91">Ravignani
                    et al., 2019</xref>).</p>
            <p>Although it has been noted that the ability to synchronize movement to an external
                timekeeper is predominantly human and might have even played an important role in
                the evolution of language and music (<xref ref-type="bibr" rid="B68">Merker,
                    2000</xref>; <xref ref-type="bibr" rid="B91">Ravignani et al., 2019</xref>), SMS
                paradigms have so far inspired relatively little interest within the linguistic
                field. Some early work on speech rhythm (<xref ref-type="bibr" rid="B2">Allen,
                    1972</xref>) utilized a version of sensorimotor synchronization with speech by
                asking listeners to tap to a designated syllable in a spoken sentence that was
                repeated and played back to them 50 times. The results suggested that the paradigm
                was able to unveil the &#8216;beat location&#8217; of a syllable which was close to
                a vowel onset and varied with prominence and syllable structure. Similarly, Falk,
                Rathcke, and Dalla Bella (<xref ref-type="bibr" rid="B35">2014</xref>) used
                sentence looping and demonstrated that SMS was highly sensitive to a low-level,
                within-language timing variation, thus suggesting that the method is well suited for
                the study of subtle rhythmic differences in spoken signals, despite their high
                complexity and temporal variability. Lidji, Palmer, Peretz, and Morningstar (<xref
                    ref-type="bibr" rid="B59">2011</xref>) compared finger tapping performance of
                monolingual French and English participants as well as French-English bilinguals
                when tapping to French and English spoken sentences with a regular metrical
                structure of strong and weak syllables. The study revealed that both the
                listener&#8217;s native language and the language-specific acoustics affected the
                obtained tapping patterns in terms of tapping frequency and inter-tap-interval
                variability.</p>
            <p>Falk and Dalla Bella (<xref ref-type="bibr" rid="B33">2016</xref>) used finger
                tapping with metrically regular sentences to examine potential benefits on speech
                and language processing that might arise from a concurrent motor activity while
                listening. Tapping congruently (i.e., in-phase) with accented syllables was found to
                enhance speech processing compared to incongruent (i.e., anti-phase) tapping, or
                listening without the motor activity. Such linguistic processing advantages may be
                supported by increased attentional resources being available through the coupling of
                perception and action that is typical of SMS tasks (cf. <xref ref-type="bibr"
                    rid="B44">Hommel, 2015</xref>; <xref ref-type="bibr" rid="B53">Large &amp;
                    Jones, 1999</xref>). Overall, the recent SMS studies with language suggest that
                movement-based paradigms tap into language-specific rhythmic properties of
                speech.</p>
            <p>Most of the studies above reported measures of motor rate and variability (including
                duration of inter-tap-intervals, ITI, and the coefficient of their variation, CV).
                Dalla Bella and colleagues (<xref ref-type="bibr" rid="B22">Dalla Bella,
                    Bia&#322;u&#324;ska, &amp; Sowi&#324;ski, 2013</xref>) note that SMS with
                language displays a relatively high amount of variation in contrast to SMS with
                music (reflected in a CV of 30% versus 4%, respectively), though it is unclear if
                this variability arises from the fact that language entrains movement less than
                temporally more regular stimuli (e.g., music) as the authors suggest, or is rather
                reflective of the unique temporal properties of language such as lack of isochrony
                in its acoustic signal (cf. <xref ref-type="bibr" rid="B24">Dauer, 1983</xref>;
                    <xref ref-type="bibr" rid="B79">Pointon, 1980</xref>; <xref ref-type="bibr"
                    rid="B97">Roach, 1982</xref>; <xref ref-type="bibr" rid="B109">Uldall,
                    1971</xref>; <xref ref-type="bibr" rid="B111">van Santen &amp; Shih,
                2000</xref>).</p>
            <p>Only a few studies to date have addressed the question of potential SMS anchors in
                the acoustic signal of speech (<xref ref-type="bibr" rid="B2">Allen, 1972</xref>;
                    <xref ref-type="bibr" rid="B36">Falk, Volpi-Moncorger, &amp; Dalla Bella,
                    2017</xref>; <xref ref-type="bibr" rid="B87">Rathcke, Lin, Falk, &amp; Dalla
                    Bella, 2019</xref>). An answer to this question crucially hinges on empirical
                evidence that would demonstrate whether or not listeners attempt to systematically
                synchronize their movement with some specific points in the time course of an
                acoustic signal. Falk et al. (<xref ref-type="bibr" rid="B36">2017</xref>) defined
                SMS-anchors to coincide with the so-called &#8216;perceptual centres&#8217; (or
                p-centres, <xref ref-type="bibr" rid="B65">Marcus, 1981</xref>). A p-centre
                describes the subjective moment of occurrence of an event (typically a syllable in
                speech). More often than not, the p-centre and the acoustic onset of the
                corresponding event do not co-occur in time (<xref ref-type="bibr" rid="B18">Cooper,
                    Whalen, &amp; Fowler, 1986</xref>; <xref ref-type="bibr" rid="B65">Marcus,
                    1981</xref>; <xref ref-type="bibr" rid="B72">Morton, Marcus, &amp; Frankish,
                    1976</xref>). The original interest in p-centres arose from a search for some
                temporal constancy in language, and led to the hypothesis that temporal isochrony in
                language might be perceptual, and not acoustic, in nature (<xref ref-type="bibr"
                    rid="B55">Lehiste, 1977</xref>; <xref ref-type="bibr" rid="B72">Morton et al.,
                    1976</xref>). There have been attempts to localize the p-centre at the midpoint
                of the amplitude rise-time at the onset of nuclear accented vowels (following <xref
                    ref-type="bibr" rid="B21">Cummins &amp; Port, 1998</xref>; see also <xref
                    ref-type="bibr" rid="B72">Morton et al., 1976</xref>). However, neither
                kinematic nor any acoustic properties of speech signals seem to consistently capture
                the essence of the p-centre location (<xref ref-type="bibr" rid="B25">De Jong,
                    1994</xref>; <xref ref-type="bibr" rid="B77">Patel, L&#246;fqvist, &amp; Naito,
                    1999</xref>), and after 40 years of p-centre research, a comprehensive and
                reliable account of the phenomenon still remains a desideratum (<xref
                    ref-type="bibr" rid="B112">Villing, Repp, Ward, &amp; Timoney, 2011</xref>).</p>
            <p>In our own study (<xref ref-type="bibr" rid="B87">Rathcke et al., 2019</xref>), we
                systematically examined several potential anchors by measuring SMS with very simple
                verbal stimuli, namely regularly spaced sequences containing alternations of
                syllables /bi/ and /bu/. The results of the study suggest that vowel onsets are
                likely to serve as attractors of individual taps. Moreover, SMS accuracy with
                similarly structured verbal and tonal stimuli did not significantly differ, if SMS
                in verbal stimuli was measured at vowel onsets. The latter result echoes previous
                findings obtained using a different tapping task (<xref ref-type="bibr" rid="B22"
                    >Dalla Bella et al., 2013</xref>). When linguistic stimuli closely resemble the
                metrical structure of music, the discrepancy between music and speech in their
                ability to attract movement disappears. However, natural speech is rarely metrical
                and never isochronous. Thus, rhythmic motor entrainment with language is yet to be
                demonstrated.</p>
            <p>In contrast, a movement task that does not involve synchronization avoids the
                challenge of locating a tapping anchor in the acoustic signal. This non-synchronized
                motor reproduction (henceforth, NMR) paradigm has occasionally been used in previous
                speech research (<xref ref-type="bibr" rid="B30">Donovan &amp; Darwin, 1979</xref>;
                    <xref ref-type="bibr" rid="B99">Scott, Isard, &amp; de Boysson-Bardies,
                    1985</xref>; <xref ref-type="bibr" rid="B114">Wagner, Cwiek, &amp; Samlowski,
                    2019</xref>). In this task, listeners are asked to tap or drum a perceived
                rhythmic pattern after listening to an auditory prompt. NMR is somewhat similar to
                the synchronization-continuation paradigm which is common in the timing literature
                (e.g., <xref ref-type="bibr" rid="B122">Wing, 2002</xref>), with the difference that
                a synchronized tapping phase is missing. In NMR, listeners&#8217; tapping
                performance can be quantified by the interval duration between their taps (ITI) and
                by the variability in the interval duration (CV of the ITIs). When measuring period
                tracking of a beat in a linguistic input by means of such a non-synchronized task,
                ITI and CV could reflect some meaningful timing properties of the corresponding
                speech signal, e.g., syllable, word, or phrase duration, and the number of taps
                could reflect the number of rhythmically relevant events. Since this beat tracking
                ability of NMR has not been explicitly demonstrated in previous work, it is as yet
                unclear if, and how well, this motor paradigm can assist with understanding rhythm
                perception in language.</p>
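            <p>As an illustration of this logic, the following hypothetical Matlab sketch
                compares the mean ITI produced in an NMR trial against a set of candidate
                durational units of the stimulus and against the participant&#8217;s spontaneous
                tapping rate; all numbers are invented for illustration.</p>
            <preformat>% Hypothetical sketch: does NMR tapping converge towards a meaningful
% durational unit of the stimulus rather than the spontaneous rate?
meanITI        = 0.42;                 % mean inter-tap interval (s)
spontaneousITI = 0.60;                 % participant's unpaced tapping rate (s)
candidateUnits = [0.21 0.43 0.86];     % e.g., mean syllable, foot, half-phrase (s)

[~, best] = min(abs(candidateUnits - meanITI));
fprintf('closest stimulus unit: %.2f s (mean ITI %.2f s, spontaneous %.2f s)\n', ...
    candidateUnits(best), meanITI, spontaneousITI)</preformat>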
        </sec>
        <sec>
            <title>2. Aims and hypotheses</title>
            <p>The aims of the present study were two-fold: (1) to test whether or not motor
                paradigms can help to tap into rhythm perception in natural language, and (2) to
                identify which paradigm would optimally support this.</p>
            <p>While it seems natural and easy to synchronize to music, prolonged motor
                synchronization to speech is at first sight a less obvious and widespread activity
                    (<xref ref-type="bibr" rid="B22">Dalla Bella et al., 2013</xref>), although
                previous studies have provided some evidence that it is not impossible (e.g., <xref
                    ref-type="bibr" rid="B2">Allen, 1972</xref>; <xref ref-type="bibr" rid="B35"
                    >Falk et al., 2014</xref>; <xref ref-type="bibr" rid="B59">Lidji et al.,
                    2011</xref>; <xref ref-type="bibr" rid="B87">Rathcke et al., 2019</xref>). It
                has its natural precursors in motor engagement with nursery rhymes (<xref
                    ref-type="bibr" rid="B16">Cardany, 2013</xref>), clapping to political oratory
                    (<xref ref-type="bibr" rid="B105">Tanaka &amp; Rathcke, 2016</xref>) or
                co-speech gesturing (<xref ref-type="bibr" rid="B115">Wagner, Malisz, &amp; Kopp,
                    2014</xref>). Different implementations of a laboratory SMS paradigm with
                speech can be found in the literature. For example, Lidji et al. (<xref
                    ref-type="bibr" rid="B59">2011</xref>) asked their participants to listen to
                three repetitions of a spoken sentence in total, and to synchronize their finger
                taps with the beat during the second and the third presentation of the sentence. In
                the present study, we decided to use a larger number of repetitions, and
                participants were instructed to tap along with the perceived beat throughout a
                sentence loop.</p>
            <p>Capitalizing on the general perceptual phenomenon of repetition (<xref
                    ref-type="bibr" rid="B66">Margulis &amp; Simchy-Gross, 2016</xref>; <xref
                    ref-type="bibr" rid="B98">Rowland, Kasdan, &amp; Poeppel, 2019</xref>), looped
                speech has the potential to reveal underlying rhythmic structures of sentences
                    (<xref ref-type="bibr" rid="B35">Falk et al., 2014</xref>; <xref ref-type="bibr"
                    rid="B88">Rathcke, Falk, &amp; Dalla Bella, 2018</xref>). After having listened
                to repetitions of a sentence, listeners are no longer engaged in cognitively
                demanding semantic and syntactic processing. Instead, they can attend to the
                prosodic structure of the sentence and extract its rhythmic properties more easily.
                Looped speech is also known to sometimes induce the so-called &#8216;speech-to-song
                illusion&#8217; (S2S). S2S describes a perceptual phenomenon in which an originally
                spoken phrase comes to be perceived by many listeners as a song when it is embedded
                in a loop (<xref ref-type="bibr" rid="B28">Deutsch, 2003</xref>; <xref
                    ref-type="bibr" rid="B29">Deutsch, Henthorn, &amp; Lapidis, 2011</xref>).
                However, not all phrases are equally likely to transform into song (e.g., <xref
                    ref-type="bibr" rid="B35">Falk et al., 2014</xref>), and we controlled for this
                phenomenon in our materials. Early work by Allen (<xref ref-type="bibr" rid="B2"
                    >1972</xref>) also utilized looped sentences, though participants of this
                experiment were only asked to synchronize with one designated syllable on each
                repetition, rather than to tap along with the beat of the looped sentence as in
                the present study.</p>
            <p>The present approach is also different from the NMR paradigm implemented in previous
                research in which listeners were either presented with one repetition of each
                stimulus and could hear the stimulus again if needed (<xref ref-type="bibr"
                    rid="B114">Wagner et al., 2019</xref>), or presented with 10 repetitions of a
                stimulus but were asked to tap after each repetition (<xref ref-type="bibr"
                    rid="B30">Donovan &amp; Darwin, 1979</xref>; <xref ref-type="bibr" rid="B99"
                    >Scott et al., 1985</xref>). The looped version of both SMS and NMR represents a
                principled way of expanding and applying current movement-based paradigms to
                language.</p>
            <p>Given the differences in the nature of the SMS and NMR tasks, different scenarios
                would indicate (the degree of) the success of the paradigm in representing perceived
                rhythmic structures. In the case of SMS, motor entrainment necessarily involves the
                presence of a consistent anchor of synchronization in the speech signal. Lack of
                temporal consistency between a tap occurrence and an acoustic landmark would
                suggest a lack of entrainment. In the case of NMR, beat tracking and rhythmic reproduction
                would be considered successful if tapping rates diverge from participants&#8217;
                natural and preferred tapping rate and converge towards the IOI of meaningful
                durational intervals in the linguistic input. If patterns resulting from NMR deviate
                from this prediction, beat tracking cannot be assumed to have been successful during
                the task. We further expect to find individual variation in both tasks, which should
                be at least partially explainable by individual musicality, timekeeping, and
                synchronization abilities (<xref ref-type="bibr" rid="B23">Dalla Bella et al.,
                    2017</xref>).</p>
            <p>In summary, this study set out to assess two movement-based paradigms that had been
                previously used with language, synchronization and reproduction, with the aim of
                providing empirical evidence on a methodology that would be best suited for studying
                rhythmic properties of spoken language.</p>
        </sec>
        <sec sec-type="methods">
            <title>3. Method</title>
            <sec>
                <title>3.1 Experimental stimuli</title>
                <p>Six English sentences were chosen from an existing database that we had
                    previously created to investigate S2S. Every sentence in this database has been
                    tagged for S2S-likelihood based on perception data obtained from 40 healthy
                    native listeners (<xref ref-type="bibr" rid="B88">Rathcke et al., 2018</xref>,
                    forthcoming; see Table <xref ref-type="table" rid="T1">1</xref>). The sentences
                    were read by a female native Standard Southern British English speaker (22 years
                    old at the time of the recording).</p>
                <table-wrap id="T1">
                    <label>Table 1</label>
                    <caption>
                        <p>Summary of the materials used in the experiment. Accented syllables are
                            underlined.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Sentence length</th>
                            <th valign="top" align="left">Sentence</th>
                            <th valign="top" align="center">S2S likelihood</th>
                            <th valign="top" align="center">Duration (seconds)</th>
                        </tr>
                        <tr>
                            <td colspan="4"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">short: 4 syllables</td>
                            <td valign="top" align="left">S1: <italic>I <underline>wove</underline>
                                    a <underline>yarn</underline></italic>.<break/>S2: <italic>I
                                        <underline>took</underline> the
                                    <underline>prize</underline></italic>.</td>
                            <td valign="top" align="right">68% (high)<break/>30% (low)</td>
                            <td valign="top" align="right">1.1<break/>1.2</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">medium: 7 syllables</td>
                            <td valign="top" align="left">M1: <italic><underline>Ann</underline> won
                                    the <underline>ye</underline>llow
                                    a<underline>ward</underline></italic>.<break/>M2:
                                        <italic><underline>Grand</underline>pa did
                                        <underline>not</underline> eat the
                                        <underline>cake</underline></italic>.</td>
                            <td valign="top" align="right">73% (high)<break/>35% (low)</td>
                            <td valign="top" align="right">1.5<break/>1.8</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">long: 10 syllables</td>
                            <td valign="top" align="left">L1: <italic>The
                                    <underline>in</underline>cident oc<underline>curred</underline>
                                    last Friday <underline>night</underline></italic>.<break/>L2:
                                    <italic>As the boy <underline>sneezed</underline>, the door
                                        <underline>closed</underline>&#160;<underline>su</underline>ddenly</italic>.</td>
                            <td valign="top" align="right">34% (low)<break/>58% (high)</td>
                            <td valign="top" align="right">2.1<break/>3.0</td>
                        </tr>
                    </table>
                </table-wrap>
                <p>The materials of the present study comprised six sentences, two with four
                    syllables, two with seven, and two with ten syllables. The ten-syllable
                    sentences were syntactically more complex than the shorter ones. To control for
                    the possibility that repetition might induce musical interpretation of speech
                    and thereby bias synchronization or reproduction in unexpected ways, the chosen
                    pairs varied in their probability to induce S2S, with one high-transforming and
                    one low-transforming sentence in each pair (see Table <xref ref-type="table"
                        rid="T1">1</xref>).</p>
                <p>The stimuli were repeated 20 times for the SMS task and 10 times for the NMR task
                    (see 3.7). A 400 ms pause separated the repetitions.</p>
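                <p>A minimal Matlab sketch of how such a loop can be constructed is given below,
                    assuming a mono sentence recording; the file names are hypothetical.</p>
                <preformat>% Minimal sketch of stimulus construction (hypothetical file names):
% a sentence recording is repeated nReps times with a 400 ms silent gap.
[x, fs] = audioread('sentence_M2.wav');   % mono sentence recording
gap     = zeros(round(0.4 * fs), 1);      % 400 ms pause
nReps   = 20;                             % 20 for SMS, 10 for NMR
loop    = repmat([x; gap], nReps, 1);
audiowrite('sentence_M2_loop.wav', loop, fs)</preformat>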
            </sec>
            <sec>
                <title>3.2 Sentence annotations</title>
                <p>A trained phonetician (first author) annotated onsets of vowels and syllables in
                    the test sentences. Vowels were defined as syllabic nuclei identified by the
                    presence of voicing, formant structure, and relatively high intensity.
                    Accordingly, pre-aspiration was excluded. Segmentation of vowel onsets in
                    post-sonorant contexts combined acoustic and auditory criteria that were guided
                    by impressions of the intended vowel quality. There were no segmental reduction
                    or deletion phenomena, given that the recordings consisted of clear, read speech
                    samples. There were also no cases of glottalization in these recordings.
                    Segmentation examples are given in Figures <xref ref-type="fig" rid="F2"
                        >2</xref> and <xref ref-type="fig" rid="F3">3</xref>. All materials are
                    available from <ext-link ext-link-type="uri"
                        xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="https://osf.io/3dh4m/">https://osf.io/3dh4m/</ext-link>. An
                    independent annotator segmented vowel onsets in all test sentences following the
                    criteria given above, and reached a cross-annotator agreement of .999945
                        (<italic>p</italic> &lt; .001) in Pearson&#8217;s correlation
                    coefficient.</p>
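                <p>This agreement check can be sketched as follows; the onset vectors shown are
                    hypothetical stand-ins for the two annotators&#8217; vowel-onset times pooled
                    over all test sentences.</p>
                <preformat>% Sketch of the cross-annotator agreement check (hypothetical values):
% Pearson correlation between the two annotators' vowel-onset times.
onsetsA = [0.12 0.35 0.61 0.88]';         % annotator 1, onset times (s)
onsetsB = [0.12 0.36 0.60 0.88]';         % annotator 2, onset times (s)
[r, p]  = corrcoef(onsetsA, onsetsB);
fprintf('r = %.6f, p = %.4g\n', r(1, 2), p(1, 2))</preformat>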
                <p>Additionally, each syllable (and its vowel) was specified with respect to its
                    metrical status (strong or weak) and phrasal prominence (accented or
                    unaccented). From these annotations, we derived acoustic timings of the two
                    potential synchronization anchors (syllable or vowel) and linguistic prominence
                    of the underlying units (0 for metrically weak, 1 for metrically strong but
                    unaccented, 2 for accented syllables/vowels).</p>
            </sec>
            <sec>
                <title>3.3 Acoustic pre-processing</title>
                <p>To define potential acoustic SMS anchors, the linguistically informed data
                    preparation described above was complemented by analyses of the amplitude envelope (cf.
                        <xref ref-type="bibr" rid="B41">Goswami et al., 2002</xref>; <xref
                        ref-type="bibr" rid="B82">Port, Cummins, &amp; Gasser, 1995</xref>; <xref
                        ref-type="bibr" rid="B107">Tilsen &amp; Arvaniti, 2013</xref>) and signal
                    energy derivatives (<xref ref-type="bibr" rid="B103">&#352;turm &amp;
                        Vol&#237;n, 2016</xref>). Amplitude envelopes were created by employing the
                    envelope function in Matlab (2018b). Accordingly, envelopes were derived from
                    the absolute signal amplitude, and smoothed using a spline interpolation with a
                    window of at least 500 samples (amounting to approximately 11 ms). Smoothed
                    energy contours were derived following the procedure developed by &#352;turm and
                    Vol&#237;n (<xref ref-type="bibr" rid="B103">2016</xref>) which was based on the
                    calculation of energy averages across 40-ms segment windows with a 44-sample
                    shift and a 6th-order moving-average filter.</p>
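                <p>The following Matlab sketch illustrates this pre-processing schematically; it
                    reflects one possible reading of the procedure rather than the exact analysis
                    script, the file name is hypothetical, and the envelope function requires the
                    Signal Processing Toolbox.</p>
                <preformat>% Schematic sketch of the acoustic pre-processing (hypothetical file name).
[x, fs] = audioread('sentence_M2.wav');
x = x(:, 1);                               % use a mono channel

% Amplitude envelope: spline interpolation over peaks of the absolute
% amplitude separated by at least 500 samples (roughly 11 ms).
[ampEnv, ~] = envelope(abs(x), 500, 'peak');

% Smoothed energy contour (after Sturm and Volin, 2016): average energy
% in 40-ms windows with a 44-sample shift, then a 6th-order moving average.
winLen = round(0.040 * fs);
hop    = 44;
starts = 1:hop:(numel(x) - winLen + 1);
energy = zeros(size(starts));
for k = 1:numel(starts)
    seg       = x(starts(k):starts(k) + winLen - 1);
    energy(k) = mean(seg .^ 2);
end
energySmooth = filter(ones(1, 6) / 6, 1, energy);</preformat>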
                <p>Figure <xref ref-type="fig" rid="F1">1</xref> compares the amplitude envelope and
                    the smoothed energy contour for the test sentence M2, and demonstrates core
                    differences between the two contours. The amplitude envelope (shown in blue)
                    closely follows the waveform, apart from the sections where there is a
                    discrepancy between positive and negative amplitude values (since the envelope
                    is based on an average of the absolute values). In contrast, the energy function
                    shows multiple deviations from the original waveform, especially in regions of
                    low sonority. Moreover, energy contours of open vowels at the beginning of the
                    sentence are more closely matched to the waveform than the contours of close
                    vowels towards the end of the sentence.</p>
                <fig id="F1">
                    <label>Figure 1</label>
                    <caption>
                        <p>Waveform (grey), amplitude envelope (blue), and energy contour (red) of
                            the test sentence M2 (&#8220;Grandpa did not eat the cake&#8221;).</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76021/"/>
                </fig>
                <fig id="F2">
                    <label>Figure 2</label>
                    <caption>
                        <p>Waveform, spectrogram, and annotation of the example word
                            &#8220;Friday&#8221; comparing temporal locations of the five landmarks
                            under investigation: (1) syllable onset (SylOn, identified manually),
                            (2) vowel onset (VowOn, identified manually), (3) local amplitude
                            maximum (LAM, derived from the amplitude envelope, indicated in blue),
                            (4) local energy maximum (maxE, derived from the energy contour,
                            indicated in red), (5) fastest local energy increase (maxD, derived from
                            the energy difference function, indicated in green).</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76022/"/>
                </fig>
                <fig id="F3">
                    <label>Figure 3</label>
                    <caption>
                        <p>Waveform and amplitude envelope of the example word &#8220;Friday&#8221;
                            (taken from L1). Local amplitude maxima (in red) and minima (in green),
                            and their derived measures of rise-time and rise-slope are
                            indicated.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76023/"/>
                </fig>
                <p>Additionally, local energy dynamics of voiced parts of the acoustic signal were
                    described following the formula in &#352;turm and Vol&#237;n (<xref
                        ref-type="bibr" rid="B103">2016</xref>). This function operates on the
                    smoothed energy contour, calculates differences between two neighbouring samples
                    (disregarding samples with zero-crossing rates higher than 4000 that are typical
                    of voiceless fricatives), and smooths the difference values via a moving-average
                    filter of order 10. Figure <xref ref-type="fig" rid="F2">2</xref> displays the
                    two energy contours in comparison (red versus green lines). Subsequently, local
                    maxima of the smoothed energy function (maxE) and local maxima of the energy
                    difference function (maxD) were identified and localized within each syllable of
                    the test sentences. According to &#352;turm and Vol&#237;n (<xref
                        ref-type="bibr" rid="B103">2016</xref>), maxD represents a close
                    approximation of the p-centre in Czech.</p>
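                <p>Schematically, maxE and maxD can be obtained from the smoothed energy contour
                    as sketched below; voicedMask (a logical vector marking frames treated as
                    voiced) and sylFrames (the frame indices of one syllable) are hypothetical
                    placeholders for the voicing and syllable information described above.</p>
                <preformat>% Schematic sketch of the energy-difference measure (after Sturm and
% Volin, 2016). energySmooth, hop, and fs come from the previous sketch;
% voicedMask and sylFrames are hypothetical placeholders.
dE = diff(energySmooth);
dE(~voicedMask(2:end)) = 0;                % discard frames treated as voiceless
dESmooth = filter(ones(1, 10) / 10, 1, dE);

% Within one syllable spanning the frame indices in sylFrames:
[~, iE] = max(energySmooth(sylFrames));    % maxE (frame index within the syllable)
[~, iD] = max(dESmooth(sylFrames));        % maxD (frame index within the syllable)
maxEtime = (sylFrames(iE) - 1) * hop / fs; % approximate time in seconds
maxDtime = (sylFrames(iD) - 1) * hop / fs;</preformat>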
                <p>Both values (maxE and maxD) were subsequently examined with respect to their
                    ability to serve as SMS anchors, along with the syllable amplitude maxima
                    derived from amplitude envelopes and the syllable/vowel onsets identified
                    manually. Figure <xref ref-type="fig" rid="F2">2</xref> compares temporal
                    locations of the five potential SMS anchors for the bisyllabic word
                    &#8220;Friday&#8221; (taken from L1). The figure illustrates our general finding
                    that distances between the five temporal landmarks varied depending on the
                    properties of the syllable, and could be rather small (as in the second syllable
                    of the example word) or large (as in the first syllable).</p>
                <p>Each temporal landmark was then further described on the basis of the acoustic
                    properties of its local amplitude envelope. More specifically, two measures
                    of local changes in the signal amplitude were identified: (1) rise-time, i.e.,
                    the temporal distance between a local amplitude minimum and a maximum (<xref
                        ref-type="bibr" rid="B41">Goswami et al., 2002</xref>; <xref ref-type="bibr"
                        rid="B40">Goswami &amp; Leong, 2013</xref>) and (2) rise-slope, i.e., the
                    steepness of a change in the envelope measured as the amplitude differences
                    between a minimum and a maximum, divided by the duration of their rise-time.
                    These measures are illustrated in Figure <xref ref-type="fig" rid="F3"
                    >3</xref>.</p>
                <p>To derive these measures, neighbouring maxima and minima in the amplitude
                    envelope were located using the <italic>findpeaks</italic> function in Matlab
                    2018b. First, a local maximum was found in the closest proximity to the event
                    onset (in vowels, it could precede or follow the identified vowel onset but in
                    syllables, the temporal location was restricted by syllable boundaries). Second,
                    the algorithm searched for a preceding minimum in the amplitude envelope. The
                    local minima were mostly located around the &#8216;valleys&#8217; between local
                    maxima in the amplitude envelope (see Figure <xref ref-type="fig" rid="F3"
                        >3</xref>). A local threshold was adjusted to each individual case, based on
                    a combination of two parameters (duration of the sampling time window and
                    average amplitude decrease over a series of consecutive sampling intervals).
                    Automatically detected turning points of the amplitude envelope were manually
                    checked by a trained phonetician (first author).</p>
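                <p>The sketch below illustrates how rise-time and rise-slope can be derived with
                    <italic>findpeaks</italic> for a single event; tOnset (the manually identified
                    onset time in seconds) is a hypothetical input, and the local thresholding and
                    manual checks described above are omitted.</p>
                <preformat>% Schematic sketch of rise-time and rise-slope for one event at tOnset (s).
% ampEnv and fs come from the earlier sketch; thresholding is omitted here.
[pks, pkLocs] = findpeaks(ampEnv);          % local maxima (sample indices)
[~, vlLocs]   = findpeaks(-ampEnv);         % local minima (sample indices)

[~, iPk] = min(abs(pkLocs / fs - tOnset));  % maximum closest to the event onset
iMin     = find(pkLocs(iPk) - vlLocs > 0, 1, 'last');  % last preceding minimum
minLoc   = vlLocs(iMin);

riseTime  = (pkLocs(iPk) - minLoc) / fs;               % rise-time (s)
riseSlope = (pks(iPk) - ampEnv(minLoc)) / riseTime;    % amplitude change per second</preformat>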
            </sec>
            <sec>
                <title>3.4 Individual data</title>
                <p>All participants had to fill in an online questionnaire prior to their scheduled
                    experimental session. The form asked about musical training, ongoing and past
                    musical activities, and dancing experience. A general musicality index was
                    derived from these data (similar to the approach by <xref ref-type="bibr"
                        rid="B103">&#352;turm &amp; Vol&#237;n, 2016</xref>). The index was an
                    aggregate score based on years of musical training (from 0 to 12 in the present
                    sample), current regular music practice (0 for non-active and 1 for active
                    participants), number of musical instruments (which included singing and
                    dancing, from 0 to 4 in the sample), and finally the age at which participants
                    started their musical training (below the age of 10 was coded as 2, from 10 up
                    to 20 years as 1, above 20 years as 0). The resulting musicality indices varied
                    from minimally 0 (no musical experience) to 18 (a high level of musical
                    experience and skills). There were no professional musicians or dancers among
                    the participants of this study, though 69% had received some musical training,
                    taken dancing classes, or danced regularly. Given the aims and hypotheses of the
                    present study, the questionnaire only included questions about active music
                    practice and did not collect information about passive experience of listening
                    to specific music styles.</p>
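                <p>For illustration, the index of a single hypothetical participant would be
                    computed as follows; the component values are invented, and the scoring
                    follows the description above.</p>
                <preformat>% Schematic sketch of the musicality index for one hypothetical participant.
trainingYears = 6;      % years of musical training (0 to 12 in the sample)
activeNow     = 1;      % current regular practice: 0 = non-active, 1 = active
nInstruments  = 2;      % instruments incl. singing and dancing (0 to 4)
startAge      = 9;      % age at which musical training started

if startAge > 20
    startScore = 0;
elseif startAge >= 10
    startScore = 1;
else
    startScore = 2;     % training started below the age of 10
end

musicalityIndex = trainingYears + activeNow + nInstruments + startScore;  % here: 11</preformat>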
                <p>We assessed individual SMS abilities with the Battery for the Assessment of
                    Auditory Sensorimotor Timing Abilities (<xref ref-type="bibr" rid="B23">BAASTA,
                        Dalla Bella et al., 2017</xref>). Six tasks were selected from the battery,
                    including two unpaced tapping tasks, two paced tapping-to-tones tasks, and two
                    paced tapping-to-music tasks. Unpaced tapping tasks measured the speed of
                    participants&#8217; spontaneous and fast tapping rates without a stimulus, and
                    their ability to sustain the regular motor activity. In these tasks,
                    participants were instructed to either tap at their most comfortable speed for
                    60 seconds, or to tap at their fastest possible speed for 30 seconds, paying
                    attention to maintaining a constant speed for the whole duration of a trial. In
                    the paced tapping tasks, participants&#8217; synchronization abilities were
                    measured with a simple regular sound (here, a piano tone with a frequency of
                    1319 Hz or E6) and computer-generated excerpts of classical music. When tapping
                    to piano tones, participants were presented with 60 repetitions of a tone
                    at a faster (450 ms) and a slower (600 ms) IOI, and were asked to tap in
                    synchrony with the tones throughout the repetitions. When tapping to music,
                    participants were instructed to synchronize with what they perceived as the beat
                    in musical excerpts from Bach&#8217;s &#8220;Badinerie&#8221; and
                    Rossini&#8217;s &#8220;William Tell Overture.&#8221; Both music extracts
                    consisted of 64 beats with a quarter note of 600 ms IOI (see <xref
                        ref-type="bibr" rid="B23">Dalla Bella et al., 2017</xref> for more detail).
                    The task order was counterbalanced across experimental sessions, following a
                    Latin square design. The order within each task category, however, was fixed: In
                    the unpaced tapping task, participants first tapped spontaneously and then fast; in
                    the paced tapping task, they first synchronized with the metronome of 450 ms IOI
                    and then 600 ms IOI; in the music synchronization task, they first tapped to
                    Bach and then to the Rossini piece.</p>
                <p>After the experiment, all participants filled in a brief questionnaire that
                    collected confidence ratings for their self-evaluated experimental performance.
                    Participants indicated how easily they were able to extract beat patterns from
                    looped test sentences, and how well they were able to replicate them in the NMR
                    task. In addition, they were asked to self-report how confident they felt about
                    tapping precisely in time with the stimuli they experienced in the SMS task. A
                    9-point Likert scale (with 9 being the highest level of confidence) was used to
                    collect the ratings.</p>
            </sec>
            <sec>
                <title>3.5 Participants</title>
                <p>Thirty-one native speakers of Southern British English (21 female; mean age 23.1
                    years, range 18 &#8211; 36 years) participated in the study. They gave informed
                    consent and received a small fee in compensation for their time and efforts. The
                    data of two participants were removed from the sample because they self-reported
                    dyslexia. All remaining participants reported no history of language
                    impairments or motor disorders that could affect their rhythmic processing or
                    SMS abilities (e.g., dyslexia: <xref ref-type="bibr" rid="B56">Leong &amp;
                        Goswami, 2014</xref>; apraxia: <xref ref-type="bibr" rid="B75">Park,
                        2017</xref>; dystonia: <xref ref-type="bibr" rid="B61">Liu et al.,
                        2008</xref>), and no hearing impairments at the time of testing. Moreover,
                    their individual performance with the metronome tasks of BAASTA did not indicate
                    any issues with their general synchronization abilities (cf. <xref
                        ref-type="bibr" rid="B23">Dalla Bella et al., 2017</xref>).</p>
            </sec>
            <sec>
                <title>3.6 Tasks, procedure, and apparatus</title>
                <p>The study obtained ethical approval from the Ethics Committee of the University
                    of Kent, and was conducted in a quiet behavioral testing room of the Kent
                    Linguistics Laboratory. Each experimental session consisted of one SMS and one
                    NMR task with the experimental stimuli. During the SMS task, participants were
                    presented with 20 repetitions of each target sentence and asked to start
                    synchronizing with what they perceived as the beat structure of the sentence as
                    soon as they felt able to, while the repeated auditory sequence was still
                    ongoing. In the NMR task, participants were asked to listen to 10 repetitions of
                    a test sentence first, and then to replicate the beat pattern they had heard. No
                    instruction was given as to how many taps or cycles they should reproduce.
                    During each task, test sentences were presented in increasing order of
                    complexity, i.e., the short sentences were tested first, the long ones last.
                    Test sentences were played binaurally through Sennheiser HD 380 headphones. The
                    order of the SMS and NMR tasks was counterbalanced across participants. At the
                    start of an experimental session, participants familiarized themselves with the
                    equipment and had an opportunity to ask questions about the procedure.
                    The session ended with the BAASTA tests and the post-test questionnaire, and
                    took 35&#8211;45 minutes in total to complete.</p>
                <p>Tapping responses were collected using a Roland HandSonic drum pad (HPD-20) and a
                    Dell Latitude 7390 laptop running the CakeWalk MIDI software (BandLab). BAASTA was
                    implemented as an app running on an Acer tablet (Iconia One 10 B3-A40FHD 32GB)
                    with an Android 7.0 system. Participants were free to adjust the sound volume to
                    a comfortable level.</p>
            </sec>
            <sec>
                <title>3.7 Preparation of SMS data</title>
                <p>Collected taps can be analyzed with regard to different aspects of their
                    distribution in time. Figure <xref ref-type="fig" rid="F4">4</xref> shows a
                    hypothetical example of two taps produced in time with the bisyllabic word
                    Friday. The first tap follows the syllable onset (resulting in positive
                    asynchronies measured with this landmark) but precedes all other landmarks
                    (resulting in negative asynchronies indicative of anticipation of different
                    magnitudes: maxD can be considered less anticipated than the vowel onset in this
                    example). The second tap shows positive asynchronies for all landmarks, though
                    the magnitude of the time lag is landmark-specific&#8212;here, it is the
                    smallest for maxE and the largest for the syllable onset. Moreover, the distance
                    between the taps (or the inter-tap interval, ITI) and the variability of these
                    intervals can provide insightful information about the synchronization
                    performance with each sentence as a whole.</p>
                <fig id="F4">
                    <label>Figure 4</label>
                    <caption>
                        <p>A hypothetical example of two taps (identified by black circles) produced
                            in time with the bisyllabic word Friday. Distances between the taps (or
                            inter-tap intervals, ITI) and distances between taps and the nearby
                            landmarks are indicated by grey arrow lines.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76024/"/>
                </fig>
                <p>We extracted the tapping data using the Matlab MIDI toolbox (<xref ref-type="bibr"
                        rid="B31">Eerola &amp; Toiviainen, 2004</xref>), and corrected the timing of
                    taps by subtracting the delay of the MIDI device (here, 5 ms). For each sentence
                    and participant, we calculated the temporal distribution of individual taps
                    within the temporal window of the sentence duration and then aggregated the
                    available taps across all repetitions of the same sentence. Using GGPLOT2 (<xref
                        ref-type="bibr" rid="B120">Wickham, 2016</xref>), a Gaussian kernel
                    estimation with a bandwidth adjustment of &#8539; was applied to the aggregated
                    data. This procedure allowed us to obtain a smoothed distribution for each
                    participant and sentence while retaining salient peaks of the aggregated taps.
                    Figure <xref ref-type="fig" rid="F5">5</xref> shows an example of such density
                    functions for the test sentence S1. Individual densities were obtained from the
                    SMS data and aggregated across all participants. The resulting distribution in
                    Figure <xref ref-type="fig" rid="F5">5</xref> is clearly multimodal, with one
                    tapping peak per syllable of this test sentence.</p>
                <fig id="F5">
                    <label>Figure 5</label>
                    <caption>
                        <p>An aggregated density function of the group SMS-performance with the
                            four-syllable test sentence S1 (&#8220;I wove a yarn&#8221;).</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76025/"/>
                </fig>
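                <p>A minimal R sketch of this smoothing step is given below; the tap times are
                    hypothetical, and the call simply reproduces the Gaussian kernel and the
                    bandwidth adjustment of &#8539; reported above.</p>
                <preformat>
library(ggplot2)

# Hypothetical tap times (ms) of one participant, pooled over all repetitions
# of one sentence (the actual taps come from the MIDI recordings, corrected
# for the 5 ms device delay)
taps &lt;- data.frame(time_ms = c(120, 395, 410, 700, 715, 980, 995))

# Gaussian kernel density with a bandwidth adjustment of 1/8, as used for Figure 5
ggplot(taps, aes(x = time_ms)) +
  geom_density(kernel = "gaussian", adjust = 1/8) +
  labs(x = "Time within sentence (ms)", y = "Tap density")
                </preformat>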
                <p>Figure <xref ref-type="fig" rid="F6">6</xref> displays density functions created
                    for the group tapping performance with the same sentence during the NMR task.
                    The NMR task seems to increase variability at both the individual and the group
                    level, and lacks the clearly defined quadrimodality observed in the SMS task with
                    this sentence.</p>
                <fig id="F6">
                    <label>Figure 6</label>
                    <caption>
                        <p>An aggregated density function of the group NMR-performance with the test
                            sentence S1 (&#8220;I wove a yarn&#8221;).</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76026/"/>
                </fig>
                <p>The temporal location of the density peak maxima (see Figure <xref ref-type="fig"
                        rid="F5">5</xref>) was used to quantify the individual SMS performance. To
                    derive this measure, the <italic>findpeaks</italic> function from the R-package
                    PRACMA (<xref ref-type="bibr" rid="B13">Borchers, 2018</xref>) was applied. It
                    identified all peaks that exceeded a 40%-threshold of the maximum peak value of
                    each sentence and were separated by at least 100 ms. The timepoints of the density
                    peaks and locations of the temporal landmarks under investigation (see 3.3) were
                    then compared. Asynchronies between the taps and the temporal landmarks were
                    calculated for those density peaks which occurred within a &#177;120 ms window
                    of the corresponding landmark location (cf. <xref ref-type="bibr" rid="B93"
                        >Repp, 2004</xref>).</p>
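                <p>The peak-picking and windowing steps can be sketched in R as follows; the density
                    grid, landmark times, and variable names are illustrative assumptions rather than
                    the original analysis script.</p>
                <preformat>
library(pracma)

# Smoothed tap density evaluated on a regular time grid (ms); computed here
# with density() for illustration
tap_times &lt;- c(120, 395, 410, 700, 715, 980, 995)
dens &lt;- density(tap_times, kernel = "gaussian", adjust = 1/8)

step_ms &lt;- mean(diff(dens$x))                      # grid resolution in ms
peaks &lt;- findpeaks(dens$y,
                   minpeakheight   = 0.4 * max(dens$y),       # 40% of the maximum peak
                   minpeakdistance = ceiling(100 / step_ms))  # at least 100 ms apart
peak_times &lt;- dens$x[peaks[, 2]]                   # column 2 holds peak positions (indices)

# Asynchronies are computed only for peaks within +/-120 ms of a landmark
vowel_onsets &lt;- c(100, 400, 700, 1000)             # hypothetical landmark locations (ms)
asynchrony &lt;- sapply(vowel_onsets, function(v) {
  d &lt;- peak_times - v
  if (any(abs(d) &lt;= 120)) d[which.min(abs(d))] else NA
})
                </preformat>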
            </sec>
            <sec>
                <title>3.8 Period tracking in SMS and NMR</title>
                <p>To compare the properties of period tracking in SMS and NMR, we calculated ITIs
                    (in ms) that participants produced in each target sentence during the very first
                    movement cycle as well as mean ITIs upon completion of a trial. Variability of
                    the interval duration between taps was expressed by the coefficient of variation
                    CV, calculated as SD(ITI)/mean(ITI).</p>
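                <p>A minimal sketch of these measures in R, assuming a vector of hypothetical tap
                    onsets (in ms) for one trial in temporal order:</p>
                <preformat>
tap_times &lt;- c(150, 460, 770, 1090, 1400)   # hypothetical tap onsets (ms)

iti      &lt;- diff(tap_times)                 # inter-tap intervals (ms)
mean_iti &lt;- mean(iti)                       # mean ITI per trial
cv_iti   &lt;- sd(iti) / mean(iti)             # CV = SD(ITI)/mean(ITI)
                </preformat>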
            </sec>
        </sec>
        <sec>
            <title>4. Results</title>
            <p>All analyses below were conducted in RStudio running R version 3.5.1, using packages
                    <sc>LME</sc>4 (<xref ref-type="bibr" rid="B11">Bates, M&#228;chler, Bolker,
                    &amp; Walker, 2015</xref>), <sc>LMERTEST</sc> (<xref ref-type="bibr" rid="B52"
                    >Kuznetsova, Brockhoff, &amp; Christensen, 2017</xref>), <sc>CHANGEPOINT</sc>
                    (<xref ref-type="bibr" rid="B49">Killick &amp; Eckley, 2014</xref>),
                    <sc>GGPLOT</sc>2 (<xref ref-type="bibr" rid="B120">Wickham, 2016</xref>), and
                    <sc>SJPLOT</sc> (<xref ref-type="bibr" rid="B62">L&#252;decke, 2019</xref>).</p>
            <sec>
                <title>4.1 Motor activity in SMS and NMR</title>
                <p>First, one-sample Kolmogorov-Smirnov tests confirmed that tapping data were not
                    uniformly distributed in either task. That is, participants did not tap randomly
                    in either task or with any of the test sentences (see Table <xref ref-type="table"
                        rid="T2">2</xref>; all <italic>p</italic> values were &lt; .01). Moreover,
                    confidence ratings did not differ significantly between the two tasks.
                    Participants felt equally confident (median: 6, interquartile range: 5&#8211;7)
                    about their ability to extract the beat patterns in NMR and to synchronize with
                    the beat in SMS.</p>
                <table-wrap id="T2">
                    <label>Table 2</label>
                    <caption>
                        <p>The D-statistic of one-sample Kolmogorov-Smirnov tests for taps collected
                            with the six target sentences in the SMS and NMR task.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Sentence/Task</th>
                            <th valign="top" align="center">S1</th>
                            <th valign="top" align="center">S2</th>
                            <th valign="top" align="center">M1</th>
                            <th valign="top" align="center">M2</th>
                            <th valign="top" align="center">L1</th>
                            <th valign="top" align="center">L2</th>
                        </tr>
                        <tr>
                            <td colspan="7"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">SMS</td>
                            <td valign="top" align="right">.280</td>
                            <td valign="top" align="right">.291</td>
                            <td valign="top" align="right">.182</td>
                            <td valign="top" align="right">.119</td>
                            <td valign="top" align="right">.073</td>
                            <td valign="top" align="right">.127</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">NMR</td>
                            <td valign="top" align="right">.203</td>
                            <td valign="top" align="right">.148</td>
                            <td valign="top" align="right">.135</td>
                            <td valign="top" align="right">.270</td>
                            <td valign="top" align="right">.346</td>
                            <td valign="top" align="right">.288</td>
                        </tr>
                    </table>
                </table-wrap>
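                <p>The tests underlying the D-statistics in Table <xref ref-type="table" rid="T2"
                        >2</xref> amount to one-sample comparisons against a uniform distribution over
                    the sentence duration; a minimal R sketch with hypothetical tap times is given
                    below.</p>
                <preformat>
# Hypothetical tap locations (ms) within a sentence of 1200 ms duration
tap_times &lt;- c(150, 460, 770, 1090)
sent_dur  &lt;- 1200

# One-sample Kolmogorov-Smirnov test against a uniform distribution;
# a significant D indicates that taps were not placed randomly in time
ks.test(tap_times, "punif", min = 0, max = sent_dur)
                </preformat>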
            </sec>
            <sec>
                <title>4.2 Comparisons between landmarks as potential SMS anchors</title>
                <p>To find the most appropriate SMS-anchor, we fitted linear mixed-effects models to
                    absolute asynchronies between the tapping peaks and the temporal landmarks. The
                    model included a five-level predictor landmark and two random effects:
                    participant (P1&#8211;P29) and sentence (1&#8211;6). We started with a maximal
                    random effect structure recommended by Barr, Levy, Scheepers, and Tily (<xref
                        ref-type="bibr" rid="B9">2013</xref>), and iteratively removed random
                    effects if the model failed to converge or produced a singular fit. A change of
                    the default optimizer (to &#8216;optimx,&#8217; <xref ref-type="bibr" rid="B47"
                        >John et al., 2020</xref>) helped to resolve the model convergence issues
                        and keep the random effect structure maximal. Likelihood ratio tests were
                    run to determine the best-fit model.</p>
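                <p>A sketch of this modelling step is given below; the data frame and variable names
                    are assumptions made for illustration, and only the model structure, the optimizer
                    change, and the likelihood ratio comparison mirror the procedure described
                    above.</p>
                <preformat>
library(lme4)
library(lmerTest)
library(optimx)

# async_df: one row per tapping peak, with the log-transformed absolute
# asynchrony (log_abs_async), the five-level factor landmark, and identifiers
# for participant and sentence (hypothetical variable names)
m_full &lt;- lmer(log_abs_async ~ landmark +
                 (1 + landmark | participant) + (1 + landmark | sentence),
               data = async_df,
               control = lmerControl(optimizer = "optimx",
                                     optCtrl = list(method = "nlminb")))

# If the maximal model fails to converge or yields a singular fit, random
# slopes are removed step by step; nested models are then compared with
# likelihood ratio tests
m_reduced &lt;- lmer(log_abs_async ~ landmark +
                    (1 + landmark | participant) + (1 | sentence),
                  data = async_df,
                  control = lmerControl(optimizer = "optimx",
                                        optCtrl = list(method = "nlminb")))
anova(m_reduced, m_full)
                </preformat>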
                <p>Figure <xref ref-type="fig" rid="F7">7</xref> displays estimates and standard
                    errors of absolute asynchronies for the five landmarks under investigation.
                    Smaller asynchronies indicate a higher accuracy of a tap in the proximity of the
                    corresponding landmark (the &#177;120 ms window applied across all landmarks).
                    Here (and below), raw duration measurements in ms were logarithmically
                    transformed to reduce or remove the skewness of the distribution that is
                    typically observed in durational data (<xref ref-type="bibr" rid="B8">Baayen,
                        2008, pp. 31ff</xref>). Visual inspection of the estimates in Figure <xref
                        ref-type="fig" rid="F7">7</xref> suggested that vowel onsets
                    showed the smallest temporal discrepancy between SMS-peak locations and
                    the nearby landmarks. Vowel onsets were thus taken as the reference level for
                    the pairwise comparisons with the Bonferroni-corrected
                    <italic>&#945;</italic>-level set to 0.0125 (0.05/4). Accordingly, syllable
                    onsets (<italic>t</italic> = 6.80, <italic>p</italic> &lt; .001) and maxE
                        (<italic>t</italic> = 3.15, <italic>p</italic> &lt; .01) differed
                    significantly from the asynchronies measured with vowel onsets while LAM did not
                    reach significance at the Bonferroni-corrected &#945;-level of 0.0125
                        (<italic>t</italic> = 2.18, <italic>p</italic> = 0.03). Despite the
                    numerical difference observed in Figure <xref ref-type="fig" rid="F7">7</xref>,
                    maxD did not produce significantly longer absolute asynchronies in comparison to
                    vowel onsets (<italic>t</italic> = 0.91, n.s.).</p>
                <fig id="F7">
                    <label>Figure 7</label>
                    <caption>
                        <p>Estimated means and standard errors of absolute asynchrony, comparing
                            participants&#8217; SMS performance with the five temporal
                            landmarks&#8212;syllable onset (SylOn), local energy maximum (maxE),
                            local amplitude maximum (LAM), maximal difference in the local energy
                            contour (maxD), and vowel onset (VowOn). The model contained
                            log-transformed absolute asynchronies which were back-transformed to the
                            original ms-scale in the plot.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76027/"/>
                </fig>
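                <p>The pairwise comparisons reported above can be sketched by continuing the
                    hypothetical landmark model from the previous sketch; the level label for vowel
                    onsets is an assumption, and only the choice of reference level and the
                    Bonferroni-corrected <italic>&#945;</italic>-level of 0.0125 follow the procedure
                    described above.</p>
                <preformat>
# Continuing the hypothetical objects async_df and m_full from the sketch above:
# vowel onsets are set as the reference level so that each coefficient compares
# one landmark against vowel onsets
async_df$landmark &lt;- relevel(factor(async_df$landmark), ref = "VowOn")
m_vowon &lt;- update(m_full, data = async_df)

# Four pairwise contrasts (SylOn, maxE, LAM, maxD vs. VowOn), each evaluated
# against the Bonferroni-corrected alpha of 0.05 / 4 = 0.0125
summary(m_vowon)$coefficients
                </preformat>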
                <p>Based on the analyses above, we conclude that vowel onsets constitute the best
                    anchor of SMS in these data. Figure <xref ref-type="fig" rid="F8">8</xref>
                    displays an example of the group performance with the vowel onsets of the test
                    sentence S1. Despite some individual variation, cumulative tapping peaks shown
                    in the graph are temporally well aligned with the vowel onsets (indicated by
                    vertical dashed blue lines). While the sentence-initial vowel demonstrates large
                    negative asynchronies typical of SMS performance with a metronome (e.g., <xref
                        ref-type="bibr" rid="B7">Aschersleben, 2002</xref>), all following vowels
                    seem much less anticipated in this example, i.e., display smaller or no negative
                    mean asynchronies.</p>
                <fig id="F8">
                    <label>Figure 8</label>
                    <caption>
                        <p>Accumulated tapping frequencies of the 29 participants of this study for
                            the experimental sentence S1 (&#8220;I wove a yarn&#8221;). Dashed
                            vertical lines indicate vowel onsets in the acoustic signal of the
                            sentence.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76028/"/>
                </fig>
            </sec>
            <sec>
                <title>4.3 Probability of a tap in the SMS task</title>
                <p>A logistic mixed-effects regression was performed to test for the likelihood of a
                    tapping peak being present (1) or absent (0) in the proximity of a vowel onset
                    (&#177;120 ms around the temporal landmark; see 3.7). Metrical status (i.e., the
                    vowel being the nucleus of a metrically weak, strong, or pitch-accented syllable),
                    rise-time and rise-slope of the amplitude envelope, S2S likelihood of the
                    sentence (high/low), and participant-specific characteristics were entered as
                    predictors. We also tested if the order of tasks (SMS first versus NMR first)
                    had an impact on tapping with vowels. Participant (P1&#8211;P29) and sentence
                    (1&#8211;6) were fitted as random effects. Again, we started with a maximal
                    random effect structure and retained those random effects that allowed the
                    models to converge. To address the convergence issues of the mixed-effects
                    logistic regressions, we changed the default optimizer (to &#8216;bobyqa&#8217;)
                    and increased the number of iterations from the default of 10,000 to 100,000. A
                    summary of the best-fit model established by the likelihood ratio tests can be
                    found in the supplementary materials.</p>
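                <p>Under assumed variable names, the core of this logistic model can be sketched as
                    follows; the data frame is hypothetical, only the two predictors retained in the
                    best-fit model (see Table <xref ref-type="table" rid="T3">3</xref>) are included
                    for brevity, and the family, optimizer, and iteration settings reflect the
                    procedure described above.</p>
                <preformat>
library(lme4)

# taps_df: one row per vowel onset, coding whether a tapping peak occurred
# within +/-120 ms of it (tapped: 0/1), together with metrical status,
# S2S likelihood, and the random-effect identifiers (hypothetical names)
m_tap &lt;- glmer(tapped ~ metrical_status + s2s_likelihood +
                 (1 | participant) + (1 | sentence),
               data = taps_df, family = binomial,
               control = glmerControl(optimizer = "bobyqa",
                                      optCtrl = list(maxfun = 1e5)))
summary(m_tap)
                </preformat>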
                <p>The best-fit model produced two main effects, including the metrical status of
                    the vowel and S2S likelihood of the sentence (see Table <xref ref-type="table"
                        rid="T3">3</xref>). Accordingly, metrically weak vowels were less likely to
                    attract a tap, in comparison to either metrically strong (<italic>z</italic> =
                    3.28, <italic>p</italic> &lt; .01) or accented vowels (<italic>z</italic> =
                    4.50, <italic>p</italic> &lt; .001). Although accented vowels were slightly more
                    often tapped to than metrically strong but phrasally unaccented vowels, the
                    difference between them was not significant. Sentences identified as
                    high-transforming in previous S2S-experiments (<xref ref-type="bibr" rid="B88"
                        >Rathcke et al., 2018, forthcoming</xref>) were also more likely to induce a
                    higher number of taps, forming a tapping peak in the density map around a vowel
                    onset (<italic>z</italic> = 2.37, <italic>p</italic> &lt; .05). These effect
                    estimates are summarized in Table <xref ref-type="table" rid="T4">4</xref>.</p>
                <table-wrap id="T3">
                    <label>Table 3</label>
                    <caption>
                        <p>Summary of the logistic mixed-effects model best fitting the SMS
                            probability data.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Factor</th>
                            <th valign="top" align="center"><italic>AIC</italic></th>
                            <th valign="top" align="center"><italic>df</italic></th>
                            <th valign="top" align="center">&#967;<sup>2</sup></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="5"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Metrical status</italic></td>
                            <td valign="top" align="right">771.42</td>
                            <td valign="top" align="right">2</td>
                            <td valign="top" align="right">24.68</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>S2S likelihood</italic></td>
                            <td valign="top" align="right">754.23</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">5.48</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <table-wrap id="T4">
                    <label>Table 4</label>
                    <caption>
                        <p>Estimates of fixed effects for the SMS probability data (reference level
                            of metrical status is weak, and reference level of S2S is
                            high-transforming).</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left"/>
                            <th valign="top" align="center"><italic>Estimate</italic></th>
                            <th valign="top" align="center"><italic>SE</italic></th>
                            <th valign="top" align="center"><italic>z</italic></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="5"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">Intercept</td>
                            <td valign="top" align="right">2.26</td>
                            <td valign="top" align="right">0.30</td>
                            <td valign="top" align="right">7.52</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Metrical status:
                                strong</italic></td>
                            <td valign="top" align="right">1.00</td>
                            <td valign="top" align="right">0.30</td>
                            <td valign="top" align="right">3.28</td>
                            <td valign="top" align="right">&lt;.01</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Metrical status:
                                accented</italic></td>
                            <td valign="top" align="right">0.98</td>
                            <td valign="top" align="right">0.22</td>
                            <td valign="top" align="right">4.50</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>S2S: low</italic></td>
                            <td valign="top" align="right">&#8211;0.47</td>
                            <td valign="top" align="right">0.20</td>
                            <td valign="top" align="right">&#8211;2.37</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
            </sec>
            <sec>
                <title>4.4 SMS accuracy</title>
                <p>SMS accuracy was measured as absolute asynchronies between SMS peaks and vowel
                    onsets (on a logarithmic scale), and entered linear mixed-effects modelling as
                    the dependent variable. Again, we tested the predictive power of metrical status
                    (i.e., the vowel being the nucleus of a metrically weak, strong, or pitch-accented
                    syllable), rise-time and rise-slope of the amplitude envelope, S2S likelihood of
                    the sentence (high/low), and participant-specific characteristics. We further
                    added the order of tasks (SMS first/NMR first) to check if SMS improved in those
                    participants who first performed NMR. Individual participant (P1&#8211;P29) and
                    sentence (1&#8211;6) were fitted as random effects. Starting with the maximal
                    random effect structure and changing the default optimizer (to
                    &#8216;optimx,&#8217; <xref ref-type="bibr" rid="B47">John et al., 2020</xref>),
                    random effects were iteratively removed if they produced convergence or
                    singular-fit issues. The best-fit model established by the likelihood ratio
                    tests is given in the supplementary materials.</p>
                <p>Table <xref ref-type="table" rid="T5">5</xref> displays the best-fit model which
                    included two factors, (1) the rise-time of the amplitude envelope around the
                    vowel onset and (2) the musicality score of participants.</p>
                <table-wrap id="T5">
                    <label>Table 5</label>
                    <caption>
                        <p>Summary of the linear mixed-effects model best fitting the SMS accuracy
                            data.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Factor</th>
                            <th valign="top" align="center"><italic>Sum Sq</italic>.</th>
                            <th valign="top" align="center"><italic>Mean Sq</italic>.</th>
                            <th valign="top" align="center"><italic>df</italic></th>
                            <th valign="top" align="center"><italic>F</italic></th>
                            <th valign="top" align="center"><italic>P</italic></th>
                        </tr>
                        <tr>
                            <td colspan="6"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Log(rise-time)</italic></td>
                            <td valign="top" align="right">7.63</td>
                            <td valign="top" align="right">7.63</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">6.23</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Participant
                                musicality</italic></td>
                            <td valign="top" align="right">6.34</td>
                            <td valign="top" align="right">6.34</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">5.17</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <p>Effect estimates from the best-fit model are plotted in Figure <xref
                        ref-type="fig" rid="F9">9</xref>. Vowels with shorter amplitude rise-times
                    displayed smaller asynchronies (<italic>t</italic> = 2.50, <italic>p</italic>
                    &lt; .05). Higher levels of musical training also improved SMS accuracy
                        (<italic>t</italic> = &#8211;2.27, <italic>p</italic> &lt; .05).</p>
                <fig id="F9">
                    <label>Figure 9</label>
                    <caption>
                        <p>Estimated effects for the two factors that predict SMS accuracy:
                                <bold>(A)</bold> amplitude rise-time around the vowel onset and
                                <bold>(B)</bold> musical training of participants. The model
                            contained log-transformed absolute asynchronies which were
                            back-transformed to the original ms-scale in the predictions.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76029/"/>
                </fig>
            </sec>
            <sec>
                <title>4.5 Anticipation during SMS</title>
                <p>To see if participants displayed anticipation in SMS with language, we analyzed
                    signed asynchronies between tapping peaks and vowel onsets. Here, negative
                    values indicated that a tap preceded (i.e., anticipated) a vowel onset. Linear
                    mixed-effects models tested four stimulus-related predictors, namely metrical
                    status (weak, strong, or accented), rise-time and rise-slope of the amplitude
                    envelope, and S2S likelihood of the sentence (high/low), as well as
                    participant-specific characteristics. As observed before, targets that occurred
                    at the beginning of a sentence seemed more anticipated than any of the subsequent
                    targets, i.e., they showed larger negative mean asynchronies. To see how
                    systematically this effect
                    occurred in our data, we included the serial order of targets within a sentence
                    as a covariate. We also fitted the order of tasks (SMS first/NMR first) as a
                    fixed effect to see if the anticipation of upcoming vowels was reduced after
                    participants had experienced the sentence during the NMR task. The model further
                    contained two random effects: participant (P1&#8211;P29) and sentence
                    (1&#8211;6). The maximal random effect structure initially included random
                    slopes and was iteratively simplified if convergence or singular-fit issues
                    persisted despite the change in the optimizer (<xref ref-type="bibr" rid="B47"
                        >John et al., 2020</xref>). The final model established by the likelihood
                    ratio tests is shown in the supplementary materials.</p>
                <p>The best-fit model retained three covariates related to the acoustic and
                    positional properties of sentence targets (see Table <xref ref-type="table"
                        rid="T6">6</xref>). Both rise-time and rise-slope of the amplitude envelope
                    around the vowel onset showed a strong influence.<xref ref-type="fn" rid="n2"
                        >2</xref> More specifically, vowels with longer rise-times
                        (<italic>t</italic> = &#8211;4.21, <italic>p</italic> &lt; .001) and steeper
                    rise-slopes (<italic>t</italic> = &#8211;3.55, <italic>p</italic> &lt; .001)
                    were more anticipated than vowels with shorter rise-times and shallower
                    rise-slopes (see Figure <xref ref-type="fig" rid="F10">10A&#8211;B</xref>). As
                    far as the serial order of a vowel in a sentence was concerned, our preliminary
                    observations were confirmed. Each subsequent vowel showed smaller negative
                    asynchronies and was thus less anticipated than its predecessor
                        (<italic>t</italic> = 2.29, <italic>p</italic> &lt; .05). That is, SMS
                    accuracy increased incrementally and was particularly high for sentence-final
                    vowels (see Figure <xref ref-type="fig" rid="F10">10C</xref>).</p>
                <table-wrap id="T6">
                    <label>Table 6</label>
                    <caption>
                        <p>Summary of the linear mixed-effects model best fitting the SMS
                            anticipation data.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Factor</th>
                            <th valign="top" align="center"><italic>Sum Sq</italic>.</th>
                            <th valign="top" align="center"><italic>Mean Sq</italic>.</th>
                            <th valign="top" align="center"><italic>df</italic></th>
                            <th valign="top" align="center"><italic>F</italic></th>
                            <th valign="top" align="center"><italic>P</italic></th>
                        </tr>
                        <tr>
                            <td colspan="6"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Log(rise-time)</italic></td>
                            <td valign="top" align="right">44253</td>
                            <td valign="top" align="right">44253</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">17.76</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Rise-slope</italic></td>
                            <td valign="top" align="right">31469</td>
                            <td valign="top" align="right">31469</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">12.63</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Serial order</italic></td>
                            <td valign="top" align="right">13242</td>
                            <td valign="top" align="right">13242</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">5.31</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <fig id="F10">
                    <label>Figure 10</label>
                    <caption>
                        <p>Model estimate plots for the three factors that best explain SMS
                            anticipation with language: <bold>(A)</bold> amplitude rise-time,
                                <bold>(B)</bold> amplitude rise-slope, and <bold>(C)</bold> serial
                            order of a vowel in a sentence. The temporal onset of a vowel is
                            indicated as 0 ms. Log-transformed rise-times are back-transformed to
                            the original ms-scale.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76030/"/>
                </fig>
            </sec>
            <sec>
                <title>4.6 Number of repetitions in SMS</title>
                <p>Given that our SMS task involved a total of 20 stimulus presentations, we
                    examined participants&#8217; tapping behaviour across the repetitions. In
                    particular, we were interested in answering two main questions. Firstly,
                    when did participants start tapping, and how might this have been influenced by
                    their self-reported confidence in their own synchronization performance?
                    Secondly, assuming that SMS improved with practice, how many tapping cycles were
                    needed for participants to achieve their best, stable SMS performance in this
                    task?</p>
                <p>On average, most participants started to tap during the third repetition of a
                    sentence (median: 3, interquartile range: 2&#8211;3). The first tap was recorded
                    slightly later at the very beginning of the SMS task (interquartile range:
                    2&#8211;4) and generally shifted to an earlier repetition cycle at the end of
                    the SMS session (interquartile range: 2&#8211;3). Only on a few trials did
                    participants start to synchronize as late as the 10th or the 11th
                    repetition cycle. The location of the first synchronization attempt within the
                    loop was unaffected by the self-reported confidence ratings participants
                    provided upon completion of the task, or by their musicality score. The order of
                    tasks (SMS first or NMR first) did not have any impact, either.</p>
                <p>The time-point at which tapping performance stabilized also differed across
                    participants. Figure <xref ref-type="fig" rid="F11">11</xref> displays examples
                    of the time-series data collected for the participants P02 (A) and P12 (B)
                    during an SMS trial with the target sentence S2 (&#8220;I took the
                    prize&#8221;). All taps collected for each participant are plotted along the
                    x-axis where 1 demarcates the first tap recorded. If participants had tapped to
                    every single vowel from the very first repetition of the sentence in the loop,
                    the total number of taps would be (4 vowels &#215; 20 repetitions =) 80, which
                    was not the case for either participant in the example. Instead, there was
                    considerable individual variability. The overall number of taps available per
                    trial differed, depending on when the participant started tapping and how many
                    vowels they synchronized with (P02 produced more taps than P12).</p>
                <fig id="F11">
                    <label>Figure 11</label>
                    <caption>
                        <p>Time-series plots of signed asynchronies measured from vowel onset for
                            each tap produced by participants P02 <bold>(A)</bold> and P12
                                <bold>(B)</bold> during their synchronization trial with the
                            sentence S2. Black lines show raw data, red lines represent estimated
                            means fitted by change-point analyses.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="/article/id/6294/file/76031/"/>
                </fig>
                <p>The y-axis in Figure <xref ref-type="fig" rid="F11">11</xref> displays signed
                    asynchronies (in ms) where 0 represents the vowel onset. The two chosen examples
                    suggest that P02 started off tapping 10&#8211;20 ms ahead of the vocalic targets
                    in this sentence and became consistently more accurate in synchronization with
                    the vowel onset after s/he had produced 12 taps, while P12 started off lagging
                    behind the vowel targets by 20&#8211;40 ms and reached a stably improved
                    performance after s/he had tapped 17 times. Alternatively, these asynchronies
                    could be interpreted as stable from the start of synchronization but timed with
                    a different landmark at the beginning versus toward the end of a trial. Yet,
                    this interpretation seems very unlikely. As shown in Figure <xref ref-type="fig"
                        rid="F2">2</xref>, the timing of every landmark varied quite substantially with
                    respect to the vowel landmark. For example, maxE could occur before or after a
                    vowel onset in two successive syllables. Such variability means that the
                    trajectories plotted in Figure <xref ref-type="fig" rid="F11">11A or 11B</xref>
                    would show little systematicity prior to the identified point of stability at
                    which synchronization with the vowel onset begins, which was clearly not the
                    case.</p>
                <p>These time-series data were analyzed using R-library <sc>CHANGEPOINT</sc> (<xref
                        ref-type="bibr" rid="B49">Killick &amp; Eckley, 2014</xref>). For each
                    participant and sentence, we identified the individual point of change in the
                    synchronization accuracy by examining global fluctuations in the mean and
                    variance. We further calculated the number of sentence repetitions that
                    participants required as input as well as the number of tapping cycles that
                    participants performed until they achieved stable synchronization (as measured
                    by the mean and variance in their signed asynchronies). In Figure <xref
                        ref-type="fig" rid="F11">11</xref>, horizontal red lines show estimated
                    means of asynchronies. The local discontinuity between the two fitted lines
                    indicates the location of the change point. On average, participants performed 5
                    tapping cycles of each sentence (interquartile range: 3&#8211;7) until they
                    reached the point of stability in their synchronization. Each of these tapping
                    cycles could consist of 10&#8211;20 taps, depending on the participant&#8217;s
                    performance. None of the hypothesized predictors (confidence ratings,
                    musicality, order of tasks) had an influence on the individually achieved point
                    of synchronization stability.</p>
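                <p>The change-point step can be sketched in R as follows for a single participant and
                    sentence; the asynchrony series is hypothetical, and the call illustrates a single
                    change in mean and variance as described above.</p>
                <preformat>
library(changepoint)

# Hypothetical signed asynchronies (ms), in the order the taps were produced
async &lt;- c(-25, -18, -30, -22, -19, -5, 2, -3, 1, -2, 4, -1)

fit &lt;- cpt.meanvar(async, method = "AMOC")  # at most one change in mean and variance
cpts(fit)             # tap index at which the synchronization performance changes
param.est(fit)$mean   # estimated mean asynchrony before and after the change point
                </preformat>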
            </sec>
            <sec>
                <title>4.7 Period tracking in SMS versus NMR</title>
                <p>To understand how well participants were able to track the beat period in speech
                    stimuli in the two tasks under investigation, we compared SMS and NMR in two
                    respects: (1) how successful the two tasks were in making participants deviate
                    from their spontaneous tapping rates (measured by <xref ref-type="bibr"
                        rid="B23">BAASTA, Dalla Bella et al., 2017</xref>); and (2) whether both tasks
                    induced convergence between participants&#8217; tapping rates and the IOI of the
                    intervocalic intervals of the test sentences. A mixed-effects regression was
                    fitted to the dependent variable mean ITI per sentence and participant. We
                    tested for two interactions, namely (1) between the task and the
                    participant&#8217;s unpaced spontaneous tapping rate and (2) between the task
                    and vocalic IOI. We also fitted the order of tasks (SMS first/NMR first) as a
                    predictor to control for a potential task order effect. Participant and sentence
                    were defined as random effects. Again, we started with a maximal random effect
                    structure and iteratively simplified it if the model failed to converge or
                    produced a singular fit. A change of the default optimizer (to
                    &#8216;optimx,&#8217; <xref ref-type="bibr" rid="B47">John et al., 2020</xref>)
                    counteracted some of the convergence issues. The likelihood ratio test helped to
                    determine the best-fit model which is given in the supplementary materials.</p>
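                <p>A hedged sketch of this model under assumed variable names is given below; only
                    the interaction structure, the task-order predictor, and the random effects follow
                    the description above, and the log-transformations mirror the treatment of
                    durational data described in 4.2.</p>
                <preformat>
library(lme4)
library(lmerTest)

# iti_df: mean ITI per participant and sentence, with task (SMS/NMR), the
# vocalic IOI of the sentence, the participant's spontaneous tapping rate
# from BAASTA, and the order of tasks (hypothetical variable names)
m_iti &lt;- lmer(log(mean_iti) ~ task * log(vocalic_ioi) +
                task * log(spontaneous_iti) + task_order +
                (1 | participant) + (1 | sentence),
              data = iti_df)
anova(m_iti)   # F-tests via lmerTest (cf. Table 7)
                </preformat>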
                <p>Both interactions were significant in the best-fit model (see Tables <xref
                        ref-type="table" rid="T7">7</xref> and <xref ref-type="table" rid="T8"
                        >8</xref>). Accordingly, larger intervocalic intervals significantly
                    increased ITI, with a positive linear relationship in both tasks
                        (<italic>t</italic> = 4.76, <italic>p</italic> &lt; .01). However, an
                    increase in vocalic IOI showed a notably smaller effect on the increase of ITI
                    in NMR than in SMS (<italic>t</italic> = &#8211;2.63, <italic>p</italic> &lt;
                    .01). While individual tapping rates did not have any effect on ITI obtained in
                    the SMS task, ITI in the NMR task tended to be longer if
                    participants&#8217; spontaneous tapping tempo also had longer ITI
                        (<italic>t</italic> = 2.08, <italic>p</italic> &lt; .05). These findings
                    demonstrated that period tracking was present in both tasks, though it had a
                    subtler effect in NMR, where ITI drifted toward the participant&#8217;s preferred
                    individual tapping tempo in the absence of a simultaneous auditory signal.
                    Crucially, an ITI regularization could also be observed in the NMR task.
                    According to an additional model fit to the CV of ITI (see Tables <xref
                        ref-type="table" rid="T9">9</xref> and <xref ref-type="table" rid="T10"
                        >10</xref>), this dependent variable differed significantly across the two
                    tasks, showing that NMR led to less variability across ITI than SMS did
                        (<italic>t</italic> = &#8211;2.55, <italic>p</italic> &lt; .05). That is,
                    taps were paced more regularly in NMR.</p>
                <table-wrap id="T7">
                    <label>Table 7</label>
                    <caption>
                        <p>Summary of the linear mixed-effects model best fitting the ITI data.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Factor</th>
                            <th valign="top" align="center"><italic>Sum Sq</italic>.</th>
                            <th valign="top" align="center"><italic>Mean Sq</italic>.</th>
                            <th valign="top" align="center"><italic>df</italic></th>
                            <th valign="top" align="center"><italic>F</italic></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="6"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task * log(vocalic
                                IOI)</italic></td>
                            <td valign="top" align="right">.23</td>
                            <td valign="top" align="right">.23</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">6.93</td>
                            <td valign="top" align="right">&lt;.01</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task * log(spontaneous tapping
                                    rate)</italic></td>
                            <td valign="top" align="right">.15</td>
                            <td valign="top" align="right">.15</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">4.31</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <table-wrap id="T8">
                    <label>Table 8</label>
                    <caption>
                        <p>Estimates of fixed effects for the ITI data (reference level of task is
                            SMS).</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left"/>
                            <th valign="top" align="center"><italic>Estimate</italic></th>
                            <th valign="top" align="center"><italic>SE</italic></th>
                            <th valign="top" align="center"><italic>t</italic></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="5"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">Intercept</td>
                            <td valign="top" align="right">1.65</td>
                            <td valign="top" align="right">1.12</td>
                            <td valign="top" align="right">1.48</td>
                            <td valign="top" align="right">n.s.</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>Task</italic></td>
                            <td valign="top" align="right">0.38</td>
                            <td valign="top" align="right">0.60</td>
                            <td valign="top" align="right">0.64</td>
                            <td valign="top" align="right">n.s.</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>log(vocalic IOI)</italic></td>
                            <td valign="top" align="right">0.61</td>
                            <td valign="top" align="right">0.13</td>
                            <td valign="top" align="right">4.76</td>
                            <td valign="top" align="right">&lt;.01</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>log(spontaneous tapping
                                    rate)</italic></td>
                            <td valign="top" align="right">0.13</td>
                            <td valign="top" align="right">0.14</td>
                            <td valign="top" align="right">0.96</td>
                            <td valign="top" align="right">n.s.</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task * log(vocalic
                                IOI)</italic></td>
                            <td valign="top" align="right">&#8211;0.22</td>
                            <td valign="top" align="right">0.09</td>
                            <td valign="top" align="right">&#8211;2.63</td>
                            <td valign="top" align="right">&lt;.01</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task * log(spontaneous tapping
                                    rate)</italic></td>
                            <td valign="top" align="right">0.13</td>
                            <td valign="top" align="right">0.06</td>
                            <td valign="top" align="right">2.08</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <table-wrap id="T9">
                    <label>Table 9</label>
                    <caption>
                        <p>Summary of the linear mixed-effects model best fitting the CV of ITI
                            data.</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left">Factor</th>
                            <th valign="top" align="center"><italic>Sum Sq</italic>.</th>
                            <th valign="top" align="center"><italic>Mean Sq</italic>.</th>
                            <th valign="top" align="center"><italic>df</italic></th>
                            <th valign="top" align="center"><italic>F</italic></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="6"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task</italic></td>
                            <td valign="top" align="right">0.13</td>
                            <td valign="top" align="right">0.13</td>
                            <td valign="top" align="right">1</td>
                            <td valign="top" align="right">6.52</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <table-wrap id="T10">
                    <label>Table 10</label>
                    <caption>
                        <p>Estimates of fixed effects for the CV of ITI data (reference level of
                            task is SMS).</p>
                    </caption>
                    <table>
                        <tr>
                            <th valign="top" align="left"/>
                            <th valign="top" align="center"><italic>Estimate</italic></th>
                            <th valign="top" align="center"><italic>SE</italic></th>
                            <th valign="top" align="center"><italic>t</italic></th>
                            <th valign="top" align="center"><italic>p</italic></th>
                        </tr>
                        <tr>
                            <td colspan="5"><hr/></td>
                        </tr>
                        <tr>
                            <td valign="top" align="left">Intercept</td>
                            <td valign="top" align="right">0.47</td>
                            <td valign="top" align="right">0.04</td>
                            <td valign="top" align="right">12.21</td>
                            <td valign="top" align="right">&lt;.001</td>
                        </tr>
                        <tr>
                            <td valign="top" align="left"><italic>task: NMR</italic></td>
                            <td valign="top" align="right">&#8211;0.05</td>
                            <td valign="top" align="right">0.02</td>
                            <td valign="top" align="right">&#8211;2.55</td>
                            <td valign="top" align="right">&lt;.05</td>
                        </tr>
                    </table>
                </table-wrap>
                <p>To test whether the above effects were merely a consequence of self-sustained,
                    repeated movement in the SMS task (in contrast to the NMR task, which generally
                    led to fewer taps), we compared SMS and NMR data collected on the very first
                    tapping trial only. These comparisons produced results comparable to those
                    reported above.</p>
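                <p>For illustration, a minimal sketch of how the ITI model summarized in Table 8
                    could be specified with the lme4 package (<xref ref-type="bibr" rid="B11"
                        >Bates, M&#228;chler, Bolker, &amp; Walker, 2015</xref>) is given below. The
                    data frame, variable names, response transformation, and random-effects
                    structure are assumptions made for the example, not a record of the original
                    analysis script.</p>
                <preformat>
# Hypothetical R sketch of the ITI model reported in Table 8 (not the original script).
# Assumed columns of the data frame d: ITI, vocalic_IOI, spont_rate, task, participant, sentence.
library(lme4)
library(lmerTest)   # adds p-values for the fixed effects to summary()

# Response assumed here to be the log-transformed ITI; the original analysis may differ.
m_iti &lt;- lmer(log(ITI) ~ task * log(vocalic_IOI) + task * log(spont_rate) +
                (1 | participant) + (1 | sentence),
              data = d)
summary(m_iti)      # fixed-effect estimates, SE, t and p, comparable in structure to Table 8

# The CV-of-ITI model (Tables 9-10) could be specified analogously, with the CV as the response.
</preformat>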
            </sec>
        </sec>
        <sec>
            <title>5. Discussion</title>
            <p>The present study was conducted to examine the suitability of two movement-based
                paradigms&#8212;synchronization (SMS) and reproduction (NMR)&#8212;for the study of
                rhythm perception in natural language, and to provide empirical evidence on the
                settings of such a paradigm. Below, we discuss the results with reference to our
                original research aims and hypotheses and comment on how they compare to previous
                research with other types of auditory stimuli.</p>
            <sec>
                <title>5.1 Suitability of motor paradigms for linguistic rhythm research</title>
                <p>The present study demonstrates that motor paradigms are suitable tools to
                    investigate rhythm perception in language. Our results suggest that SMS in
                    particular is informative and better suited than NMR to support rhythm research in
                    language. Our version of SMS with natural language produced consistent patterns
                    of synchronization with vowel onsets, thus replicating our previous results with
                    simpler verbal stimuli (<xref ref-type="bibr" rid="B87">Rathcke et al.,
                        2019</xref>). In our version of NMR, a certain level of period tracking
                    could also be observed. However, the NMR results showed weaker relations to
                    linguistic stimuli and an overall trend to converge towards participants&#8217;
                    spontaneous tapping rates. Alongside this shift toward individually preferred
                    tapping rates, the overall variability of inter-tap intervals was reduced,
                    suggesting that participants tapped more regularly. This result parallels
                    previous research with a similar motor reproduction paradigm (see <xref
                        ref-type="bibr" rid="B30">Donovan &amp; Darwin, 1979</xref>; <xref
                        ref-type="bibr" rid="B99">Scott et al., 1985</xref>), as well as with other
                    paradigms that involve beat tracking during speech production (<xref
                        ref-type="bibr" rid="B48">Jungers, Palmer, &amp; Speer, 2002</xref>; <xref
                        ref-type="bibr" rid="B82">Port et al., 1995</xref>).</p>
                <p>When movement is not synchronized in time with an auditory signal, temporal
                    regularization of ITI prevails in linguistic stimuli, though not in other types
                    of stimuli (see <xref ref-type="bibr" rid="B30">Donovan &amp; Darwin,
                        1979</xref>; <xref ref-type="bibr" rid="B99">Scott et al., 1985</xref>).
                    Such regularization is likely to arise from the high level of rhythmic
                    complexity in language, which lacks temporal isochrony (<xref ref-type="bibr"
                        rid="B24">Dauer, 1983</xref>; <xref ref-type="bibr" rid="B79">Pointon,
                        1980</xref>; <xref ref-type="bibr" rid="B97">Roach, 1982</xref>; <xref
                        ref-type="bibr" rid="B109">Uldall, 1971</xref>; <xref ref-type="bibr"
                        rid="B111">van Santen &amp; Shih, 2000</xref>) while employing a highly
                    intricate hierarchy of nested constituents (cf. <xref ref-type="bibr" rid="B73"
                        >Nespor &amp; Vogel, 1986</xref>; <xref ref-type="bibr" rid="B101">Selkirk,
                        1984</xref>) and prominence alternations (<xref ref-type="bibr" rid="B58"
                        >Liberman &amp; Prince, 1977</xref>; <xref ref-type="bibr" rid="B84">Prince,
                        1983</xref>). In the context of such complexity, the superiority of SMS is
                    in line with previous research demonstrating that moving along with a complex
                    rhythm facilitates the discovery of its beat (<xref ref-type="bibr" rid="B104"
                        >Su &amp; P&#246;ppel, 2012</xref>). Such an advantage of synchronized
                    movement possibly arises from an enhanced internal representation of the
                    auditory rhythm that accompanies movement (<xref ref-type="bibr" rid="B17"
                        >Chemin, Mouraux, &amp; Nozaradan, 2014</xref>). In contrast, movement
                    without a concurrent auditory signal relies more heavily on an internal
                    representation of temporal patterns, thus increasing working memory load and the
                    associated processing cost (<xref ref-type="bibr" rid="B96">Repp &amp; Su,
                        2013</xref>). Recent evidence suggests that tapping without a concurrent
                    auditory signal might be a more demanding task than SMS (<xref ref-type="bibr"
                        rid="B50">Koch, Oliveri, &amp; Caltagirone, 2009</xref>; <xref
                        ref-type="bibr" rid="B57">Lewis, Wing, Pope, Praamstra, &amp; Miall,
                        2004</xref>). More specifically, reproduction or continuation tasks with a
                    metronome, whose settings are quite similar to the NMR task with language in our
                    experiment, seem to place higher demands on both working memory (<xref
                        ref-type="bibr" rid="B46">Jantzen, Oullier, Marshall, Steinberg, &amp;
                        Kelso, 2007</xref>; <xref ref-type="bibr" rid="B50">Koch et al.,
                    2009</xref>) and motor timing abilities (<xref ref-type="bibr" rid="B102"
                        >Serrien, 2008</xref>). In contrast, SMS is likely to enhance basic
                    perceptual abilities (<xref ref-type="bibr" rid="B110">Valdesolo, Ouyang, &amp;
                        DeSteno, 2010</xref>). These findings help to explain why
                    participants might have tended to converge toward their spontaneous tapping
                    rates in the NMR but not in the SMS task, as well as why SMS might be superior
                    to NMR in the context of beat perception and rhythmic processing in
                    language.</p>
            </sec>
            <sec>
                <title>5.2 SMS anchors and acoustic influences on SMS</title>
                <p>As hypothesized, SMS with language can produce systematic responses to the
                    temporal structure of natural spoken sentences. The present study tested five
                    potential anchors of synchronization, including onsets of linguistic units
                    (syllables, vowels) and acoustic landmarks (local maxima of energy or amplitude,
                    local changes in the smoothed energy contour). The shortest asynchronies were
                    observed between SMS peaks and nearby vowel onsets, followed by the moment of
                    the fastest change in the smoothed energy contour (maxD) and the local amplitude
                    maximum (LAM). The numerical difference in synchronization accuracy with these
                    three landmarks was not significant at the Bonferroni-corrected &#945;-level,
                    though vowel onsets produced the smallest asynchronies. In contrast, anchoring taps
                    to syllable onsets and local maxima in the energy contours led to
                    significantly poorer accuracy in the participants&#8217; performance.
                    These results indicate that vowel onsets seem to reliably attract taps not only
                    in simple verbal prompts (<xref ref-type="bibr" rid="B87">Rathcke et al.,
                        2019</xref>), but also in complex temporal patterns of natural spoken
                    language. Recent evidence from naturally evolved drummed languages like
                    Amazonian Bora further corroborates this finding. In drummed Bora, rhythmic
                    units have also been shown to consistently match intervocalic intervals,
                    irrespective of syllable complexity (<xref ref-type="bibr" rid="B100">Seifart,
                        Meyer, Grawunder, &amp; Dentel, 2018</xref>).</p>
                <p>Vowels play an important role in shaping the trajectory of the sonority contour
                    in the speech signal, frequently constituting local sonority peaks (<xref
                        ref-type="bibr" rid="B70">Morgan &amp; Fosler-Lussier, 1998</xref>; <xref
                        ref-type="bibr" rid="B116">Wang &amp; Narayanan, 2007</xref>). The sonority
                    contour reflects variable degrees of energy emanating from the vocal tract
                    during speech production and is particularly high for open vowels. The cyclical
                    production of vowel gestures in connected speech has been previously highlighted
                    as one of the potential reasons why spoken language might be rhythmic in nature
                        (<xref ref-type="bibr" rid="B37">Fowler, 1983</xref>; <xref ref-type="bibr"
                        rid="B38">Fowler &amp; Tassinary, 1981</xref>). Local fluctuations in signal
                    sonority related to vowel acoustics have also been argued to guide speech
                    segmentation and to assist first language acquisition (<xref ref-type="bibr"
                        rid="B86">R&#228;s&#228;nen, Doyle, &amp; Frank, 2018</xref>). It thus does
                    not seem surprising that beat perception (at least in English) locks on to
                    vocalic and not to syllabic onsets, though more research is needed to determine
                    if beat perception in English involves tracking of vowels per se or rather
                    tracking of nuclear constituents within larger units such as syllables.</p>
                <p>Importantly, SMS performance with the linguistic stimuli of the present study
                    demonstrated anticipation of vowel targets (as indicated by negative mean
                    asynchrony, cf. <xref ref-type="bibr" rid="B7">Aschersleben, 2002</xref>). In
                    particular, the first synchronization target within a sentence, i.e., the target
                    that occurred after an acoustic silence, showed larger negative asynchronies and
                    was thus anticipated more strongly than all subsequent vocalic targets. This finding is
                    in keeping with the existing evidence on the properties of SMS with other types
                    of auditory stimuli. Anticipation seems to be a characteristic of
                    non-musicians&#8217; synchronizing with a metronome signal, where regular auditory
                    prompts are interspersed with silences (<xref ref-type="bibr" rid="B7"
                        >Aschersleben, 2002</xref>; <xref ref-type="bibr" rid="B94">Repp,
                        2005</xref>). The negative mean asynchrony is reduced, or even disappears
                    completely in more complex rhythmic contexts where synchronization targets are
                    not separated by silences, e.g., in music (<xref ref-type="bibr" rid="B106"
                        >Thaut, Rathbun, &amp; Miller, 1997</xref>; <xref ref-type="bibr" rid="B123"
                        >Wohlschl&#228;ger &amp; Koch, 2000</xref>).</p>
                <p>SMS in the present study was more precise with those vowels that had shorter
                    rise-times. Unfortunately, rise-time (and also rise-slope) of an amplitude
                    envelope is a complex acoustic measure that is influenced by many aspects of
                    speech production. These properties can change depending on the manner and the
                    place of articulation of the onset consonant(s), levels of syllabic prominence
                    (weak, strong, accented, or emphatic), degrees of coarticulation, and syllable
                    reduction. This underlying complexity impedes a meaningful interpretation of the
                    rise-time contribution to beat perception (cf. <xref ref-type="bibr" rid="B78"
                        >Peelle &amp; Davis, 2012</xref>), though interestingly, once again we find
                    parallels to SMS with a metronome where shorter rise-times of tones have been
                    shown to improve synchronization accuracy (<xref ref-type="bibr" rid="B113">Vos,
                        van Kruysbergen, &amp; Mates, 1995</xref>).</p>
            </sec>
            <sec>
                <title>5.3 SMS sensitivity to the metrical structure</title>
                <p>One of the crucial findings of the present study is that SMS is sensitive to the
                    metrical structure of spoken sentences. In the present study, native English
                    participants were more likely to tap to metrically strong than metrically weak
                    syllables. These results are somewhat comparable with the findings by Allen
                        (<xref ref-type="bibr" rid="B2">1972: 89</xref>) who concluded that English
                    participants tend to &#8220;tap before the nuclear vowels of rhythmically
                    accented syllables.&#8221; Given that the prosodic system of English
                    incorporates word-level stress and sentence-level alternations of strong and
                    weak syllables (<xref ref-type="bibr" rid="B58">Liberman &amp; Prince,
                        1977</xref>; <xref ref-type="bibr" rid="B84">Prince, 1983</xref>), it is
                    highly likely that the prosodic system of a listener&#8217;s native language
                    plays a major role in inducing their feeling of a beat in speech and that rhythm
                    perception in language might be a constructive perceptual process.</p>
            </sec>
            <sec>
                <title>5.4 On the role of repetition in SMS</title>
                <p>In our view, repetition is a crucial aspect of the success of the SMS paradigm
                    with language. Although looping is a laboratory manipulation, it resonates with the idea
                    that linguistic rhythm arises on large temporal scales through repeated
                    experience with one&#8217;s native language. Unlike other approaches that rely
                    on special cases of language use like poetry, mantra, or chant (cf. <xref
                        ref-type="bibr" rid="B20">Cummins, 2012</xref>) or on short, metrical, or
                    regularized speech (<xref ref-type="bibr" rid="B59">Lidji et al., 2011</xref>),
                    looping can be applied to any natural spoken utterances, leading to an increased
                    ecological validity of the proposed paradigm (cf. <xref ref-type="bibr" rid="B3"
                        >Allen, 1975</xref>). The present SMS method creates a unique situation for
                    unlocking the rhythmic structure of natural, unmanipulated language while
                    bypassing other mechanisms of sentence processing (cf. <xref ref-type="bibr"
                        rid="B88">Rathcke et al., 2018</xref>).</p>
                <p>In the present experiment, most participants appeared to have created an internal
                    representation of the sentence beat structure after two repetitions and could
                    start synchronizing during the third presentation cycle of the sentence. As our
                    results indicate, a total of three repetitions used in previous research (<xref
                        ref-type="bibr" rid="B59">Lidji et al., 2011</xref>) is not quite sufficient
                    to fully capture stable SMS patterns. For example, the kernel density fitting
                    procedure relies on the presence of at least two events and can lead to missing
                    data in shorter sentences or in participants who might require longer to
                    entrain. Given the results of our time-series analyses, we recommend using at
                    least 10 repetitions of a sentence to produce stable, consistent, and
                    representative patterns across individual participants (e.g., <xref
                        ref-type="bibr" rid="B39">G&#233;rard &amp; Rosenfeld, 1995</xref>; <xref
                        ref-type="bibr" rid="B83">Pressing &amp; Jolley-Rogers, 1997</xref>; <xref
                        ref-type="bibr" rid="B94">Repp, 2005</xref>; <xref ref-type="bibr" rid="B95"
                        >Repp &amp; Penel, 2002</xref>).</p>
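                <p>To illustrate this point about repetitions, a minimal sketch of the kernel
                    density approach is given below. The tap times and the bandwidth are
                    illustrative assumptions and do not reproduce the exact settings of our
                    pre-processing pipeline.</p>
                <preformat>
# Hypothetical R sketch: pooling tap times (in seconds, relative to sentence onset)
# across repetitions of one sentence and locating candidate synchronization targets
# as peaks of a Gaussian kernel density estimate. With too few repetitions, some
# locations attract fewer than two taps and no stable peak can be estimated there.
taps  &lt;- c(0.42, 0.45, 0.44, 1.10, 1.13, 1.08, 1.76, 1.80, 1.78)   # illustrative values
dens  &lt;- density(taps, bw = 0.05)                                  # kernel density estimate
peaks &lt;- dens$x[which(diff(sign(diff(dens$y))) == -2) + 1]         # local maxima of the density
print(peaks)   # candidate tapping targets within the sentence
</preformat>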
                <p>Finally, the results of the present study exclude the possibility that the
                    speech-to-song illusion interferes with SMS patterns in a significant way. Both
                    high- and low-transforming sentences tested in the present study produced
                    similar results in terms of synchronization accuracy and targets. The only
                    difference between high- and low-transforming sentences consisted in the overall
                    number of recorded taps. Accordingly, sentences that led to more S2S
                    transformations (<xref ref-type="bibr" rid="B88">Rathcke et al., 2018,
                        forthcoming</xref>) also induced more taps. The reason for this effect is
                    not yet entirely clear, though it might be related to a higher level of overall
                    signal sonority in the high-transforming set (<xref ref-type="bibr" rid="B88"
                        >Rathcke et al., 2018, forthcoming</xref>). It is, however, clear that the
                    speech-to-song illusion is not a core prerequisite for a successful application
                    of the SMS paradigm to language, which is in line with our previous work showing
                    that S2S transformations rely more heavily on pitch- than on time-related
                    features of speech (<xref ref-type="bibr" rid="B35">Falk et al., 2014</xref>;
                        <xref ref-type="bibr" rid="B34">Falk &amp; Rathcke, 2010</xref>).</p>
            </sec>
            <sec>
                <title>5.5 Individual variability in SMS with language</title>
                <p>As expected, we found some individual variation in the SMS task. Some aspects of
                    this variation, e.g., SMS accuracy, could be partially explained by individually
                    varying levels of musical training. Participants produced smaller asynchronies if
                    they had higher levels of musical training and experience (which included
                    playing an instrument, singing, and dancing). Musical sophistication is also
                    known to decrease error and variability in synchronization with a metronome in
                    non-professional musicians (e.g., <xref ref-type="bibr" rid="B39">G&#233;rard
                        &amp; Rosenfeld, 1995</xref>; <xref ref-type="bibr" rid="B83">Pressing &amp;
                        Jolley-Rogers, 1997</xref>; <xref ref-type="bibr" rid="B95">Repp &amp;
                        Penel, 2002</xref>). However, measures of general synchronization
                    performance with music and metronome employed in the present study (<xref
                        ref-type="bibr" rid="B23">BAASTA, Dalla Bella et al., 2017</xref>) did not
                    help to explain individual variability in SMS with speech. There may be multiple
                    reasons for this (e.g., different mechanisms of rhythm perception in
                    isochronous versus non-isochronous signals or in non-speech versus speech) that
                    require further investigation.</p>
            </sec>
        </sec>
        <sec>
            <title>6. Conclusions and outlook</title>
            <p>The two movement-based paradigms that were elaborated and tested in the present study
                treat language rhythm as a consequence of general internal timekeeping mechanisms
                that allow us to synchronize, anticipate, and adapt our behaviour in response to an
                external stimulus (<xref ref-type="bibr" rid="B94">Repp, 2005</xref>). We showed
                that the SMS paradigm can be successfully applied to linguistic stimuli and that SMS
                patterns resemble well-documented findings of SMS with metronome and music (<xref
                    ref-type="bibr" rid="B7">Aschersleben, 2002</xref>; <xref ref-type="bibr"
                    rid="B94">Repp, 2005</xref>; <xref ref-type="bibr" rid="B96">Repp &amp; Su,
                    2013</xref>) in listeners of various degrees of musical training (<xref
                    ref-type="bibr" rid="B39">G&#233;rard &amp; Rosenfeld, 1995</xref>; <xref
                    ref-type="bibr" rid="B83">Pressing &amp; Jolley-Rogers, 1997</xref>; <xref
                    ref-type="bibr" rid="B95">Repp &amp; Penel, 2002</xref>). Like music, beat
                perception in language can be linked to temporal expectancy and prediction of
                upcoming events, and we showed that such expectancies can be elicited during SMS
                with spoken sentences presented in a loop. Our study further demonstrated that
                vowels constitute the most likely rhythmic anchors in language, though more work is
                required with diverse languages to establish if the present finding generalizes
                beyond English. An alternative movement task, NMR, showed some potential to engage
                listeners&#8217; capacity to extract rhythmic patterns from speech, though it also
                tended to evoke motor regularization arising from preferred individual
                finger-tapping rates.</p>
            <p>In sum, the present study demonstrates that natural language can entrain movement.
                Our implementation of the SMS paradigm is a valid experimental tool to study beat
                tracking and rhythm extraction in linguistic stimuli of different degrees of
                complexity, and can be used in future work to answer many open questions on rhythm
                perception and cognition across prosodically diverse languages.</p>
        </sec>
        <sec>
            <title>Data Accessibility Statement</title>
            <p>The data and materials for the experiment reported here can be made available upon
                request. Speech materials and annotations can be downloaded from OSF (see <ext-link
                    ext-link-type="uri" xmlns:xlink="http://www.w3.org/1999/xlink"
                    xlink:href="https://osf.io/3dh4m/">https://osf.io/3dh4m/</ext-link>). The
                experiment was not preregistered.</p>
        </sec>
        <sec sec-type="supplementary-material">
            <title>Additional File</title>
            <p>The additional file for this article can be found as follows:</p>
            <supplementary-material id="S1" xmlns:xlink="http://www.w3.org/1999/xlink"
                xlink:href="https://doi.org/10.5334/labphon.248.s1">
                <!--[<inline-supplementary-material xlink:title="local_file" xlink:href="labphon-12-248-s1.pdf">labphon-12-248-s1.pdf</inline-supplementary-material>]-->
                <label>Supplementary Material</label>
                <caption>
                    <p>Summary of statistical analyses and best-fit models. DOI:
                            <uri>https://doi.org/10.5334/labphon.248.s1</uri></p>
                </caption>
            </supplementary-material>
        </sec>
    </body>
    <back>
        <fn-group>
            <fn id="n1">
                <p>Synchronization abilities can also vary across individuals (<xref ref-type="bibr"
                        rid="B23">Dalla Bella et al., 2017</xref>) and across music-cultural
                    environments (<xref ref-type="bibr" rid="B80">Polak et al., 2018</xref>).</p>
            </fn>
            <fn id="n2">
                <p>A correlation test (see supplementary materials) indicated that rise-time and
                    rise-slope were not correlated in these data (r = &#8211;0.12, n.s.), i.e., each
                    affected the participants&#8217; SMS-performance independently.</p>
            </fn>
        </fn-group>
        <ack>
            <title>Acknowledgements</title>
            <p>This research was supported by a research grant from the Leverhulme Trust
                (RPG-2017-306) to the first author. We would like to express our deep gratitude to
                Georg Lohfink for his help with the equipment set-up, testing, and data collection
                at Kent Linguistics Lab, and to Roger Dean for his help with the time-series
                analyses reported in 4.6. Our thanks also go to Mona Franke (University of Montreal)
                who prepared the annotations of vowel onsets for the cross-validation of the
                annotator agreement reported in 3.2. We are further indebted to Naeem Komeilipoor,
                M&#233;lody Blais, and Simon Rigoulot from the Centre for Research on Brain,
                Language and Music Montreal for their support with the preparation of the BAASTA
                data. Discussions at IPS Munich, MARCS Sydney, and CLAS Macquarie helped to develop
                the ideas presented in the paper while the input from two anonymous reviewers and
                the associate editor helped to improve the manuscript. Our special thanks go to all
                the tireless tappers who participated in the experiment.</p>
        </ack>
        <sec>
            <title>Competing Interests</title>
            <p>The authors have no competing interests to declare.</p>
        </sec>
        <sec>
            <title>Author Contributions</title>
            <p>Tamara Rathcke designed the experiment, annotated the sentences to identify the
                syllable and vowel onsets, devised the acoustic analyses of the sentences, conducted
                the statistical analyses, was the primary author of all sections of the manuscript,
                and dealt with the manuscript revisions. Chia-Yuan Lin set up the experiment,
                collected the tapping data, conducted the pre-processing of the data, and performed
                the acoustic analyses of the sentences. Simone Falk coordinated annotations of the
                second annotator and offered comments on the manuscript. Simone Dalla Bella guided
                the development of the pre-processing procedure of the tapping data and commented on
                the manuscript. The study evolved from extensive discussions between TR, SF, and
                SDB.</p>
        </sec>
        <ref-list>
            <ref id="B1">
                <label>1</label>
                <mixed-citation publication-type="book"><string-name><surname>Abercrombie</surname>,
                            <given-names>D.</given-names></string-name> (<year>1967</year>).
                        <source>Elements of general phonetics</source>.
                        <publisher-name>Edinburgh University Press</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B2">
                <label>2</label>
                <mixed-citation publication-type="journal"><string-name><surname>Allen</surname>,
                            <given-names>G. D.</given-names></string-name> (<year>1972</year>).
                        <article-title>The Location of Rhythmic Stress Beats in English: An
                        Experimental Study I</article-title>. <source>Language and Speech</source>,
                        <volume>15</volume>(<issue>1</issue>),
                        <fpage>72</fpage>&#8211;<lpage>100</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1177/002383097201500110</pub-id></mixed-citation>
            </ref>
            <ref id="B3">
                <label>3</label>
                <mixed-citation publication-type="journal"><string-name><surname>Allen</surname>,
                            <given-names>G. D.</given-names></string-name> (<year>1975</year>).
                        <article-title>Speech rhythm: Its relation to performance universals and
                        articulatory timing</article-title>. <source>Journal of Phonetics</source>.
                    DOI: <pub-id pub-id-type="doi"
                    >10.1016/S0095-4470(19)31351-8</pub-id></mixed-citation>
            </ref>
            <ref id="B4">
                <label>4</label>
                <mixed-citation publication-type="journal"><string-name><surname>Arvaniti</surname>,
                            <given-names>A.</given-names></string-name> (<year>2009</year>).
                        <article-title>Rhythm, timing and the timing of rhythm</article-title>.
                        <source>Phonetica</source>, <volume>66</volume>(<issue>1&#8211;2</issue>),
                        <fpage>46</fpage>&#8211;<lpage>63</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1159/000208930</pub-id></mixed-citation>
            </ref>
            <ref id="B5">
                <label>5</label>
                <mixed-citation publication-type="journal"><string-name><surname>Arvaniti</surname>,
                            <given-names>A.</given-names></string-name> (<year>2012</year>).
                        <article-title>The usefulness of metrics in the quantification of speech
                        rhythm</article-title>. <source>Journal of Phonetics</source>,
                        <volume>40</volume>(<issue>3</issue>),
                        <fpage>351</fpage>&#8211;<lpage>373</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.wocn.2012.02.003</pub-id></mixed-citation>
            </ref>
            <ref id="B6">
                <label>6</label>
                <mixed-citation publication-type="journal"><string-name><surname>Arvaniti</surname>,
                            <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Rodriquez</surname>,
                        <given-names>T.</given-names></string-name> (<year>2013</year>).
                        <article-title>The role of rhythm class, speaking rate, and F0 in language
                        discrimination</article-title>. <source>Laboratory Phonology</source>,
                        <volume>4</volume>(<issue>1</issue>),
                        <fpage>7</fpage>&#8211;<lpage>38</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1515/lp-2013-0002</pub-id></mixed-citation>
            </ref>
            <ref id="B7">
                <label>7</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Aschersleben</surname>,
                            <given-names>G.</given-names></string-name> (<year>2002</year>).
                        <article-title>Temporal control of movements in sensorimotor
                        synchronization</article-title>. <source>Brain and Cognition</source>,
                        <volume>48</volume>(<issue>1</issue>),
                        <fpage>66</fpage>&#8211;<lpage>79</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1006/brcg.2001.1304</pub-id></mixed-citation>
            </ref>
            <ref id="B8">
                <label>8</label>
                <mixed-citation publication-type="book"><string-name><surname>Baayen</surname>,
                            <given-names>R. H.</given-names></string-name> (<year>2008</year>).
                        <source>Analyzing linguistic data: A practical introduction to statistics
                        using R</source>. <publisher-name>Cambridge University
                        Press</publisher-name>. DOI: <pub-id pub-id-type="doi"
                        >10.1017/CBO9780511801686</pub-id></mixed-citation>
            </ref>
            <ref id="B9">
                <label>9</label>
                <mixed-citation publication-type="journal"><string-name><surname>Barr</surname>,
                            <given-names>D. J.</given-names></string-name>,
                            <string-name><surname>Levy</surname>,
                        <given-names>R.</given-names></string-name>,
                            <string-name><surname>Scheepers</surname>,
                        <given-names>C.</given-names></string-name>, &amp;
                            <string-name><surname>Tily</surname>, <given-names>H.
                        J.</given-names></string-name> (<year>2013</year>). <article-title>Random
                        effects structure for confirmatory hypothesis testing: Keep it
                        maximal</article-title>. <source>Journal of Memory and Language</source>,
                        <volume>68</volume>(<issue>3</issue>),
                        <fpage>255</fpage>&#8211;<lpage>278</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.jml.2012.11.001</pub-id></mixed-citation>
            </ref>
            <ref id="B10">
                <label>10</label>
                <mixed-citation publication-type="journal"><string-name><surname>Barry</surname>,
                            <given-names>W.</given-names></string-name>,
                            <string-name><surname>Andreeva</surname>,
                        <given-names>B.</given-names></string-name>, &amp;
                            <string-name><surname>Koreman</surname>,
                        <given-names>J.</given-names></string-name> (<year>2009</year>).
                        <article-title>Do rhythm measures reflect perceived rhythm?</article-title>
                    <source>Phonetica</source>, <volume>66</volume>(<issue>1&#8211;2</issue>),
                        <fpage>78</fpage>&#8211;<lpage>94</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1159/000208932</pub-id></mixed-citation>
            </ref>
            <ref id="B11">
                <label>11</label>
                <mixed-citation publication-type="journal"><string-name><surname>Bates</surname>,
                            <given-names>D.</given-names></string-name>,
                            <string-name><surname>M&#228;chler</surname>,
                            <given-names>M.</given-names></string-name>,
                            <string-name><surname>Bolker</surname>,
                        <given-names>B.</given-names></string-name>, &amp;
                            <string-name><surname>Walker</surname>,
                        <given-names>S.</given-names></string-name> (<year>2015</year>).
                        <article-title>Fitting Linear Mixed-Effects Models Using
                        {lme4}</article-title>. <source>Journal of Statistical Software</source>,
                        <volume>67</volume>(<issue>1</issue>),
                        <fpage>1</fpage>&#8211;<lpage>48</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.18637/jss.v067.i01</pub-id></mixed-citation>
            </ref>
            <ref id="B12">
                <label>12</label>
                <mixed-citation publication-type="journal"><string-name><surname>Bolton</surname>,
                            <given-names>T. L.</given-names></string-name> (<year>1894</year>).
                        <article-title>Rhythm</article-title>. <source>American Journal of
                        Psychology</source>, <volume>6</volume>,
                        <fpage>145</fpage>&#8211;<lpage>238</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.2307/1410948</pub-id></mixed-citation>
            </ref>
            <ref id="B13">
                <label>13</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Borchers</surname>,
                            <given-names>H. W.</given-names></string-name> (<year>2018</year>).
                        <source>pracma: Practical Numerical Math Functions</source>.
                        <uri>https://cran.r-project.org/package=pracma</uri></mixed-citation>
            </ref>
            <ref id="B14">
                <label>14</label>
                <mixed-citation publication-type="journal"><string-name><surname>Bouvet</surname>,
                            <given-names>C. J.</given-names></string-name>,
                            <string-name><surname>Varlet</surname>,
                        <given-names>M.</given-names></string-name>, <string-name><surname>Dalla
                            Bella</surname>, <given-names>S.</given-names></string-name>,
                            <string-name><surname>Keller</surname>, <given-names>P.
                        E.</given-names></string-name>, &amp; <string-name><surname>Bardy</surname>,
                            <given-names>B. G.</given-names></string-name> (<year>2019</year>).
                        <article-title>Accent-induced stabilization of spontaneous
                        auditory&#8211;motor synchronization</article-title>. <source>Psychological
                        Research</source>. DOI: <pub-id pub-id-type="doi"
                        >10.1007/s00426-019-01208-z</pub-id></mixed-citation>
            </ref>
            <ref id="B15">
                <label>15</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Brett</surname>,
                            <given-names>M.</given-names></string-name>, &amp;
                            <string-name><surname>Grahn</surname>, <given-names>J.
                        A.</given-names></string-name> (<year>2007</year>). <article-title>Rhythm
                        and beat perception in motor areas of the brain</article-title>.
                        <source>Journal of Cognitive Neuroscience</source>,
                        <volume>19</volume>(<issue>5</issue>),
                        <fpage>893</fpage>&#8211;<lpage>906</lpage>.
                    DOI: <pub-id pub-id-type="doi"
                    >10.1162/jocn.2007.19.5.893</pub-id></mixed-citation>
            </ref>
            <ref id="B16">
                <label>16</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cardany</surname>,
                            <given-names>A. B.</given-names></string-name> (<year>2013</year>).
                        <article-title>Nursery Rhymes in Music and Language
                    Literacy</article-title>. <source>General Music Today</source>,
                        <volume>26</volume>(<issue>2</issue>),
                        <fpage>30</fpage>&#8211;<lpage>36</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1177/1048371312462869</pub-id></mixed-citation>
            </ref>
            <ref id="B17">
                <label>17</label>
                <mixed-citation publication-type="journal"><string-name><surname>Chemin</surname>,
                            <given-names>B.</given-names></string-name>,
                            <string-name><surname>Mouraux</surname>,
                        <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Nozaradan</surname>,
                        <given-names>S.</given-names></string-name> (<year>2014</year>).
                        <article-title>Body Movement Selectively Shapes the Neural Representation of
                        Musical Rhythms</article-title>. <source>Psychological Science</source>,
                        <volume>25</volume>(<issue>12</issue>),
                        <fpage>2147</fpage>&#8211;<lpage>2159</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1177/0956797614551161</pub-id></mixed-citation>
            </ref>
            <ref id="B18">
                <label>18</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cooper</surname>,
                            <given-names>A. M.</given-names></string-name>,
                            <string-name><surname>Whalen</surname>, <given-names>D.
                        H.</given-names></string-name>, &amp;
                        <string-name><surname>Fowler</surname>, <given-names>C.
                        A.</given-names></string-name> (<year>1986</year>). <article-title>P-centers
                        are unaffected by phonetic categorization</article-title>.
                        <source>Perception &amp; Psychophysics</source>,
                        <volume>39</volume>(<issue>3</issue>),
                        <fpage>187</fpage>&#8211;<lpage>196</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/BF03212490</pub-id></mixed-citation>
            </ref>
            <ref id="B19">
                <label>19</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cummins</surname>,
                            <given-names>F.</given-names></string-name> (<year>2009</year>).
                        <article-title>Rhythm as entrainment: The case of synchronous
                        speech</article-title>. <source>Journal of Phonetics</source>,
                        <volume>37</volume>(<issue>1</issue>),
                        <fpage>16</fpage>&#8211;<lpage>28</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.wocn.2008.08.003</pub-id></mixed-citation>
            </ref>
            <ref id="B20">
                <label>20</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cummins</surname>,
                            <given-names>F.</given-names></string-name> (<year>2012</year>).
                        <article-title>Looking for rhythm in speech</article-title>.
                        <source>Empirical Musicology Review</source>,
                        <volume>7</volume>(<issue>1</issue>),
                        <fpage>28</fpage>&#8211;<lpage>35</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.18061/1811/52976</pub-id></mixed-citation>
            </ref>
            <ref id="B21">
                <label>21</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cummins</surname>,
                            <given-names>F.</given-names></string-name>, &amp;
                            <string-name><surname>Port</surname>,
                        <given-names>R.</given-names></string-name> (<year>1998</year>).
                        <article-title>Rhythmic constraints on stress timing in
                        English</article-title>. <source>Journal of Phonetics</source>,
                        <volume>26</volume>(<issue>2</issue>),
                        <fpage>145</fpage>&#8211;<lpage>171</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1006/jpho.1998.0070</pub-id></mixed-citation>
            </ref>
            <ref id="B22">
                <label>22</label>
                <mixed-citation publication-type="journal"><string-name><surname>Dalla
                            Bella</surname>, <given-names>S.</given-names></string-name>,
                            <string-name><surname>Bia&#322;u&#324;ska</surname>,
                            <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Sowi&#324;ski</surname>,
                            <given-names>J.</given-names></string-name> (<year>2013</year>).
                        <article-title>Why Movement Is Captured by Music, but Less by Speech: Role
                        of Temporal Regularity</article-title>. <source>PLoS ONE</source>,
                        <volume>8</volume>(<issue>8</issue>),
                        <fpage>1</fpage>&#8211;<lpage>16</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1371/journal.pone.0071945</pub-id></mixed-citation>
            </ref>
            <ref id="B23">
                <label>23</label>
                <mixed-citation publication-type="journal"><string-name><surname>Dalla
                            Bella</surname>, <given-names>S.</given-names></string-name>,
                            <string-name><surname>Farrugia</surname>,
                        <given-names>N.</given-names></string-name>,
                            <string-name><surname>Benoit</surname>, <given-names>C.
                        E.</given-names></string-name>, <string-name><surname>Begel</surname>,
                            <given-names>V.</given-names></string-name>,
                            <string-name><surname>Verga</surname>,
                        <given-names>L.</given-names></string-name>,
                            <string-name><surname>Harding</surname>,
                        <given-names>E.</given-names></string-name>, &amp;
                            <string-name><surname>Kotz</surname>, <given-names>S.
                        A.</given-names></string-name> (<year>2017</year>). <article-title>BAASTA:
                        Battery for the Assessment of Auditory Sensorimotor and Timing
                        Abilities</article-title>. <source>Behavior Research Methods</source>,
                        <volume>49</volume>(<issue>3</issue>),
                        <fpage>1128</fpage>&#8211;<lpage>1145</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.3758/s13428-016-0773-6</pub-id></mixed-citation>
            </ref>
            <ref id="B24">
                <label>24</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Dauer</surname>,
                            <given-names>R. M. M.</given-names></string-name> (<year>1983</year>).
                        <article-title>Stress-timing and syllable-timing reanalyzed</article-title>.
                        <source>Journal of Phonetics</source>,
                    <volume>11</volume>(<issue>1</issue>),
                    <fpage>51</fpage>&#8211;<lpage>62</lpage>.
                        <uri>https://psycnet.apa.org/record/1983-29886-001</uri>. DOI: <pub-id
                        pub-id-type="doi">10.1016/S0095-4470(19)30776-4</pub-id></mixed-citation>
            </ref>
            <ref id="B25">
                <label>25</label>
                <mixed-citation publication-type="journal"><string-name><surname>De Jong</surname>,
                            <given-names>K. J.</given-names></string-name> (<year>1994</year>).
                        <article-title>The correlation of P-center adjustments with articulatory and
                        acoustic events</article-title>. <source>Perception &amp;
                        Psychophysics</source>, <volume>56</volume>(<issue>4</issue>),
                        <fpage>447</fpage>&#8211;<lpage>460</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/BF03206736</pub-id></mixed-citation>
            </ref>
            <ref id="B26">
                <label>26</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Dellwo</surname>,
                            <given-names>V.</given-names></string-name>, &amp;
                            <string-name><surname>Wagner</surname>,
                        <given-names>P.</given-names></string-name> (<year>2003</year>).
                        <article-title>Relations between language rhythm and speech
                        rate</article-title>. <source>Proceedings of International Congress of
                        Phonetic Science</source>, <fpage>471</fpage>&#8211;<lpage>474</lpage>.
                        <uri>https://pub.uni-bielefeld.de/record/1785384</uri></mixed-citation>
            </ref>
            <ref id="B27">
                <label>27</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Deterding</surname>,
                        <given-names>D.</given-names></string-name> (<year>2001</year>).
                        <article-title>The measurement of rhythm: A comparison of Singapore and
                        British English</article-title>. <source>Journal of Phonetics</source>,
                        <volume>29</volume>(<issue>2</issue>),
                        <fpage>217</fpage>&#8211;<lpage>230</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1006/jpho.2001.0138</pub-id></mixed-citation>
            </ref>
            <ref id="B28">
                <label>28</label>
                <mixed-citation publication-type="journal"><string-name><surname>Deutsch</surname>,
                            <given-names>D.</given-names></string-name> (<year>2003</year>).
                        <source>Phantom words, and other curiosities</source>. Philomel
                    Records.</mixed-citation>
            </ref>
            <ref id="B29">
                <label>29</label>
                <mixed-citation publication-type="journal"><string-name><surname>Deutsch</surname>,
                            <given-names>D.</given-names></string-name>,
                            <string-name><surname>Henthorn</surname>,
                        <given-names>T.</given-names></string-name>, &amp;
                            <string-name><surname>Lapidis</surname>,
                        <given-names>R.</given-names></string-name> (<year>2011</year>).
                        <article-title>Illusory transformation from speech to song</article-title>.
                        <source>The Journal of the Acoustical Society of America</source>,
                        <volume>129</volume>(<issue>4</issue>),
                        <fpage>2245</fpage>&#8211;<lpage>2252</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1121/1.3562174</pub-id></mixed-citation>
            </ref>
            <ref id="B30">
                <label>30</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Donovan</surname>,
                            <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Darwin</surname>,
                        <given-names>C.</given-names></string-name> (<year>1979</year>).
                        <article-title>The perceived rhythm of speech</article-title>.
                        <conf-name>Proceedings of the 9th International Congress of Phonetic
                        Sciences</conf-name>, <volume>2</volume>,
                        <fpage>268</fpage>&#8211;<lpage>274</lpage>.</mixed-citation>
            </ref>
            <ref id="B31">
                <label>31</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Eerola</surname>,
                            <given-names>T.</given-names></string-name>, &amp;
                            <string-name><surname>Toiviainen</surname>,
                            <given-names>P.</given-names></string-name> (<year>2004</year>).
                        <article-title>MIDI Toolbox: MATLAB Tools for Music
                    Research</article-title>. In <source>University of Jyv&#228;skyl&#228;:
                        Kopijyv&#228;, Jyv&#228;skyl&#228;, Finland</source>.
                        <uri>http://www.jyu.fi/musica/miditoolbox/</uri></mixed-citation>
            </ref>
            <ref id="B32">
                <label>32</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Engstr&#246;m</surname>, <given-names>D.
                            A.</given-names></string-name>, <string-name><surname>Kelso</surname>,
                            <given-names>J. A. S.</given-names></string-name>, &amp;
                            <string-name><surname>Holroyd</surname>,
                        <given-names>T.</given-names></string-name> (<year>1996</year>).
                        <article-title>Reaction-anticipation transitions in human perception-action
                        patterns</article-title>. <source>Human Movement Science</source>,
                        <volume>15</volume>(<issue>6</issue>),
                        <fpage>809</fpage>&#8211;<lpage>832</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S0167-9457(96)00031-0</pub-id></mixed-citation>
            </ref>
            <ref id="B33">
                <label>33</label>
                <mixed-citation publication-type="journal"><string-name><surname>Falk</surname>,
                            <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Dalla Bella</surname>,
                            <given-names>S.</given-names></string-name> (<year>2016</year>).
                        <article-title>It is better when expected: Aligning speech and motor rhythms
                        enhances verbal processing</article-title>. <source>Language, Cognition and
                        Neuroscience</source>, <volume>31</volume>(<issue>5</issue>),
                        <fpage>699</fpage>&#8211;<lpage>708</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1080/23273798.2016.1144892</pub-id></mixed-citation>
            </ref>
            <ref id="B34">
                <label>34</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Falk</surname>,
                            <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Rathcke</surname>,
                        <given-names>T.</given-names></string-name> (<year>2010</year>).
                        <article-title>On the Speech-To-Song Illusion: Evidence from
                        German</article-title>. <source>Speech Prosody</source>,
                        <volume>169</volume>.
                        <uri>https://www.isca-speech.org/archive/sp2010/papers/sp10_169.pdf</uri></mixed-citation>
            </ref>
            <ref id="B35">
                <label>35</label>
                <mixed-citation publication-type="journal"><string-name><surname>Falk</surname>,
                            <given-names>S.</given-names></string-name>,
                            <string-name><surname>Rathcke</surname>,
                        <given-names>T.</given-names></string-name>, &amp; <string-name><surname>Dalla
                            Bella</surname>, <given-names>S.</given-names></string-name>
                        (<year>2014</year>). <article-title>When
                        speech sounds like music</article-title>. <source>Journal of Experimental
                        Psychology: Human Perception and Performance</source>,
                        <volume>40</volume>(<issue>4</issue>),
                        <fpage>1491</fpage>&#8211;<lpage>1506</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1037/a0036858</pub-id></mixed-citation>
            </ref>
            <ref id="B36">
                <label>36</label>
                <mixed-citation publication-type="journal"><string-name><surname>Falk</surname>,
                            <given-names>S.</given-names></string-name>,
                            <string-name><surname>Volpi-Moncorger</surname>,
                            <given-names>C.</given-names></string-name>, &amp;
                            <string-name><surname>Dalla Bella</surname>,
                            <given-names>S.</given-names></string-name> (<year>2017</year>).
                        <article-title>Auditory-motor rhythms and speech processing in French and
                        German listeners</article-title>. <source>Frontiers in Psychology</source>,
                        <volume>8</volume>,
                        <fpage>1</fpage>&#8211;<lpage>14</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3389/fpsyg.2017.00395</pub-id></mixed-citation>
            </ref>
            <ref id="B37">
                <label>37</label>
                <mixed-citation publication-type="journal"><string-name><surname>Fowler</surname>,
                            <given-names>C. A.</given-names></string-name> (<year>1983</year>).
                        <article-title>Converging sources of evidence on spoken and perceived
                        rhythms of speech: Cyclic production of vowels in monosyllabic stress
                        feet</article-title>. <source>Journal of Experimental Psychology:
                        General</source>, <volume>112</volume>(<issue>3</issue>),
                        <fpage>386</fpage>&#8211;<lpage>412</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1037/0096-3445.112.3.386</pub-id></mixed-citation>
            </ref>
            <ref id="B38">
                <label>38</label>
                <mixed-citation publication-type="book"><string-name><surname>Fowler</surname>,
                            <given-names>C. A.</given-names></string-name>, &amp;
                            <string-name><surname>Tassinary</surname>,
                        <given-names>L.</given-names></string-name> (<year>1981</year>).
                        <chapter-title>Natural measurement criteria for speech: The anisochrony
                        illusion</chapter-title>. In <string-name><given-names>J.</given-names>
                        <surname>Long</surname></string-name> &amp;
                            <string-name><given-names>A.</given-names>
                        <surname>Baddeley</surname></string-name> (Eds.), <source>Attention and
                        performance, IX</source> (pp. <fpage>521</fpage>&#8211;<lpage>535</lpage>).
                        <publisher-name>Erlbaum</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B39">
                <label>39</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>G&#233;rard</surname>,
                            <given-names>C.</given-names></string-name>, &amp;
                            <string-name><surname>Rosenfeld</surname>,
                        <given-names>M.</given-names></string-name> (<year>1995</year>).
                        <article-title>Pratique musicale et r&#233;gulations
                        temporelles</article-title>. <source>L&#8217;ann&#233;e
                        Psychologique</source>, <volume>95</volume>(<issue>4</issue>),
                        <fpage>571</fpage>&#8211;<lpage>591</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3406/psy.1995.28856</pub-id></mixed-citation>
            </ref>
            <ref id="B40">
                <label>40</label>
                <mixed-citation publication-type="journal"><string-name><surname>Goswami</surname>,
                            <given-names>U.</given-names></string-name>, &amp;
                            <string-name><surname>Leong</surname>,
                        <given-names>V.</given-names></string-name> (<year>2013</year>).
                        <article-title>Speech rhythm and temporal structure: Converging
                        perspectives</article-title>. <source>Laboratory Phonology</source>,
                        <volume>4</volume>(<issue>1</issue>),
                        <fpage>67</fpage>&#8211;<lpage>92</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1515/lp-2013-0004</pub-id></mixed-citation>
            </ref>
            <ref id="B41">
                <label>41</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Goswami</surname>,
                            <given-names>U.</given-names></string-name>,
                            <string-name><surname>Thomson</surname>,
                        <given-names>J.</given-names></string-name>,
                            <string-name><surname>Richardson</surname>,
                            <given-names>U.</given-names></string-name>,
                            <string-name><surname>Stainthorp</surname>,
                            <given-names>R.</given-names></string-name>,
                            <string-name><surname>Hughes</surname>,
                        <given-names>D.</given-names></string-name>,
                            <string-name><surname>Rosen</surname>,
                        <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Scott</surname>, <given-names>S.
                        K.</given-names></string-name> (<year>2002</year>). <article-title>Amplitude
                        envelope onsets and developmental dyslexia: A new
                    hypothesis</article-title>. <source>Proceedings of the National Academy of
                        Sciences of the United States of America</source>,
                        <volume>99</volume>(<issue>16</issue>),
                        <fpage>10911</fpage>&#8211;<lpage>10916</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1073/pnas.122368599</pub-id></mixed-citation>
            </ref>
            <ref id="B42">
                <label>42</label>
                <mixed-citation publication-type="book"><string-name><surname>Grabe</surname>,
                            <given-names>E.</given-names></string-name>, &amp;
                            <string-name><surname>Low</surname>, <given-names>E.
                        L.</given-names></string-name> (<year>2002</year>).
                        <chapter-title>Durational variability in speech and the rhythm class
                        hypothesis</chapter-title>. In <string-name><given-names>C.</given-names>
                        <surname>Gussenhoven</surname></string-name> &amp;
                            <string-name><given-names>N.</given-names>
                        <surname>Warner</surname></string-name> (Eds.), <source>Laboratory
                        Phonology 7</source> (pp. <fpage>515</fpage>&#8211;<lpage>546</lpage>).
                        <publisher-name>Mouton de Gruyter</publisher-name>. DOI: <pub-id
                        pub-id-type="doi">10.1515/9783110197105</pub-id></mixed-citation>
            </ref>
            <ref id="B43">
                <label>43</label>
                <mixed-citation publication-type="journal"><string-name><surname>Grahn</surname>,
                            <given-names>J. A.</given-names></string-name>, &amp;
                            <string-name><surname>Rowe</surname>, <given-names>J.
                        B.</given-names></string-name> (<year>2009</year>). <article-title>Feeling
                        the beat: Premotor and striatal interactions in musicians and nonmusicians
                        during beat perception</article-title>. <source>Journal of
                        Neuroscience</source>, <volume>29</volume>(<issue>23</issue>),
                        <fpage>7540</fpage>&#8211;<lpage>7548</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1523/JNEUROSCI.2018-08.2009</pub-id></mixed-citation>
            </ref>
            <ref id="B44">
                <label>44</label>
                <mixed-citation publication-type="journal"><string-name><surname>Hommel</surname>,
                            <given-names>B.</given-names></string-name> (<year>2015</year>).
                        <article-title>The theory of event coding (TEC) as embodied-cognition
                        framework</article-title>. <source>Frontiers in Psychology</source>,
                        <volume>6</volume>, <fpage>1318</fpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3389/fpsyg.2015.01318</pub-id></mixed-citation>
            </ref>
            <ref id="B45">
                <label>45</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Iversen</surname>,
                            <given-names>J. R.</given-names></string-name>, &amp;
                            <string-name><surname>Patel</surname>, <given-names>A.
                        D.</given-names></string-name> (<year>2008</year>). <article-title>The Beat
                        Alignment Test (BAT): Surveying beat processing abilities in the general
                        population</article-title>. In <string-name><given-names>K.</given-names>
                        <surname>Miyazaki</surname></string-name>,
                            <string-name><given-names>M.</given-names>
                        <surname>Adachi</surname></string-name>,
                            <string-name><given-names>Y.</given-names>
                        <surname>Hiraga</surname></string-name>,
                            <string-name><given-names>Y.</given-names>
                        <surname>Nakajima</surname></string-name> &amp;
                            <string-name><given-names>M.</given-names>
                        <surname>Tsuzaki</surname></string-name> (Eds.), <conf-name>Proceedings of
                        the 10th International Conference on Music Perception &amp;
                        Cognition</conf-name> (pp. <fpage>465</fpage>&#8211;<lpage>468</lpage>).
                        <conf-sponsor>Causal Productions</conf-sponsor>.</mixed-citation>
            </ref>
            <ref id="B46">
                <label>46</label>
                <mixed-citation publication-type="journal"><string-name><surname>Jantzen</surname>,
                            <given-names>K. J.</given-names></string-name>,
                            <string-name><surname>Oullier</surname>,
                        <given-names>O.</given-names></string-name>,
                            <string-name><surname>Marshall</surname>,
                        <given-names>M.</given-names></string-name>,
                            <string-name><surname>Steinberg</surname>, <given-names>F.
                            L.</given-names></string-name>, &amp;
                            <string-name><surname>Kelso</surname>, <given-names>J. A.
                            S.</given-names></string-name> (<year>2007</year>). <article-title>A
                        parametric fMRI investigation of context effects in sensorimotor timing and
                        coordination</article-title>. <source>Neuropsychologia</source>,
                        <volume>45</volume>(<issue>4</issue>),
                        <fpage>673</fpage>&#8211;<lpage>684</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.neuropsychologia.2006.07.020</pub-id></mixed-citation>
            </ref>
            <ref id="B47">
                <label>47</label>
                <mixed-citation publication-type="journal"><string-name><surname>John</surname>,
                            <given-names>N. C.</given-names></string-name>,
                            <string-name><surname>Varadhan</surname>,
                        <given-names>R.</given-names></string-name>, &amp;
                            <string-name><surname>Gabor</surname>,
                        <given-names>G.</given-names></string-name> (<year>2020</year>).
                        <source>Package &#8220;optimx.&#8221;</source> (pp.
                        <fpage>1</fpage>&#8211;<lpage>87</lpage>).</mixed-citation>
            </ref>
            <ref id="B48">
                <label>48</label>
                <mixed-citation publication-type="journal"><string-name><surname>Jungers</surname>,
                            <given-names>M. K.</given-names></string-name>,
                            <string-name><surname>Palmer</surname>,
                        <given-names>C.</given-names></string-name>, &amp;
                            <string-name><surname>Speer</surname>, <given-names>S.
                        R.</given-names></string-name> (<year>2002</year>). <article-title>Time
                        after time: The coordinating influence of tempo in music and
                        speech</article-title>. <source>Cognitive Processing</source>,
                        <volume>2</volume>(<issue>614</issue>),
                        <fpage>21</fpage>&#8211;<lpage>35</lpage>.</mixed-citation>
            </ref>
            <ref id="B49">
                <label>49</label>
                <mixed-citation publication-type="journal"><string-name><surname>Killick</surname>,
                            <given-names>R.</given-names></string-name>, &amp;
                            <string-name><surname>Eckley</surname>, <given-names>I.
                        A.</given-names></string-name> (<year>2014</year>).
                        <article-title>Changepoint: An R package for changepoint
                        analysis</article-title>. <source>Journal of Statistical Software</source>,
                        <volume>58</volume>(<issue>3</issue>),
                        <fpage>1</fpage>&#8211;<lpage>19</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.18637/jss.v058.i03</pub-id></mixed-citation>
            </ref>
            <ref id="B50">
                <label>50</label>
                <mixed-citation publication-type="journal"><string-name><surname>Koch</surname>,
                            <given-names>G.</given-names></string-name>,
                            <string-name><surname>Oliveri</surname>,
                        <given-names>M.</given-names></string-name>, &amp;
                            <string-name><surname>Caltagirone</surname>,
                            <given-names>C.</given-names></string-name> (<year>2009</year>).
                        <article-title>Neural networks engaged in milliseconds and seconds time
                        processing: Evidence from transcranial magnetic stimulation and patients
                        with cortical or subcortical dysfunction</article-title>.
                        <source>Philosophical Transactions of the Royal Society B: Biological
                        Sciences</source>, <volume>364</volume>(<issue>1525</issue>),
                        <fpage>1907</fpage>&#8211;<lpage>1918</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1098/rstb.2009.0018</pub-id></mixed-citation>
            </ref>
            <ref id="B51">
                <label>51</label>
                <mixed-citation publication-type="journal"><string-name><surname>Kohler</surname>,
                            <given-names>K. J.</given-names></string-name> (<year>2009</year>).
                        <article-title>Rhythm in speech and language: A new research
                        paradigm</article-title>. <source>Phonetica</source>,
                        <volume>66</volume>(<issue>1&#8211;2</issue>),
                        <fpage>29</fpage>&#8211;<lpage>45</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1159/000208929</pub-id></mixed-citation>
            </ref>
            <ref id="B52">
                <label>52</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Kuznetsova</surname>,
                            <given-names>A.</given-names></string-name>,
                            <string-name><surname>Brockhoff</surname>, <given-names>P.
                            B.</given-names></string-name>, &amp;
                            <string-name><surname>Christensen</surname>, <given-names>R. H.
                            B.</given-names></string-name> (<year>2017</year>).
                        <article-title>lmerTest Package: Tests in Linear Mixed Effects
                        Models</article-title>. <source>Journal of Statistical Software</source>,
                        <volume>82</volume>(<issue>13</issue>),
                        <fpage>1</fpage>&#8211;<lpage>26</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.18637/jss.v082.i13</pub-id></mixed-citation>
            </ref>
            <ref id="B53">
                <label>53</label>
                <mixed-citation publication-type="journal"><string-name><surname>Large</surname>,
                            <given-names>E. W.</given-names></string-name>, &amp;
                            <string-name><surname>Jones</surname>, <given-names>M.
                        R.</given-names></string-name> (<year>1999</year>). <article-title>The
                        dynamics of attending: How people track time-varying events</article-title>.
                        <source>Psychological Review</source>,
                    <volume>106</volume>(<issue>1</issue>),
                        <fpage>119</fpage>&#8211;<lpage>159</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1037/0033-295X.106.1.119</pub-id></mixed-citation>
            </ref>
            <ref id="B54">
                <label>54</label>
                <mixed-citation publication-type="journal"><string-name><surname>Large</surname>,
                            <given-names>E. W.</given-names></string-name>, &amp;
                            <string-name><surname>Palmer</surname>,
                        <given-names>C.</given-names></string-name> (<year>2002</year>).
                        <article-title>Perceiving temporal regularity in music</article-title>.
                        <source>Cognitive Science</source>, <volume>26</volume>(<issue>1</issue>),
                        <fpage>1</fpage>&#8211;<lpage>37</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S0364-0213(01)00057-X</pub-id></mixed-citation>
            </ref>
            <ref id="B55">
                <label>55</label>
                <mixed-citation publication-type="journal"><string-name><surname>Lehiste</surname>,
                            <given-names>I.</given-names></string-name> (<year>1977</year>).
                        <article-title>Isochrony reconsidered</article-title>. <source>Journal of
                        Phonetics</source>, <volume>5</volume>(<issue>3</issue>),
                        <fpage>253</fpage>&#8211;<lpage>263</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S0095-4470(19)31139-8</pub-id></mixed-citation>
            </ref>
            <ref id="B56">
                <label>56</label>
                <mixed-citation publication-type="journal"><string-name><surname>Leong</surname>,
                            <given-names>V.</given-names></string-name>, &amp;
                            <string-name><surname>Goswami</surname>,
                        <given-names>U.</given-names></string-name> (<year>2014</year>).
                        <article-title>Assessment of rhythmic entrainment at multiple timescales
                        in dyslexia: Evidence for disruption to syllable timing</article-title>.
                        <source>Hearing Research</source>, <volume>308</volume>,
                        <fpage>141</fpage>&#8211;<lpage>161</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.heares.2013.07.015</pub-id></mixed-citation>
            </ref>
            <ref id="B57">
                <label>57</label>
                <mixed-citation publication-type="journal"><string-name><surname>Lewis</surname>,
                            <given-names>P. A.</given-names></string-name>,
                            <string-name><surname>Wing</surname>, <given-names>A.
                        M.</given-names></string-name>, <string-name><surname>Pope</surname>,
                            <given-names>P. A.</given-names></string-name>,
                            <string-name><surname>Praamstra</surname>,
                        <given-names>P.</given-names></string-name>, &amp;
                            <string-name><surname>Miall</surname>, <given-names>R.
                        C.</given-names></string-name> (<year>2004</year>). <article-title>Brain
                        activity correlates differentially with increasing temporal complexity of
                        rhythms during initialisation, synchronisation, and continuation phases of
                        paced finger tapping</article-title>. <source>Neuropsychologia</source>,
                        <volume>42</volume>(<issue>10</issue>),
                        <fpage>1301</fpage>&#8211;<lpage>1312</lpage>. DOI: <pub-id
                        pub-id-type="doi"
                    >10.1016/j.neuropsychologia.2004.03.001</pub-id></mixed-citation>
            </ref>
            <ref id="B58">
                <label>58</label>
                <mixed-citation publication-type="journal"><string-name><surname>Liberman</surname>,
                            <given-names>M.</given-names></string-name>, &amp;
                            <string-name><surname>Prince</surname>,
                        <given-names>A.</given-names></string-name> (<year>1977</year>).
                        <article-title>On Stress and Linguistic Rhythm</article-title>.
                        <source>Linguistic Inquiry</source>, <volume>8</volume>(<issue>2</issue>),
                        <fpage>249</fpage>&#8211;<lpage>336</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.2307/4177987</pub-id></mixed-citation>
            </ref>
            <ref id="B59">
                <label>59</label>
                <mixed-citation publication-type="journal"><string-name><surname>Lidji</surname>,
                            <given-names>P.</given-names></string-name>,
                            <string-name><surname>Palmer</surname>,
                        <given-names>C.</given-names></string-name>,
                            <string-name><surname>Peretz</surname>,
                        <given-names>I.</given-names></string-name>, &amp;
                            <string-name><surname>Morningstar</surname>,
                            <given-names>M.</given-names></string-name> (<year>2011</year>).
                        <article-title>Listeners feel the beat: Entrainment to English and French
                        speech rhythms</article-title>. <source>Psychonomic Bulletin and
                        Review</source>, <volume>18</volume>(<issue>6</issue>),
                        <fpage>1035</fpage>&#8211;<lpage>1041</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.3758/s13423-011-0163-0</pub-id></mixed-citation>
            </ref>
            <ref id="B60">
                <label>60</label>
                <mixed-citation publication-type="journal"><string-name><surname>ling Low</surname>,
                            <given-names>E.</given-names></string-name>,
                            <string-name><surname>Grabe</surname>,
                        <given-names>E.</given-names></string-name>, &amp;
                            <string-name><surname>Nolan</surname>,
                        <given-names>F.</given-names></string-name> (<year>2000</year>).
                        <article-title>Quantitative characterizations of speech rhythm:
                        Syllable-timing in Singapore English</article-title>. <source>Language and
                        Speech</source>, <volume>43</volume>(<issue>4</issue>),
                        <fpage>377</fpage>&#8211;<lpage>401</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1177/00238309000430040301</pub-id></mixed-citation>
            </ref>
            <ref id="B61">
                <label>61</label>
                <mixed-citation publication-type="journal"><string-name><surname>Liu</surname>,
                            <given-names>X.</given-names></string-name>,
                            <string-name><surname>Wang</surname>,
                        <given-names>S.</given-names></string-name>,
                            <string-name><surname>Yianni</surname>,
                        <given-names>J.</given-names></string-name>,
                            <string-name><surname>Nandi</surname>,
                        <given-names>D.</given-names></string-name>,
                            <string-name><surname>Bain</surname>, <given-names>P.
                        G.</given-names></string-name>, <string-name><surname>Gregory</surname>,
                            <given-names>R.</given-names></string-name>,
                            <string-name><surname>Stein</surname>, <given-names>J.
                        F.</given-names></string-name>, &amp; <string-name><surname>Aziz</surname>,
                            <given-names>T. Z.</given-names></string-name> (<year>2008</year>).
                        <article-title>The sensory and motor representation of synchronized
                        oscillations in the globus pallidus in patients with primary
                        dystonia</article-title>. <source>Brain</source>,
                        <volume>131</volume>(<issue>6</issue>),
                        <fpage>1562</fpage>&#8211;<lpage>1573</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1093/brain/awn083</pub-id></mixed-citation>
            </ref>
            <ref id="B62">
                <label>62</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>L&#252;decke</surname>,
                            <given-names>D.</given-names></string-name> (<year>2019</year>).
                        <source>sjPlot: Data Visualization for Statistics in Social
                    Science</source>. DOI: <pub-id pub-id-type="doi"
                    >10.5281/zenodo.1308157</pub-id></mixed-citation>
            </ref>
            <ref id="B63">
                <label>63</label>
                <mixed-citation publication-type="journal"><string-name><surname>Madison</surname>,
                            <given-names>G.</given-names></string-name> (<year>2014</year>).
                        <article-title>Sensori-motor synchronisation variability decreases as the
                        number of metrical levels in the stimulus signal increases</article-title>.
                        <source>Acta Psychologica</source>, <volume>147</volume>,
                        <fpage>10</fpage>&#8211;<lpage>16</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.actpsy.2013.10.002</pub-id></mixed-citation>
            </ref>
            <ref id="B64">
                <label>64</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Mairano</surname>,
                            <given-names>P.</given-names></string-name>,
                            <string-name><surname>Santiago</surname>,
                        <given-names>F.</given-names></string-name>, &amp;
                            <string-name><surname>Romano</surname>,
                        <given-names>A.</given-names></string-name> (<year>2015</year>).
                        <article-title>Cross-linguistic differences between accented vs. unaccented
                        vowel durations</article-title>. <conf-name>Proceedings of the 18th
                        International Congress of Phonetic Sciences</conf-name>.
                        <uri>https://halshs.archives-ouvertes.fr/halshs-01440315/</uri></mixed-citation>
            </ref>
            <ref id="B65">
                <label>65</label>
                <mixed-citation publication-type="journal"><string-name><surname>Marcus</surname>,
                            <given-names>S. M.</given-names></string-name> (<year>1981</year>).
                        <article-title>Acoustic determinants of perceptual center (P-center)
                        location</article-title>. <source>Perception &amp; Psychophysics</source>,
                        <volume>30</volume>(<issue>3</issue>),
                        <fpage>247</fpage>&#8211;<lpage>256</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/BF03214280</pub-id></mixed-citation>
            </ref>
            <ref id="B66">
                <label>66</label>
                <mixed-citation publication-type="journal"><string-name><surname>Margulis</surname>,
                            <given-names>E. H.</given-names></string-name>, &amp;
                            <string-name><surname>Simchy-Gross</surname>,
                            <given-names>R.</given-names></string-name> (<year>2016</year>).
                        <article-title>Repetition enhances the musicality of randomly generated tone
                        sequences</article-title>. <source>Music Perception</source>,
                        <volume>33</volume>(<issue>4</issue>),
                        <fpage>509</fpage>&#8211;<lpage>514</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1525/mp.2016.33.4.509</pub-id></mixed-citation>
            </ref>
            <ref id="B67">
                <label>67</label>
                <mixed-citation publication-type="journal"><string-name><surname>Mates</surname>,
                            <given-names>J.</given-names></string-name>,
                            <string-name><surname>M&#252;ller</surname>,
                            <given-names>U.</given-names></string-name>,
                            <string-name><surname>Radil</surname>,
                        <given-names>T.</given-names></string-name>, &amp;
                            <string-name><surname>P&#246;ppel</surname>,
                            <given-names>E.</given-names></string-name> (<year>1994</year>).
                        <article-title>Temporal integration in sensorimotor
                        synchronization</article-title>. <source>Journal of Cognitive
                        Neuroscience</source>, <volume>6</volume>(<issue>4</issue>),
                        <fpage>332</fpage>&#8211;<lpage>340</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1162/jocn.1994.6.4.332</pub-id></mixed-citation>
            </ref>
            <ref id="B68">
                <label>68</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Merker</surname>,
                            <given-names>B.</given-names></string-name> (<year>2000</year>).
                        <chapter-title>Synchronous chorusing and human origins</chapter-title>. In
                            <string-name><given-names>N. L.</given-names>
                        <surname>Wallin</surname></string-name>,
                            <string-name><given-names>B.</given-names>
                        <surname>Merker</surname></string-name> &amp;
                            <string-name><given-names>S.</given-names>
                        <surname>Brown</surname></string-name> (Eds.), <source>The origins of
                        music</source> (pp. <fpage>315</fpage>&#8211;<lpage>327</lpage>).
                        <publisher-name>MIT Press</publisher-name>.
                        <uri>http://www.biolinguagem.com/ling_cog_cult/merker_2009_synchronouschorusing_humanorigins.pdf</uri></mixed-citation>
            </ref>
            <ref id="B69">
                <label>69</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Miller</surname>,
                            <given-names>M.</given-names></string-name> (<year>1984</year>).
                        <article-title>On the perception of rhythm</article-title>. <source>Journal
                        of Phonetics</source>, <volume>12</volume>,
                        <fpage>75</fpage>&#8211;<lpage>83</lpage>. DOI: <pub-id pub-id-type="doi"
                    >10.1016/S0095-4470(19)30852-6</pub-id></mixed-citation>
            </ref>
            <ref id="B70">
                <label>70</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Morgan</surname>,
                            <given-names>N.</given-names></string-name>, &amp;
                            <string-name><surname>Fosler-Lussier</surname>,
                            <given-names>E.</given-names></string-name> (<year>1998</year>).
                        <article-title>Combining multiple estimators of speaking
                        rate</article-title>. <conf-name>ICASSP, IEEE International Conference on
                        Acoustics, Speech and Signal Processing &#8211; Proceedings</conf-name>,
                        <volume>2</volume>, <fpage>729</fpage>&#8211;<lpage>732</lpage>. DOI:
                        <pub-id pub-id-type="doi"
                    >10.1109/ICASSP.1998.675368</pub-id></mixed-citation>
            </ref>
            <ref id="B71">
                <label>71</label>
                <mixed-citation publication-type="journal"><string-name><surname>Morillon</surname>,
                            <given-names>B.</given-names></string-name>,
                            <string-name><surname>Schroeder</surname>, <given-names>C.
                            E.</given-names></string-name>, &amp;
                            <string-name><surname>Wyart</surname>,
                        <given-names>V.</given-names></string-name> (<year>2014</year>).
                        <article-title>Motor contributions to the temporal precision of auditory
                        attention</article-title>. <source>Nature Communications</source>,
                        <volume>5</volume>, <fpage>1</fpage>&#8211;<lpage>9</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1038/ncomms6255</pub-id></mixed-citation>
            </ref>
            <ref id="B72">
                <label>72</label>
                <mixed-citation publication-type="journal"><string-name><surname>Morton</surname>,
                            <given-names>J.</given-names></string-name>,
                            <string-name><surname>Marcus</surname>,
                        <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Frankish</surname>,
                        <given-names>C.</given-names></string-name> (<year>1976</year>).
                        <article-title>Perceptual centers (P-centers)</article-title>.
                        <source>Psychological Review</source>,
                    <volume>83</volume>(<issue>5</issue>),
                        <fpage>405</fpage>&#8211;<lpage>408</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1037/0033-295X.83.5.405</pub-id></mixed-citation>
            </ref>
            <ref id="B73">
                <label>73</label>
                <mixed-citation publication-type="book"><string-name><surname>Nespor</surname>,
                            <given-names>M.</given-names></string-name>, &amp;
                            <string-name><surname>Vogel</surname>,
                        <given-names>I.</given-names></string-name> (<year>1986</year>). <source>Prosodic
                        Phonology</source>. <publisher-name>Foris</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B74">
                <label>74</label>
                <mixed-citation publication-type="journal"><string-name><surname>Nolan</surname>,
                            <given-names>F.</given-names></string-name>, &amp;
                            <string-name><surname>Jeon</surname>, <given-names>H.
                        S.</given-names></string-name> (<year>2014</year>). <article-title>Speech
                        rhythm: A metaphor?</article-title>
                    <source>Philosophical Transactions of the Royal Society B: Biological
                        Sciences</source>, <volume>369</volume>(<issue>1658</issue>),
                        <elocation-id>20130396</elocation-id>. DOI: <pub-id pub-id-type="doi"
                        >10.1098/rstb.2013.0396</pub-id></mixed-citation>
            </ref>
            <ref id="B75">
                <label>75</label>
                <mixed-citation publication-type="journal"><string-name><surname>Park</surname>,
                            <given-names>J. E.</given-names></string-name> (<year>2017</year>).
                        <article-title>Apraxia: Review and update</article-title>.
                        <source>Journal of Clinical Neurology (Korea)</source>,
                        <volume>13</volume>(<issue>4</issue>),
                        <fpage>317</fpage>&#8211;<lpage>324</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3988/jcn.2017.13.4.317</pub-id></mixed-citation>
            </ref>
            <ref id="B76">
                <label>76</label>
                <mixed-citation publication-type="journal"><string-name><surname>Patel</surname>,
                            <given-names>A. D.</given-names></string-name>, &amp;
                            <string-name><surname>Iversen</surname>, <given-names>J.
                            R.</given-names></string-name> (<year>2014</year>). <article-title>The
                        evolutionary neuroscience of musical beat perception: The Action Simulation
                        for Auditory Prediction (ASAP) hypothesis</article-title>. <source>Frontiers
                        in Systems Neuroscience</source>, <volume>8</volume>,
                        <fpage>1</fpage>&#8211;<lpage>14</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3389/fnsys.2014.00057</pub-id></mixed-citation>
            </ref>
            <ref id="B77">
                <label>77</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Patel</surname>,
                            <given-names>A. D.</given-names></string-name>,
                            <string-name><surname>L&#246;fqvist</surname>,
                            <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Naito</surname>,
                        <given-names>W.</given-names></string-name> (<year>1999</year>).
                        <article-title>The acoustics and kinematics of regularly timed speech: A
                        database and method for the study of the p-center problem</article-title>.
                        <conf-name>Proceedings of the 14th International Congress of Phonetic
                        Sciences</conf-name>, <volume>1</volume>,
                        <fpage>405</fpage>&#8211;<lpage>408</lpage>.
                        <uri>https://pdfs.semanticscholar.org/28c8/b08e2f68db0261a2fc17e6df59d27750967b.pdf</uri></mixed-citation>
            </ref>
            <ref id="B78">
                <label>78</label>
                <mixed-citation publication-type="journal"><string-name><surname>Peelle</surname>,
                            <given-names>J. E.</given-names></string-name>, &amp;
                            <string-name><surname>Davis</surname>, <given-names>M.
                        H.</given-names></string-name> (<year>2012</year>). <article-title>Neural
                        oscillations carry speech rhythm through to comprehension</article-title>.
                        <source>Frontiers in Psychology</source>, <volume>3</volume>,
                        <fpage>1</fpage>&#8211;<lpage>17</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3389/fpsyg.2012.00320</pub-id></mixed-citation>
            </ref>
            <ref id="B79">
                <label>79</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Pointon</surname>,
                            <given-names>G. E.</given-names></string-name> (<year>1980</year>).
                        <article-title>Is Spanish really syllable-timed?</article-title>
                    <source>Journal of Phonetics</source>, <volume>8</volume>(<issue>1</issue>),
                        <fpage>293</fpage>&#8211;<lpage>304</lpage>.
                        <uri>https://eric.ed.gov/?id=EJ236834</uri>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S0095-4470(19)31479-2</pub-id></mixed-citation>
            </ref>
            <ref id="B80">
                <label>80</label>
                <mixed-citation publication-type="journal"><string-name><surname>Polak</surname>,
                            <given-names>R.</given-names></string-name>,
                            <string-name><surname>Jacoby</surname>,
                        <given-names>N.</given-names></string-name>,
                            <string-name><surname>Fischinger</surname>,
                            <given-names>T.</given-names></string-name>,
                            <string-name><surname>Goldberg</surname>,
                        <given-names>D.</given-names></string-name>,
                            <string-name><surname>Holzapfel</surname>,
                        <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>London</surname>,
                        <given-names>J.</given-names></string-name> (<year>2018</year>).
                        <article-title>Rhythmic Prototypes Across Cultures</article-title>.
                        <source>Music Perception: An Interdisciplinary Journal</source>,
                        <volume>36</volume>(<issue>1</issue>),
                        <fpage>1</fpage>&#8211;<lpage>23</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1525/mp.2018.36.1.1</pub-id></mixed-citation>
            </ref>
            <ref id="B81">
                <label>81</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>P&#246;ppel</surname>,
                            <given-names>E.</given-names></string-name> (<year>1997</year>).
                        <article-title>A hierarchical model of temporal perception</article-title>.
                        <source>Trends in Cognitive Sciences</source>,
                        <volume>1</volume>(<issue>2</issue>),
                        <fpage>56</fpage>&#8211;<lpage>61</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S1364-6613(97)01008-5</pub-id></mixed-citation>
            </ref>
            <ref id="B82">
                <label>82</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Port</surname>,
                            <given-names>R.</given-names></string-name>,
                            <string-name><surname>Cummins</surname>,
                        <given-names>F.</given-names></string-name>, &amp;
                            <string-name><surname>Gasser</surname>,
                        <given-names>M.</given-names></string-name> (<year>1995</year>).
                        <article-title>A Dynamic Approach to Rhythm in Language: Toward a Temporal
                        Phonology</article-title>. In <string-name><given-names>B.</given-names>
                        <surname>Luka</surname></string-name> &amp;
                            <string-name><given-names>B.</given-names>
                        <surname>Need</surname></string-name> (Eds.), <conf-name>Proceedings of the
                        Chicago Linguistic Society</conf-name> (pp.
                        <fpage>375</fpage>&#8211;<lpage>397</lpage>). <conf-sponsor>University of
                        Chicago, Department of Linguistics</conf-sponsor>.
                        <uri>http://arxiv.org/abs/cmp-lg/9508007</uri></mixed-citation>
            </ref>
            <ref id="B83">
                <label>83</label>
                <mixed-citation publication-type="journal"><string-name><surname>Pressing</surname>,
                            <given-names>J.</given-names></string-name>, &amp;
                            <string-name><surname>Jolley-Rogers</surname>,
                            <given-names>G.</given-names></string-name> (<year>1997</year>).
                        <article-title>Spectral properties of human cognition and
                        skill</article-title>. <source>Biological Cybernetics</source>,
                        <volume>76</volume>(<issue>5</issue>),
                        <fpage>339</fpage>&#8211;<lpage>347</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1007/s004220050347</pub-id></mixed-citation>
            </ref>
            <ref id="B84">
                <label>84</label>
                <mixed-citation publication-type="journal"><string-name><surname>Prince</surname>,
                            <given-names>A. S.</given-names></string-name> (<year>1983</year>).
                        <article-title>Relating to the Grid</article-title>. <source>Linguistic
                        Inquiry</source>, <volume>14</volume>(<issue>1</issue>),
                        <fpage>19</fpage>&#8211;<lpage>100</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.2307/4178311</pub-id></mixed-citation>
            </ref>
            <ref id="B85">
                <label>85</label>
                <mixed-citation publication-type="journal"><string-name><surname>Ramus</surname>,
                            <given-names>F.</given-names></string-name>,
                            <string-name><surname>Nespor</surname>,
                        <given-names>M.</given-names></string-name>, &amp;
                            <string-name><surname>Mehler</surname>,
                        <given-names>J.</given-names></string-name> (<year>1999</year>).
                        <article-title>Correlates of linguistic rhythm in the speech
                        signal</article-title>. <source>Cognition</source>,
                        <volume>73</volume>(<issue>3</issue>),
                        <fpage>265</fpage>&#8211;<lpage>292</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/S0010-0277(99)00058-X</pub-id></mixed-citation>
            </ref>
            <ref id="B86">
                <label>86</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>R&#228;s&#228;nen</surname>,
                            <given-names>O.</given-names></string-name>,
                            <string-name><surname>Doyle</surname>,
                        <given-names>G.</given-names></string-name>, &amp;
                            <string-name><surname>Frank</surname>, <given-names>M.
                        C.</given-names></string-name> (<year>2018</year>).
                        <article-title>Pre-linguistic segmentation of speech into syllable-like
                        units</article-title>. <source>Cognition</source>, <volume>171</volume>,
                        <fpage>130</fpage>&#8211;<lpage>150</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.cognition.2017.11.003</pub-id></mixed-citation>
            </ref>
            <ref id="B87">
                <label>87</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Rathcke</surname>,
                            <given-names>T.</given-names></string-name>,
                            <string-name><surname>Lin</surname>,
                        <given-names>C.-Y.</given-names></string-name>,
                            <string-name><surname>Falk</surname>,
                        <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Dalla Bella</surname>,
                            <given-names>S.</given-names></string-name> (<year>2019</year>).
                        <article-title>When language hits the beat: Synchronising movement to simple
                        tonal and verbal stimuli</article-title>. In
                            <string-name><given-names>S.</given-names>
                        <surname>Calhoun</surname></string-name>,
                            <string-name><given-names>P.</given-names>
                        <surname>Escudero</surname></string-name>,
                            <string-name><given-names>M.</given-names>
                        <surname>Tabain</surname></string-name> &amp;
                            <string-name><given-names>P.</given-names>
                        <surname>Warren</surname></string-name> (Eds.), <conf-name>Proceedings of
                        the 19th International Congress of Phonetic Sciences, Melbourne,
                        Australia</conf-name> (pp. <fpage>1505</fpage>&#8211;<lpage>1509</lpage>).
                        <conf-sponsor>Australasian Speech Science and Technology Association
                        Inc</conf-sponsor>.
                        <uri>https://www.researchgate.net/profile/Tamara_Rathcke/publication/332060854_When_language_hits_the_beat_Synchronising_movement_to_simple_tonal_and_verbal_stimuli/links/5c9d11b292851cf0ae9da23e/When-language-hits-the-beat-Synchronising-movement-to-simple-ton</uri></mixed-citation>
            </ref>
            <ref id="B88">
                <label>88</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Rathcke</surname>,
                            <given-names>T. V.</given-names></string-name>,
                            <string-name><surname>Falk</surname>,
                        <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Dalla Bella</surname>,
                            <given-names>S.</given-names></string-name> (<year>2018</year>).
                        <article-title>Linguistic structure and listener characteristics modulate
                        the &#8220;speech-to-song illusion.&#8221;</article-title>
                    <conf-name>15th International Conference on Music Perception and
                        Cognition</conf-name>.</mixed-citation>
            </ref>
            <ref id="B89">
                <label>89</label>
                <mixed-citation publication-type="journal"><string-name><surname>Rathcke</surname>,
                            <given-names>T. V.</given-names></string-name>, &amp;
                            <string-name><surname>Smith</surname>, <given-names>R.
                        H.</given-names></string-name> (<year>2015a</year>). <article-title>Speech
                        timing and linguistic rhythm: On the acoustic bases of rhythm
                        typologies</article-title>. <source>The Journal of the Acoustical Society of
                        America</source>, <volume>137</volume>(<issue>5</issue>),
                        <fpage>2834</fpage>&#8211;<lpage>2845</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1121/1.4919322</pub-id></mixed-citation>
            </ref>
            <ref id="B90">
                <label>90</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Rathcke</surname>,
                            <given-names>T. V.</given-names></string-name>, &amp;
                            <string-name><surname>Smith</surname>, <given-names>R.
                        H.</given-names></string-name> (<year>2015b</year>). <article-title>Rhythm
                        class perception by expert phoneticians</article-title>. In <collab>The
                        Scottish Consortium for ICPhS 2015</collab> (Ed.), <conf-name>Proceedings of the 18th
                        International Congress of Phonetic Sciences</conf-name> (pp.
                        <fpage>1</fpage>&#8211;<lpage>5</lpage>). <conf-loc>London</conf-loc>:
                        <conf-sponsor>International Phonetic Association</conf-sponsor>.
                        <uri>https://www.internationalphoneticassociation.org/icphs-proceedings/ICPhS2015/Papers/ICPHS0403.pdf</uri></mixed-citation>
            </ref>
            <ref id="B91">
                <label>91</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Ravignani</surname>,
                        <given-names>A.</given-names></string-name>, <string-name><surname>Dalla
                            Bella</surname>, <given-names>S.</given-names></string-name>,
                            <string-name><surname>Falk</surname>,
                        <given-names>S.</given-names></string-name>,
                            <string-name><surname>Kello</surname>, <given-names>C.
                        T.</given-names></string-name>, <string-name><surname>Noriega</surname>,
                            <given-names>F.</given-names></string-name>, &amp;
                            <string-name><surname>Kotz</surname>, <given-names>S.
                        A.</given-names></string-name> (<year>2019</year>). <article-title>Rhythm in
                        speech and animal vocalizations: a cross-species
                    perspective</article-title>. <source>Annals of the New York Academy of
                        Sciences</source>. DOI: <pub-id pub-id-type="doi"
                        >10.1111/nyas.14166</pub-id></mixed-citation>
            </ref>
            <ref id="B92">
                <label>92</label>
                <mixed-citation publication-type="journal"><string-name><surname>Repp</surname>,
                            <given-names>B. H.</given-names></string-name> (<year>2003</year>).
                        <article-title>Rate Limits in Sensorimotor Synchronization With Auditory and
                        Visual Sequences: The Synchronization Threshold and the Benefits and Costs
                        of Interval Subdivision</article-title>. <source>Journal of Motor
                        Behavior</source>, <volume>35</volume>(<issue>4</issue>),
                        <fpage>355</fpage>&#8211;<lpage>370</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1080/00222890309603156</pub-id></mixed-citation>
            </ref>
            <ref id="B93">
                <label>93</label>
                <mixed-citation publication-type="journal"><string-name><surname>Repp</surname>,
                            <given-names>B. H.</given-names></string-name> (<year>2004</year>).
                        <article-title>On the nature of phase attraction in sensorimotor
                        synchronization with interleaved auditory sequences</article-title>.
                        <source>Human Movement Science</source>,
                        <volume>23</volume>(<issue>3&#8211;4</issue>),
                        <fpage>389</fpage>&#8211;<lpage>413</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.humov.2004.08.014</pub-id></mixed-citation>
            </ref>
            <ref id="B94">
                <label>94</label>
                <mixed-citation publication-type="journal"><string-name><surname>Repp</surname>,
                            <given-names>B. H.</given-names></string-name> (<year>2005</year>).
                        <article-title>Sensorimotor synchronization: A review of the tapping
                        literature</article-title>. <source>Psychonomic Bulletin and
                    Review</source>, <volume>12</volume>(<issue>6</issue>),
                        <fpage>969</fpage>&#8211;<lpage>992</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/BF03206433</pub-id></mixed-citation>
            </ref>
            <ref id="B95">
                <label>95</label>
                <mixed-citation publication-type="journal"><string-name><surname>Repp</surname>,
                            <given-names>B. H.</given-names></string-name>, &amp;
                            <string-name><surname>Penel</surname>,
                        <given-names>A.</given-names></string-name> (<year>2002</year>).
                        <article-title>Auditory Dominance in Temporal Processing: New Evidence from
                        Synchronization with Simultaneous Visual and Auditory
                        Sequences</article-title>. <source>Journal of Experimental Psychology: Human
                        Perception and Performance</source>, <volume>28</volume>(<issue>5</issue>),
                        <fpage>1085</fpage>&#8211;<lpage>1099</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1037/0096-1523.28.5.1085</pub-id></mixed-citation>
            </ref>
            <ref id="B96">
                <label>96</label>
                <mixed-citation publication-type="journal"><string-name><surname>Repp</surname>,
                            <given-names>B. H.</given-names></string-name>, &amp;
                            <string-name><surname>Su</surname>,
                        <given-names>Y.-H.</given-names></string-name> (<year>2013</year>).
                        <article-title>Sensorimotor synchronization: A review of recent research
                        (2006&#8211;2012)</article-title>. <source>Psychonomic Bulletin and
                        Review</source>, <volume>20</volume>(<issue>3</issue>),
                        <fpage>403</fpage>&#8211;<lpage>452</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/s13423-012-0371-2</pub-id></mixed-citation>
            </ref>
            <ref id="B97">
                <label>97</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Roach</surname>,
                            <given-names>P.</given-names></string-name> (<year>1982</year>).
                        <chapter-title>On the distinction between &#8220;stress-timed&#8221; and
                        &#8220;syllable-timed&#8221; languages</chapter-title>. In
                            <string-name><given-names>D.</given-names>
                        <surname>Crystal</surname></string-name> (Ed.), <source>Linguistic
                        Controversies</source> (pp. <fpage>73</fpage>&#8211;<lpage>79</lpage>).
                        <publisher-name>Edward Arnold</publisher-name>.
                        <uri>http://w3.salemstate.edu/~jaske/courses/readings/On_the_distinction_between_stress-timed_and_syllable-timed_languages_By_Peter_Roach.pdf</uri></mixed-citation>
            </ref>
            <ref id="B98">
                <label>98</label>
                <mixed-citation publication-type="journal"><string-name><surname>Rowland</surname>,
                            <given-names>J.</given-names></string-name>,
                            <string-name><surname>Kasdan</surname>,
                        <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Poeppel</surname>,
                        <given-names>D.</given-names></string-name> (<year>2019</year>).
                        <article-title>There is music in repetition: Looped segments of speech and
                        nonspeech induce the perception of music in a time-dependent
                        manner</article-title>. <source>Psychonomic Bulletin and Review</source>,
                        <volume>26</volume>(<issue>2</issue>),
                        <fpage>583</fpage>&#8211;<lpage>590</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.3758/s13423-018-1527-5</pub-id></mixed-citation>
            </ref>
            <ref id="B99">
                <label>99</label>
                <mixed-citation publication-type="journal"><string-name><surname>Scott</surname>,
                            <given-names>D. R.</given-names></string-name>,
                            <string-name><surname>Isard</surname>, <given-names>S.
                        D.</given-names></string-name>, &amp; <string-name><surname>de
                            Boysson-Bardies</surname>, <given-names>B.</given-names></string-name>
                        (<year>1985</year>). <article-title>Perceptual isochrony in English and
                        French</article-title>. <source>Journal of Phonetics</source>,
                        <volume>13</volume>, <fpage>155</fpage>&#8211;<lpage>162</lpage>. DOI:
                        <pub-id pub-id-type="doi"
                    >10.1016/S0095-4470(19)30743-0</pub-id></mixed-citation>
            </ref>
            <ref id="B100">
                <label>100</label>
                <mixed-citation publication-type="journal"><string-name><surname>Seifart</surname>,
                            <given-names>F.</given-names></string-name>,
                            <string-name><surname>Meyer</surname>,
                        <given-names>J.</given-names></string-name>,
                            <string-name><surname>Grawunder</surname>,
                        <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Dentel</surname>,
                        <given-names>L.</given-names></string-name> (<year>2018</year>).
                        <article-title>Reducing language to rhythm: Amazonian Bora drummed language
                        exploits speech rhythm for long-distance communication</article-title>.
                        <source>Royal Society Open Science</source>,
                        <volume>5</volume>(<issue>4</issue>). DOI: <pub-id pub-id-type="doi"
                        >10.1098/rsos.170354</pub-id></mixed-citation>
            </ref>
            <ref id="B101">
                <label>101</label>
                <mixed-citation publication-type="book"><string-name><surname>Selkirk</surname>,
                            <given-names>E.</given-names></string-name> (<year>1984</year>).
                        <chapter-title>On the major class features and syllable
                        theory</chapter-title>. In <string-name><given-names>M.</given-names>
                        <surname>Aronoff</surname></string-name> &amp; <string-name><given-names>R.
                            T.</given-names>
                        <surname>Oehrle</surname></string-name> (Eds.), <source>Language sound
                        structure</source>. <publisher-name>MIT
                    Press</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B102">
                <label>102</label>
                <mixed-citation publication-type="journal"><string-name><surname>Serrien</surname>,
                            <given-names>D. J.</given-names></string-name> (<year>2008</year>).
                        <article-title>The neural dynamics of timed motor tasks: Evidence from a
                        synchronization-continuation paradigm</article-title>. <source>European
                        Journal of Neuroscience</source>, <volume>27</volume>(<issue>6</issue>),
                        <fpage>1553</fpage>&#8211;<lpage>1560</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1111/j.1460-9568.2008.06110.x</pub-id></mixed-citation>
            </ref>
            <ref id="B103">
                <label>103</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>&#352;turm</surname>,
                            <given-names>P.</given-names></string-name>, &amp;
                            <string-name><surname>Vol&#237;n</surname>,
                            <given-names>J.</given-names></string-name> (<year>2016</year>).
                        <article-title>P-centres in natural disyllabic Czech words in a large-scale
                        speech-metronome synchronization experiment</article-title>. <source>Journal
                        of Phonetics</source>, <volume>55</volume>,
                        <fpage>38</fpage>&#8211;<lpage>52</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.wocn.2015.11.003</pub-id></mixed-citation>
            </ref>
            <ref id="B104">
                <label>104</label>
                <mixed-citation publication-type="journal"><string-name><surname>Su</surname>,
                            <given-names>Y. H.</given-names></string-name>, &amp;
                            <string-name><surname>P&#246;ppel</surname>,
                            <given-names>E.</given-names></string-name> (<year>2012</year>).
                        <article-title>Body movement enhances the extraction of temporal structures
                        in auditory sequences</article-title>. <source>Psychological
                        Research</source>, <volume>76</volume>(<issue>3</issue>),
                        <fpage>373</fpage>&#8211;<lpage>382</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1007/s00426-011-0346-3</pub-id></mixed-citation>
            </ref>
            <ref id="B105">
                <label>105</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Tanaka</surname>,
                            <given-names>H.</given-names></string-name>, &amp;
                            <string-name><surname>Rathcke</surname>,
                        <given-names>T.</given-names></string-name> (<year>2016</year>).
                        <article-title>Then, what is charisma? The role of audio-visual prosody in
                        L1 and L2 political speeches</article-title>. <conf-name>Proceedings of
                        Phonetik &amp; Phonologie Im Deutschsprachigen Raum, Munich: LMU</conf-name>
                    (pp. <fpage>294</fpage>&#8211;<lpage>306</lpage>).</mixed-citation>
            </ref>
            <ref id="B106">
                <label>106</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Thaut</surname>,
                            <given-names>M. H.</given-names></string-name>,
                            <string-name><surname>Rathbun</surname>, <given-names>J.
                            A.</given-names></string-name>, &amp;
                            <string-name><surname>Miller</surname>, <given-names>R.
                        A.</given-names></string-name> (<year>1997</year>). <article-title>Music
                        versus metronome timekeeper in a rhythmic motor task</article-title>.
                        <source>International Journal of Arts Medicine</source>, <volume>5</volume>,
                        <fpage>4</fpage>&#8211;<lpage>12</lpage>.
                        <uri>https://psycnet.apa.org/record/1998-00056-001</uri></mixed-citation>
            </ref>
            <ref id="B107">
                <label>107</label>
                <mixed-citation publication-type="journal"><string-name><surname>Tilsen</surname>,
                            <given-names>S.</given-names></string-name>, &amp;
                            <string-name><surname>Arvaniti</surname>,
                        <given-names>A.</given-names></string-name> (<year>2013</year>).
                        <article-title>Speech rhythm analysis with decomposition of the amplitude
                        envelope: Characterizing rhythmic patterns within and across
                        languages</article-title>. <source>The Journal of the Acoustical Society of
                        America</source>, <volume>134</volume>(<issue>1</issue>),
                        <fpage>628</fpage>&#8211;<lpage>639</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1121/1.4807565</pub-id></mixed-citation>
            </ref>
            <ref id="B108">
                <label>108</label>
                <mixed-citation publication-type="journal"><string-name><surname>Truman</surname>,
                            <given-names>G.</given-names></string-name>, &amp;
                            <string-name><surname>Hammond</surname>, <given-names>G.
                            R.</given-names></string-name> (<year>1990</year>).
                        <article-title>Temporal regularity of tapping by the left and right hands in
                        timed and untimed finger tapping</article-title>. <source>Journal of Motor
                        Behavior</source>, <volume>22</volume>(<issue>4</issue>),
                        <fpage>521</fpage>&#8211;<lpage>535</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1080/00222895.1990.10735526</pub-id></mixed-citation>
            </ref>
            <ref id="B109">
                <label>109</label>
                <mixed-citation publication-type="book"><string-name><surname>Uldall</surname>,
                            <given-names>E. T.</given-names></string-name> (<year>1971</year>).
                        <chapter-title>Isochronous stresses in RP</chapter-title>. In
                            <string-name><given-names>L.</given-names>
                        <surname>Hammerich</surname></string-name>,
                            <string-name><given-names>R.</given-names>
                        <surname>Jakobson</surname></string-name> &amp;
                            <string-name><given-names>E.</given-names>
                        <surname>Zwirner</surname></string-name> (Eds.), <source>Form and
                        substance</source> (pp. <fpage>205</fpage>&#8211;<lpage>210</lpage>).
                        <publisher-name>Akademisk Forlag</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B110">
                <label>110</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Valdesolo</surname>,
                        <given-names>P.</given-names></string-name>,
                            <string-name><surname>Ouyang</surname>,
                        <given-names>J.</given-names></string-name>, &amp;
                            <string-name><surname>DeSteno</surname>,
                        <given-names>D.</given-names></string-name> (<year>2010</year>).
                        <article-title>The rhythm of joint action: Synchrony promotes cooperative
                        ability</article-title>. <source>Journal of Experimental Social
                        Psychology</source>, <volume>46</volume>(<issue>4</issue>),
                        <fpage>693</fpage>&#8211;<lpage>695</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.jesp.2010.03.004</pub-id></mixed-citation>
            </ref>
            <ref id="B111">
                <label>111</label>
                <mixed-citation publication-type="journal"><string-name><surname>van
                            Santen</surname>, <given-names>J. P. H.</given-names></string-name>,
                    &amp; <string-name><surname>Shih</surname>,
                        <given-names>C.</given-names></string-name> (<year>2000</year>).
                        <article-title>Suprasegmental and segmental timing models in Mandarin
                        Chinese and American English</article-title>. <source>The Journal of the
                        Acoustical Society of America</source>,
                        <volume>107</volume>(<issue>2</issue>),
                        <fpage>1012</fpage>&#8211;<lpage>1026</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1121/1.428281</pub-id></mixed-citation>
            </ref>
            <ref id="B112">
                <label>112</label>
                <mixed-citation publication-type="journal"><string-name><surname>Villing</surname>,
                            <given-names>R. C.</given-names></string-name>,
                            <string-name><surname>Repp</surname>, <given-names>B.
                        H.</given-names></string-name>, <string-name><surname>Ward</surname>,
                            <given-names>T. E.</given-names></string-name>, &amp;
                            <string-name><surname>Timoney</surname>, <given-names>J.
                            M.</given-names></string-name> (<year>2011</year>).
                        <article-title>Measuring perceptual centers using the phase correction
                        response</article-title>. <source>Attention, Perception, and
                        Psychophysics</source>, <volume>73</volume>(<issue>5</issue>),
                        <fpage>1614</fpage>&#8211;<lpage>1629</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.3758/s13414-011-0110-1</pub-id></mixed-citation>
            </ref>
            <ref id="B113">
                <label>113</label>
                <mixed-citation publication-type="journal"><string-name><surname>Vos</surname>,
                            <given-names>P. G.</given-names></string-name>,
                            <string-name><surname>van Kruysbergen</surname>, <given-names>N.
                            W.</given-names></string-name>, &amp;
                            <string-name><surname>Mates</surname>,
                        <given-names>J.</given-names></string-name> (<year>1995</year>).
                        <article-title>The Perceptual Centre of a Stimulus as the Cue for
                        Synchronization to a Metronome: Evidence from Asynchronies</article-title>.
                        <source>The Quarterly Journal of Experimental Psychology Section A</source>,
                        <volume>48</volume>(<issue>4</issue>),
                        <fpage>1024</fpage>&#8211;<lpage>1040</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1080/14640749508401427</pub-id></mixed-citation>
            </ref>
            <ref id="B114">
                <label>114</label>
                <mixed-citation publication-type="journal"><string-name><surname>Wagner</surname>,
                            <given-names>P.</given-names></string-name>,
                            <string-name><surname>&#262;wiek</surname>,
                        <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Samlowski</surname>,
                        <given-names>B.</given-names></string-name> (<year>2019</year>).
                        <article-title>Exploiting the speech-gesture link to capture fine-grained
                        prominence impressions and listening strategies</article-title>.
                        <source>Journal of Phonetics</source>, <volume>76</volume>,
                        <elocation-id>100911</elocation-id>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.wocn.2019.07.001</pub-id></mixed-citation>
            </ref>
            <ref id="B115">
                <label>115</label>
                <mixed-citation publication-type="journal"><string-name><surname>Wagner</surname>,
                            <given-names>P.</given-names></string-name>,
                            <string-name><surname>Malisz</surname>,
                        <given-names>Z.</given-names></string-name>, &amp;
                            <string-name><surname>Kopp</surname>,
                        <given-names>S.</given-names></string-name> (<year>2014</year>).
                        <article-title>Gesture and speech in interaction: An
                        overview</article-title>. <source>Speech Communication</source>,
                        <volume>57</volume>, <fpage>209</fpage>&#8211;<lpage>232</lpage>. DOI:
                        <pub-id pub-id-type="doi"
                    >10.1016/j.specom.2013.09.008</pub-id></mixed-citation>
            </ref>
            <ref id="B116">
                <label>116</label>
                <mixed-citation publication-type="confproc"><string-name><surname>Wang</surname>,
                            <given-names>D.</given-names></string-name>, &amp;
                            <string-name><surname>Narayanan</surname>, <given-names>S.
                            S.</given-names></string-name> (<year>2007</year>).
                        <article-title>Robust speech rate estimation for spontaneous
                        speech</article-title>. <source>IEEE Transactions on Audio, Speech, and
                        Language Processing</source>, <volume>15</volume>(<issue>8</issue>),
                        <fpage>2190</fpage>&#8211;<lpage>2201</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1109/TASL.2007.905178</pub-id></mixed-citation>
            </ref>
            <ref id="B117">
                <label>117</label>
                <mixed-citation publication-type="journal"><string-name><surname>White</surname>,
                            <given-names>L.</given-names></string-name>, &amp;
                            <string-name><surname>Mattys</surname>, <given-names>S.
                        L.</given-names></string-name> (<year>2007</year>).
                        <article-title>Calibrating rhythm: First language and second language
                        studies</article-title>. <source>Journal of Phonetics</source>,
                        <volume>35</volume>(<issue>4</issue>),
                        <fpage>501</fpage>&#8211;<lpage>522</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.wocn.2007.02.003</pub-id></mixed-citation>
            </ref>
            <ref id="B118">
                <label>118</label>
                <mixed-citation publication-type="journal"><string-name><surname>White</surname>,
                            <given-names>L.</given-names></string-name>,
                            <string-name><surname>Mattys</surname>, <given-names>S.
                        L.</given-names></string-name>, &amp; <string-name><surname>Wiget</surname>,
                            <given-names>L.</given-names></string-name> (<year>2012</year>).
                        <article-title>Language categorization by adults is based on sensitivity to
                        durational cues, not rhythm class</article-title>. <source>Journal of Memory
                        and Language</source>, <volume>66</volume>(<issue>4</issue>),
                        <fpage>665</fpage>&#8211;<lpage>679</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.jml.2011.12.010</pub-id></mixed-citation>
            </ref>
            <ref id="B119">
                <label>119</label>
                <mixed-citation publication-type="webpage"><string-name><surname>White</surname>,
                            <given-names>L.</given-names></string-name>,
                            <string-name><surname>Payne</surname>,
                        <given-names>E.</given-names></string-name>, &amp;
                            <string-name><surname>Mattys</surname>, <given-names>S.
                        L.</given-names></string-name> (<year>2009</year>). <chapter-title>Rhythmic
                        and prosodic contrast in Venetan and Sicilian Italian</chapter-title>. In
                            <string-name><given-names>M.</given-names>
                        <surname>Vig&#225;rio</surname></string-name>,
                            <string-name><given-names>S.</given-names>
                        <surname>Frota</surname></string-name> &amp; <string-name><given-names>M.
                            J.</given-names>
                        <surname>Freitas</surname></string-name> (Eds.), <source>Phonetics and
                        Phonology: Interactions and Interrelations</source> (pp.
                        <fpage>137</fpage>&#8211;<lpage>158</lpage>). <publisher-name>John
                        Benjamins</publisher-name>.
                        <uri>https://www.researchgate.net/profile/Laurence_White3/publication/256944638_Rhythmic_and_prosodic_contrast_in_Venetan_and_Sicilian_Italian/links/57b19c0208ae15c76cbb163d/Rhythmic-and-prosodic-contrast-in-Venetan-and-Sicilian-Italian.pdf</uri>.
                    DOI: <pub-id pub-id-type="doi">10.1075/cilt.306.07whi</pub-id></mixed-citation>
            </ref>
            <ref id="B120">
                <label>120</label>
                <mixed-citation publication-type="book"><string-name><surname>Wickham</surname>,
                            <given-names>H.</given-names></string-name> (<year>2016</year>).
                        <source>ggplot2: Elegant Graphics for Data Analysis</source>.
                        <publisher-loc>New York</publisher-loc>:
                        <publisher-name>Springer-Verlag</publisher-name>. DOI: <pub-id
                        pub-id-type="doi">10.1007/978-3-319-24277-4_9</pub-id></mixed-citation>
            </ref>
            <ref id="B121">
                <label>121</label>
                <mixed-citation publication-type="journal"><string-name><surname>Wiget</surname>,
                            <given-names>L.</given-names></string-name>,
                            <string-name><surname>White</surname>,
                        <given-names>L.</given-names></string-name>,
                            <string-name><surname>Schuppler</surname>,
                        <given-names>B.</given-names></string-name>,
                            <string-name><surname>Grenon</surname>,
                        <given-names>I.</given-names></string-name>,
                            <string-name><surname>Rauch</surname>,
                        <given-names>O.</given-names></string-name>, &amp;
                            <string-name><surname>Mattys</surname>, <given-names>S.
                        L.</given-names></string-name> (<year>2010</year>). <article-title>How
                        stable are acoustic metrics of contrastive speech rhythm?</article-title>
                    <source>The Journal of the Acoustical Society of America</source>,
                        <volume>127</volume>(<issue>3</issue>),
                        <fpage>1559</fpage>&#8211;<lpage>1569</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1121/1.3293004</pub-id></mixed-citation>
            </ref>
            <ref id="B122">
                <label>122</label>
                <mixed-citation publication-type="journal"><string-name><surname>Wing</surname>,
                            <given-names>A. M.</given-names></string-name> (<year>2002</year>).
                        <article-title>Voluntary timing and brain function: An information
                        processing approach</article-title>. <source>Brain and Cognition</source>,
                        <volume>48</volume>(<issue>1</issue>),
                        <fpage>7</fpage>&#8211;<lpage>30</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1006/brcg.2001.1301</pub-id></mixed-citation>
            </ref>
            <ref id="B123">
                <label>123</label>
                <mixed-citation publication-type="book"
                            ><string-name><surname>Wohlschl&#228;ger</surname>,
                            <given-names>A.</given-names></string-name>, &amp;
                            <string-name><surname>Koch</surname>,
                        <given-names>R.</given-names></string-name> (<year>2000</year>).
                        <chapter-title>Synchronization error: An error in time
                        perception</chapter-title>. In <string-name><given-names>P.</given-names>
                        <surname>Desain</surname></string-name> &amp;
                            <string-name><given-names>L.</given-names>
                        <surname>Windsor</surname></string-name> (Eds.), <source>Rhythm Perception
                        and Production</source> (pp. <fpage>115</fpage>&#8211;<lpage>127</lpage>).
                        <publisher-name>Swets &amp; Zeitlinger</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B124">
                <label>124</label>
                <mixed-citation publication-type="journal"><string-name><surname>Zatorre</surname>,
                            <given-names>R. J.</given-names></string-name>,
                            <string-name><surname>Chen</surname>, <given-names>J.
                        L.</given-names></string-name>, &amp;
                            <string-name><surname>Penhune</surname>, <given-names>V.
                            B.</given-names></string-name> (<year>2007</year>). <article-title>When
                        the brain plays music: Auditory-motor interactions in music perception and
                        production</article-title>. <source>Nature Reviews Neuroscience</source>,
                        <volume>8</volume>(<issue>7</issue>),
                        <fpage>547</fpage>&#8211;<lpage>558</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1038/nrn2152</pub-id></mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
