<?xml version="1.0" encoding="UTF-8"?>
<grammar 
	xmlns="http://relaxng.org/ns/structure/1.0"
	xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0"
	xmlns:w="http://www.wulfila.be/namespaces/legacy"
	datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
	<a:documentation>Schema for the tokens data file.</a:documentation>
	<!-- 
	Revision history:
	[2026-05-21] First draft.
	[2026-05-22] First full version.
	[2026-05-24] Finished documentation.
	[2026-05-28] Replaced look-up tables and numerical keys with schema-based tokens.
	[2026-06-14] Modified subtoken analysis encoding, to accommodate tokens like þat~ain or þat~ist.  
	
	Tom De Herdt
	https://www.wulfila.be/
	-->
	
	<!--
	Pulls in the shared definitions from common.rng. Patterns that are referenced
	but not defined in this file (boolean.attribute.true, mixed) are expected to be
	declared there (not visible from this file; confirm against common.rng).
	The nested <start> replaces the start pattern of the included grammar, so
	validation of a tokens file begins at the "root" pattern.
	-->
	<include href="common.rng">
		<start>
			<ref name="root"/>
		</start>
	</include>
	
	<define name="root">
		<element name="tokens">
			<a:documentation>Root element of the tokens data file. Contains one or more books.</a:documentation>
			<oneOrMore>
				<ref name="book"/>
			</oneOrMore>
		</element>
	</define>
	
	<!-- One book (or minor fragment): identifying attributes followed by its segments. -->
	<define name="book">
		<element name="book">
			<a:documentation>Gospel or Pauline epistle for biblical sources, or one of the minor fragments.</a:documentation>

			<!-- Required attributes. -->
			<attribute name="id">
				<a:documentation>Numerical identifier.</a:documentation>
				<data type="unsignedByte"/>
			</attribute>
			<attribute name="name">
				<a:documentation>Name of the book or fragment.</a:documentation>
				<text/>
			</attribute>
			<attribute name="abbreviation">
				<a:documentation>Abbreviation used in the digital edition.</a:documentation>
				<text/>
			</attribute>

			<!-- Optional attribute. -->
			<optional>
				<attribute name="streitberg">
					<a:documentation>Abbreviation used by Streitberg.</a:documentation>
					<text/>
				</attribute>
			</optional>

			<!-- Content: at least one segment. -->
			<oneOrMore><ref name="segment"/></oneOrMore>
		</element>
	</define>
	
	<!--
	A segment carries five required attributes (id, unit, n, wit, type), one
	optional attribute (var) and zero or more tokens. A segment without any
	token is therefore valid.
	-->
	<define name="segment">
		<element name="segment">
			<a:documentation>Bible verse or similar textual segment for the minor fragments.</a:documentation>
			<attribute name="id">
				<a:documentation>Segment identifier, corresponding to an identifier in the TEI edition.</a:documentation>
				<!-- The pattern facet narrows the ID type to the letter S followed by one or more digits. -->
				<data type="ID">
					<param name="pattern">S[0-9]+</param>
				</data>
			</attribute>
			<!--
			unsignedByte limits @unit and @n to the range 0 to 255. That range covers the
			sentinel values documented in list.segment.type: @n = 0 for "incipit" and
			@n = 255 for "explicit" segments.
			-->
			<attribute name="unit">
				<a:documentation>Chapter number for biblical segments, or main part number for other fragments, as determined by @type.</a:documentation>
				<data type="unsignedByte"/>
			</attribute>
			<attribute name="n">
				<a:documentation>Verse number for biblical segments, or ad hoc number for the minor fragments, as determined by @type.</a:documentation>
				<data type="unsignedByte"/>
			</attribute>
			<attribute name="wit">
				<a:documentation>Witness siglum used in the TEI edition.</a:documentation>
				<ref name="list.manuscripts"/>
			</attribute>
			<attribute name="type">
				<a:documentation>Segment type, determines a reference system and interpretation of @unit and @n. Biblical segments have chapter and verse; units for the minor fragments are ad hoc and somewhat artificial.</a:documentation>
				<ref name="list.segment.type"/>
			</attribute>
			<optional>
				<attribute name="var">
					<a:documentation>Information on variation between parallel readings in different manuscripts for this segment.</a:documentation>
					<ref name="list.segment.variation"/>
				</attribute>
			</optional>
			<zeroOrMore>
				<ref name="token"/>
			</zeroOrMore>
		</element>
	</define>
	
	<!--
	Content model of a token: the optional flag attributes (irregular, capital,
	enclisis, var) may be combined freely; the child elements must appear in the
	fixed order form, manuscript?, emendation?, subtokens?, analysis?, note?, meta?.
	The patterns boolean.attribute.true and mixed are not defined in this file;
	they are expected to come from the included common.rng.
	-->
	<define name="token">
		<element name="token">
			<a:documentation>Describes a single token (word) in a reading of a text segment.</a:documentation>
			<attribute name="id">
				<a:documentation>Token identifier, corresponding to an identifier in the TEI edition.</a:documentation>
				<data type="ID">
					<param name="pattern">T[0-9]+</param>
				</data>
			</attribute>
			<optional>
				<attribute name="irregular">
					<a:documentation>Token has non-standard spelling or a scribal error. (NOTE: incomplete, started 2011-11-14 with John XVIII.)</a:documentation>
					<ref name="boolean.attribute.true"/>
				</attribute>
			</optional>
			<optional>
				<attribute name="capital">
					<a:documentation>Indicates that the token starts with a capital in Streitberg's 1919 edition, excluding proper nouns.</a:documentation>
					<ref name="boolean.attribute.true"/>
				</attribute>
			</optional>
			<optional>
				<attribute name="enclisis">
					<a:documentation>Indicates that the token contains clitics, e.g. ‘ga-u-laubjats’.</a:documentation>
					<ref name="boolean.attribute.true"/>
				</attribute>
			</optional>
			<optional>
				<attribute name="var">
					<a:documentation>Indicates that there is a parallel reading for the token in another manuscript, and signals any difference(s).</a:documentation>
					<ref name="list.token.variation"/>
				</attribute>
			</optional>
			<element name="form">
				<a:documentation>Token type (word form), normalized according to Streitberg's emendations.</a:documentation>
				<text/>
			</element>
			<optional>
				<element name="manuscript">
					<a:documentation>Manuscript reading for tokens emended by Streitberg, as mentioned in the critical apparatus. (NOTE: incomplete, started 2011-11-14 with John XVIII.)</a:documentation>
					<text/>
				</element>
			</optional>
			<optional>
				<element name="emendation">
					<a:documentation>Emendations that Streitberg marked typographically, with brackets or italics (as opposed to mentioned in the apparatus).</a:documentation>
					<optional>
						<attribute name="addition">
							<a:documentation>Token is (part of) an addition or contains additions by Streitberg (angular brackets).</a:documentation>
							<ref name="list.emendation.range"/>
						</attribute>
					</optional>
					<optional>
						<attribute name="deletion">
							<a:documentation>Token has been deleted or contains deletions by Streitberg (square brackets).</a:documentation>
							<ref name="list.emendation.range"/>
						</attribute>
					</optional>
					<optional>
						<attribute name="unclear">
							<a:documentation>Some characters could not be read with certainty (italic text in the edition, here in parentheses).</a:documentation>
							<ref name="list.emendation.range"/>
						</attribute>
					</optional>
					<text/>
				</element>
			</optional>
			<optional>
				<element name="subtokens">
					<a:documentation>Analysis of compound tokens (clitics, assimilation).</a:documentation>
					<attribute name="template">
						<a:documentation>Structure of the compound token, as recorded in the legacy MS Access database with plain text microsyntax.</a:documentation>
					</attribute>
					<oneOrMore>
						<element name="st">
							<a:documentation>Subtoken, a token within the main token.</a:documentation>
							<attribute name="n">
								<a:documentation>Subtoken number.</a:documentation>
								<data type="positiveInteger"/>
							</attribute>
							<optional>
								<attribute name="part">
									<a:documentation>Subtoken part number, if a clitic splits the token in two halves, as in ‘ga-u-laubjats’ = {1.1:ga}-{2:u}-{1.2:laubjats} (galaubjan + particle -u).</a:documentation>
									<data type="positiveInteger"/>
								</attribute>
							</optional>
							<text/>
						</element>
					</oneOrMore>
				</element>
			</optional>
			<optional>
				<element name="analysis">
					<a:documentation>Morphosyntactic annotations generated by Gomorphv2. They are gradually being disambiguated by deleting incorrect analyses.</a:documentation>
					<optional>
						<attribute name="verified">
							<a:documentation>Verification date. If present, the analysis has been verified and disambiguated (if possible – some tokens are inherently ambiguous).</a:documentation>
							<data type="dateTime"/>
						</attribute>
					</optional>
					<!-- One or more lemma analyses; more than one means the token is still ambiguous or inherently so. -->
					<oneOrMore>
						<ref name="lemma"/>
					</oneOrMore>
				</element>
			</optional>
			<optional>
				<element name="note">
					<a:documentation>Public note. Contains mixed content, marked up with a subset of HTML and some custom elements.</a:documentation>
					<!-- xml:space="preserve" is required on every note: the attribute is not wrapped in <optional>. -->
					<attribute>
						<name ns="http://www.w3.org/XML/1998/namespace">space</name>
						<value>preserve</value>
					</attribute>
					<ref name="mixed"/>
				</element>
			</optional>
			<optional>
				<element name="meta">
					<a:documentation>Internal note about the token or its encoding, in Dutch.</a:documentation>
					<text/>
				</element>
			</optional>
		</element>
	</define>
	
	<define name="lemma">
		<element name="lemma">
			<a:documentation>One lemma analysis of the token (or of one of its subtokens, see @subtoken), with one or more morphosyntactic tags.</a:documentation>
			<optional>
				<attribute name="subtoken">
					<a:documentation>Subtoken index number for lemmas linked to a subtoken.</a:documentation>
					<!-- NOTE(review): presumably matches the @n of an <st> in the same token; the schema does not enforce this. -->
					<data type="positiveInteger"/>
				</attribute>
			</optional>
			<attribute name="ref">
				<a:documentation>Foreign key: identifier of a lemma defined in the lemmas data file.</a:documentation>
				<data type="positiveInteger"/>
			</attribute>
			<oneOrMore>
				<element name="tag">
					<a:documentation>Morphosyntactic tag.</a:documentation>
					<attribute name="ref">
						<a:documentation>Foreign key: identifier of an inflectional tag defined in the grammar data file.</a:documentation>
						<data type="positiveInteger"/>
					</attribute>
					<optional>
						<attribute name="derivation">
							<a:documentation>The token is part of a derived or secondary inflectional paradigm, e.g. the present participle (adjective declension) of a verb.</a:documentation>
							<choice>
								<value>present-participle</value>
								<value>past-participle</value>
								<value>comparative</value>
								<value>superlative</value>
							</choice>
						</attribute>
					</optional>
				</element>
			</oneOrMore>
		</element>
	</define>
	
	<define name="list.emendation.range">
		<a:documentation>Extent of an emendation or uncertainty marking within a token, as used by the attributes of the emendation element.</a:documentation>
		<choice>
			<value>partial</value>
			<a:documentation>Emendation or indication of uncertainty in part(s) of the word.</a:documentation>
			
			<value>full</value>
			<a:documentation>Emendation or indication of uncertainty applies to the entire word.</a:documentation>
		</choice>
	</define>
	
	<!--
	Enumeration of witness sigla. Each <value> is followed by the a:documentation
	element that describes it, so the pairing is positional and the order of these
	lines must be kept. The w:id attribute belongs to the legacy namespace
	(http://www.wulfila.be/namespaces/legacy); presumably it preserves the numerical
	key of the former look-up table mentioned in the revision history. Confirm
	before relying on it.
	-->
	<define name="list.manuscripts">
		<a:documentation>List of witnesses, as used in the TEI edition.</a:documentation>
		<choice>
			<value>CA</value>
			<a:documentation w:id="1">Codex Argenteus</a:documentation>
			
			<value>A</value>
			<a:documentation w:id="2">Codex Ambrosianus A</a:documentation>
			
			<value>B</value>
			<a:documentation w:id="3">Codex Ambrosianus B</a:documentation>
			
			<value>C</value>
			<a:documentation w:id="4">Codex Ambrosianus C</a:documentation>
			
			<value>D</value>
			<a:documentation w:id="5">Codex Ambrosianus D</a:documentation>
			
			<value>E</value>
			<a:documentation w:id="6">Codex Ambrosianus E</a:documentation>
			
			<value>Car</value>
			<a:documentation w:id="7">Codex Carolinus</a:documentation>
			
			<value>Giss</value>
			<a:documentation w:id="8">Codex Gissensis</a:documentation>
			
			<value>Taur</value>
			<a:documentation w:id="9">*Codex Taurinensis [part of A]</a:documentation>
			
			<value>Lat5750</value>
			<a:documentation w:id="10">Codex Vaticanus Latinus 5750</a:documentation>
			
			<value>Naples</value>
			<a:documentation w:id="11">Naples Deed</a:documentation>
			
			<value>Arezzo</value>
			<a:documentation w:id="12">Arezzo Deed</a:documentation>
			
			<value>Speyer</value>
			<a:documentation w:id="13">*Speyer fragment [part of CA]</a:documentation>
		</choice>
	</define>
	
	<!--
	Enumeration of values for segment/@type. Each <value> is followed by the
	a:documentation element that describes it. The stated implications for @unit
	and @n are documentation only: nothing in this grammar ties the value of @type
	to the values of @unit or @n.
	-->
	<define name="list.segment.type">
		<a:documentation>Segment reference systems.</a:documentation>
		<choice>
			<value>biblical</value>
			<a:documentation>Biblical canonical reference system. Implies @unit = chapter, @n = verse.</a:documentation>
			
			<value>incipit</value>
			<a:documentation>Bible chapter opener. Implies @unit = chapter, @n = 0.</a:documentation>
			
			<value>explicit</value>
			<a:documentation>Bible chapter closer. Implies @unit = chapter, @n = 255.</a:documentation>
			
			<value>skeireins</value>
			<a:documentation>Skeireins. Implies @unit = leaf, @n = sentence.</a:documentation>
			
			<value>signature</value>
			<a:documentation>Gothic signatures. Implies @unit = deed, @n = signature number.</a:documentation>
			
			<value>calendar</value>
			<a:documentation>Gothic calendar. Implies @unit = month, @n = day (numerically).</a:documentation>
		</choice>
	</define>
	
	<!--
	Enumeration of values for segment/@var. Each <value> is followed by the
	a:documentation element that describes it; the "Cf." cross-references quote
	the actual enumeration values.
	-->
	<define name="list.segment.variation">
		<a:documentation>Classification of differences between parallel readings, on the verse level.</a:documentation>
		<choice>
			<value>same</value>
			<a:documentation w:id="1">Identical. The selected verse has an identical parallel version in another manuscript.</a:documentation>
			
			<value>diff-reading</value>
			<a:documentation w:id="2">Different reading. The selected verse has a parallel version in another manuscript with a different reading, but the same interpretation by Streitberg.</a:documentation>
			
			<value>diff-language</value>
			<a:documentation w:id="3">Different language. The selected verse has a parallel version in another manuscript with different language (orthography, morphology, lexicon, syntax or style).</a:documentation>
			
			<value>same-incomplete</value>
			<a:documentation w:id="4">Identical (incomplete). Cf. "same", but one (or both) of the verses is (are) partially missing.</a:documentation>
			
			<value>diff-reading-incomplete</value>
			<a:documentation w:id="5">Different reading (incomplete). Cf. "diff-reading", but one (or both) of the verses is (are) partially missing.</a:documentation>
			
			<value>diff-language-incomplete</value>
			<a:documentation w:id="6">Different language (incomplete). Cf. "diff-language", but one (or both) of the verses is (are) partially missing.</a:documentation>
		</choice>
	</define>
	
	<!--
	Enumeration of values for token/@var. Each <value> is followed by the
	a:documentation element that describes it.
	NOTE(review): the w:id sequence jumps from 6 to 8; there is no entry with
	w:id="7". Possibly a retired legacy category; confirm that this gap is
	intentional.
	-->
	<define name="list.token.variation">
		<a:documentation>Classification of differences between parallel readings, on the token level.</a:documentation>
		<choice>
			<value>same</value>
			<a:documentation w:id="1">Identical. The token has an identical parallel version in another manuscript.</a:documentation>
			
			<value>diff-word</value>
			<a:documentation w:id="2">Different word. The token is part of a lexical difference between parallel versions, e.g. ‘runa’ and ‘garuni’.</a:documentation>
			
			<value>diff-spelling</value>
			<a:documentation w:id="3">Different spelling. The token has an orthographic variant in a parallel version, e.g. ‘sium’ and ‘sijum’.</a:documentation>
			
			<value>diff-grammar</value>
			<a:documentation w:id="4">Different grammar. The token is part of a grammatical difference between parallel versions, e.g. ‘dauþaus’ and ‘us dauþau’.</a:documentation>
			
			<value>diff-missing</value>
			<a:documentation w:id="5">Missing (text). The token is missing in a parallel version, i.e. it has been omitted there (or not been added) by the scribe or translator.</a:documentation>
			
			<value>missing</value>
			<a:documentation w:id="6">Missing (manuscript). The token is missing in an incomplete parallel version of the segment, i.e. its counterpart is not extant or unreadable.</a:documentation>
			
			<value>diff-order</value>
			<a:documentation w:id="8">Different word order. The token is part of a word-order difference between parallel versions, e.g. ‘Xristaus Iesuis’ and ‘Iesuis Xristaus’.</a:documentation>
			
			<value>diff-assimilation</value>
			<a:documentation w:id="9">Different assimilation. The token is part of a difference in word assimilation between parallel versions, e.g. ‘jah liban’ and ‘jal~liban’.</a:documentation>
			
			<value>diff-reading</value>
			<a:documentation w:id="10">Different reading. The token has a parallel version with different reading, emended by Streitberg, e.g. ‘mahte[de]deina’ and ‘mahtedeina’.</a:documentation>
			
			<value>diff-number</value>
			<a:documentation w:id="11">Different numerical notation. The token has a parallel version with different numerical notation, e.g. ‘fidwortaihun’ and ‘·id·’.</a:documentation>
			
			<value>problematic</value>
			<a:documentation w:id="12">Problematic. Unclassified token variation in parallel text (e.g. a combination of variation types mentioned above).</a:documentation>
		</choice>
	</define>
	
</grammar>