# New suggestions for XPaths in collection.yaml
collection:
# Title of the collection, with a short and a long variant.
# NOTE(review): indentation was lost in this file; `short` and `long` are
# nested under `title` based on key order — confirm against the consumer.
title:
  comments:
    - 'Title of the collection'
  short:
    comments:
      - 'Short title of the collection (required)'
  long:
    comments:
      - 'Long title of the collection (required)'
# Rights holder of the collection. The comment lines carry an example list
# of persons (fullname + url); `required` holds the validation rule.
rights_holder:
  comments:
    - "Collection rights holder (required)"
    - " examples:"
    - "- fullname: Lanegan, Mark"
    - " url: https://en.wikipedia.org/wiki/Mark_Lanegan"
    - "- fullname: Gossard, Stone"
    - " url: https://en.wikipedia.org/wiki/Stone_Gossard"
  required:
    attr_name: rights_holder  # Name of attribute in TEIParser
    iso_date: false           # No date validation needed
    or_attrib: null           # No alternative attributes available
    and_attrib: null          # No additional attributes required
# Collector of the collection. The comment lines carry an example list of
# persons (fullname + url); `required` holds the validation rule.
collector:
  comments:
    - "Collection collector (required)"
    - " examples:"
    - "- fullname: Vedder, Eddie"
    - " url: https://en.wikipedia.org/wiki/Eddie_Vedder"
    - "- fullname: Cornell, Chris"
    - " url: https://en.wikipedia.org/wiki/Chris_Cornell"
  required:
    attr_name: collector
    iso_date: false   # not a date field
    or_attrib: null   # no alternative attributes
    and_attrib: null  # no accompanying attributes
# Attribute section header and its explanatory comment.
# NOTE(review): the keys that follow (work, edition, ...) presumably nest
# under `attributes`; the original indentation was lost — confirm.
attributes:
  comments:
    - 'define an xpath for each attribute within TEI file'
work:
# Title entry listed directly after the `work:` key: candidate XPaths for
# the main title.
title:
  comments:
    - 'Main title (required)'
  xpath:
    proposals:
      - '//fileDesc//titleStmt//title'
      - '//fileDesc//titleStmt//name'
# Author of the work: identifier plus name parts, each with a list of
# candidate XPaths. Only `fullname` carries a `required` rule.
author:
  id:
    xpath:
      proposals:
        # Select the idno element itself. A trailing `/@type` step would
        # return the attribute value ("gnd"), never the identifier.
        - "//sourceDesc//author//idno[@type='gnd']"
        - //fileDesc//titleStmt//author/@ref
  fullname:
    comments:
      - "The author's name can be given as either first name and surname (preferred option) or full name. Every work must be attributed to a person. If no author is available (for example, for anonymous works), the name of the person responsible for the collection will be used as the collector."
    xpath:
      proposals:
        - //fileDesc/titleStmt//author//name[@type="full"]
        - //fileDesc/titleStmt//author
        - //sourceDesc/bibl[@type="work"]/author/name
    required:
      attr_name: author_fullname
      iso_date: false
      or_attrib: null
      and_attrib: null
  firstname:
    xpath:
      proposals:
        - //sourceDesc//author//forename
        - //sourceDesc//author//firstname
        - //fileDesc/titleStmt//author//persName//forename
  lastname:
    xpath:
      proposals:
        - //sourceDesc//author//surname
        - //sourceDesc//author//lastname
        - //fileDesc/titleStmt//author//persName//lastname
        - //fileDesc/titleStmt//author//persName//surname
# Genre of the work: allowed values are listed in the comment string;
# `required` holds the validation rule.
genre:
  comments:
    - "TextGrid-Repository genre (required). Possible values: drama, prose, verse, reference work, non-fiction, non-text, other"
  xpath:
    proposals:
      - "//profileDesc//term[@type='text.form']"
      - "//profileDesc//term[@type='text.genre']"
      - '//keywords[@scheme="http://textgrid.info/namespaces/metadata/core/2010#genre"]/term'
  required:
    attr_name: genre
    iso_date: false
    or_attrib: null
    and_attrib: null
# Date of creation of the work: either a single `date`, or a range given
# by `notBefore` together with `notAfter` (see the or_attrib / and_attrib
# rules below).
dateOfCreation:
  date:
    comments:
      - "It is required that every work is associated to some date of the creation. You can associate the works with one specific date (field date) or define a range of years for each work (through the fields notBefore and notAfter). One of the two options is required. Many projects don't have this information available for different reasons. One possibility would be to define a very large range of years between the work needed to be created, for example 1800 and 1900 or even -5000 and 2025."
    xpath:
      proposals:
        - //listBibl//bibl[@type="print-source"]//date
        - //sourceDesc//biblFull//publicationStmt//date
        - //sourceDesc//bibl[@type="printSource"]//date
    required:
      attr_name: work_date
      iso_date: true  # Must be ISO date format
      or_attrib:      # OR: These attributes are alternatives
        - work_notBefore
        - work_notAfter
      and_attrib: null
  notBefore:
    xpath:
      # No proposals yet: explicit empty list rather than a bare key, which
      # would parse as null while every other `proposals` is a list.
      proposals: []
    required:
      attr_name: work_notBefore
      iso_date: true
      or_attrib:
        - work_date
      and_attrib:  # AND: Needs end date too
        - work_notAfter
  notAfter:
    xpath:
      # No proposals yet: explicit empty list rather than an implicit null.
      proposals: []
    required:
      attr_name: work_notAfter
      iso_date: true
      or_attrib:
        - work_date
      and_attrib:  # AND: Needs start date too
        - work_notBefore
# Place entry: candidate XPaths, all targeting pubPlace elements.
place:
  xpath:
    proposals:
      - '//listBibl//bibl[@type="print-source"]//pubPlace'
      - '//sourceDesc//biblFull//publicationStmt//pubPlace'
      - '//sourceDesc//bibl[@type="printSource"]//pubPlace'
# Identifier entry: candidate XPaths reading the @corresp attribute of idno.
id:
  xpath:
    proposals:
      - '//sourceDesc//title//idno[@type]/@corresp'
      - '//sourceDesc/bibl[@type="work"]/idno/@corresp'
edition:
# Title entry listed directly after the `edition:` key: candidate XPaths
# for the main title of the edition.
title:
  comments:
    - 'Main title of the edition (required)'
  xpath:
    proposals:
      - '//sourceDesc//bibl[@type="firstEdition"]//title'
      - '//fileDesc//titleStmt//title'
      - '//fileDesc//titleStmt'
# Author of the edition: identifier plus name parts, each with a list of
# candidate XPaths.
author:
  id:
    xpath:
      proposals:
        - //sourceDesc//bibl[@type="firstEdition"]//author/@ref
        # Select the idno element itself. A trailing `/@type` step would
        # return the attribute value ("gnd"), never the identifier.
        - "//sourceDesc//author//idno[@type='gnd']"
        - //fileDesc//titleStmt//author/@ref
  fullname:
    comments:
      - "The author's name can be given as either first name and surname (preferred option) or full name. Every edition must be attributed to a person. If no author is available (for example, for anonymous editions), the name of the person responsible for the collection will be used as the collector."
    xpath:
      proposals:
        - //sourceDesc//bibl[@type="firstEdition"]//author
        - //fileDesc/titleStmt//author//name[@type="full"]
        - //fileDesc/titleStmt//author
  firstname:
    xpath:
      proposals:
        - //sourceDesc//bibl[@type="firstEdition"]//forename
        - //sourceDesc//bibl[@type="firstEdition"]//firstname
        - //sourceDesc//author//forename
        - //sourceDesc//author//firstname
        - //fileDesc/titleStmt//author//persName//forename
  lastname:
    xpath:
      proposals:
        - //sourceDesc//bibl[@type="firstEdition"]//surname
        - //sourceDesc//bibl[@type="firstEdition"]//lastname
        - //sourceDesc//author//surname
        - //sourceDesc//author//lastname
        # Counterpart of the titleStmt forename fallback in `firstname`, so
        # both name parts can be resolved from the same location.
        - //fileDesc/titleStmt//author//persName//surname
# License of the edition: title and URL, both with a `required` rule.
# `comments` is written as a list, like every other `comments` entry in
# this file, so consumers can treat it uniformly.
license:
  title:
    comments:
      - "License title, for example 'Creative Commons Attribution 4.0 International (CC-BY)' (required)"
    xpath:
      proposals:
        - //licence
    required:
      attr_name: edition_license_title
      iso_date: false
      or_attrib: null
      and_attrib: null
  url:
    comments:
      - "License URL, for example 'https://creativecommons.org/licenses/by/4.0/' (required)"
    xpath:
      proposals:
        - //licence/@target
    required:
      attr_name: edition_license_url
      iso_date: false
      or_attrib: null
      and_attrib: null
# Date entry: candidate XPaths, first-edition date first, then the
# publicationStmt date.
date:
  xpath:
    proposals:
      - '//sourceDesc//bibl[@type="firstEdition"]//date'
      - '//publicationStmt/date'
# Place entry: a single candidate XPath targeting the first edition's pubPlace.
place:
  xpath:
    proposals:
      - '//sourceDesc//bibl[@type="firstEdition"]//pubPlace'
# Language entry. `comments` is written as a list, like every other
# `comments` entry in this file, so consumers can treat it uniformly.
language:
  comments:
    - "Using the ISO norm 639-3, such as deu, eng, spa, fra, etc. More information: https://iso639-3.sil.org/code_tables/639/data"
  xpath:
    proposals:
      - "//profileDesc//term[@type='text.language']"
      - "//profileDesc//term[@type='text.lang']"
      - //profileDesc/languageUsage/language
      - //profileDesc/langUsage/language
      - //profileDesc/langUsage/language/@ident
# Word count entry: candidate XPaths on extent/measure, one per unit
# spelling (ws, words, tokens).
wordcount:
  xpath:
    proposals:
      - "//fileDesc/extent/measure[@unit='ws']"
      - "//fileDesc/extent/measure[@unit='words']"
      - "//fileDesc/extent/measure[@unit='tokens']"
# Basic classifications. The comment lines hold a usage example; `id`,
# `url` and `value` each list candidate XPaths directly under `proposals`
# (no intermediate `xpath` key in this section).
basic_classifications:
  comments:
    - "basic_classifications can be defined as a list of dictionaries with the following"
    - "examples:"
    - "- id:"
    - " value:"
    - ' xpath: //find_me[@via="this"]/xpath'
    - " url:"
    - " value: https://d-nb.info/gnd/"
    - " xpath:"
    - " value:"
    - " value: I am a fixed value"
    - " xpath:"
  id:
    proposals:
      - '//keywords[@scheme="http://uri.gbv.de/terminology/bk/"]/term/@key'
  url:
    proposals:
      - '//keywords[@scheme="http://uri.gbv.de/terminology/bk/"]/@scheme'
  value:
    proposals:
      - '//keywords[@scheme="http://uri.gbv.de/terminology/bk/"]/term'
# GND subjects. The comment lines hold a usage example; `id`, `url` and
# `value` each list candidate XPaths directly under `proposals`
# (no intermediate `xpath` key in this section).
gnd_subjects:
  comments:
    - "gnd_subjects can be defined as a list of dictionaries with the following"
    - "examples:"
    - "- id:"
    - " value:"
    - ' xpath: //keywords[@scheme="https://d-nb.info/gnd/"]/term/@ref'
    - " url:"
    - " value: https://d-nb.info/gnd/"
    - " xpath:"
    - " value:"
    - " value:"
    - ' xpath: //keywords[@scheme="https://d-nb.info/gnd/"]/term'
  id:
    proposals:
      - '//keywords[@scheme="https://d-nb.info/gnd/"]/term/@key'
  url:
    proposals:
      - '//keywords[@scheme="https://d-nb.info/gnd/"]/@scheme'
  value:
    proposals:
      - '//keywords[@scheme="https://d-nb.info/gnd/"]/term'
# ELTeC-specific attributes. Entries whose only candidates are commented
# out use an explicit empty list: a bare `proposals:` would parse as null,
# while every populated `proposals` in this file is a list.
eltec_specs:
  comments:
    - eltec_specifications
  time_slot:
    xpath:
      proposals: []
      # - //profileDesc//@{http://distantreading.net/eltec/ns}/@timeSlot
  author_gender:
    xpath:
      proposals:
        - "//profileDesc//term[@type='author.gender']"
        - "//profileDesc//term[@type='author.sex']"
        - "//fileDesc//notesStmt/note[@type='author-gender']"
        # - //profileDesc//@{http://distantreading.net/eltec/ns}/@authorGender
  size:
    xpath:
      proposals: []
      # - //profileDesc//@{http://distantreading.net/eltec/ns}/@size
  reprintCount:
    xpath:
      proposals: []
      # - //profileDesc//@{http://distantreading.net/eltec/ns}/@reprintCount
# Corpus collection entry: a single candidate XPath on keywords terms
# typed "collection".
corpus_collection:
  xpath:
    proposals:
      - '//keywords//term[@type="collection"]'
# Explicitly empty list (not null).
# NOTE(review): presumably a placeholder for the collection's elements — confirm against the consumer.
elements: []
# Edited by Jose Calvo Tello