lido-v1.1-public-beta-schematron-rules.xsd 8.37 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:lido="http://www.lido-schema.org"
    xmlns:lido-qa="http://www.lido-schema.org/quality-assurance"
    xmlns:sch="http://purl.oclc.org/dsdl/schematron"
    xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:tei="http://www.tei-c.org/ns/1.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xml="http://www.w3.org/XML/1998/namespace"
    targetNamespace="http://www.lido-schema.org/quality-assurance" elementFormDefault="qualified"
    attributeFormDefault="qualified">
    <!--
        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
        xx This is an additional schema for LIDO v1.1. It is to be understood as a complementary tool for
        xx assuring the quality of LIDO records. This means it is not obligatory to use this schema when
        xx validating your files; some rules, however, will warn you about changes in the upcoming LIDO v2.0
        xx which are not backwards compatible.
        xx
        xx This document implements the Schematron error roles as follows:
        xx * "info":    highlights elements which will be deprecated in the next LIDO version
        xx * "warn":    points out data values or elements that are correct according to the LIDO schema but
        xx              could/should be improved
        xx
        xx Prepared for CIDOC Working Group Data Harvesting and Interchange, CDWA Lite/museumdat Working Group,
        xx Collections Trust and Deutscher Museumsbund - Fachgruppe Dokumentation by:
        xx Michelle Weidling – Niedersaechsische Staats- und Universitaetsbibliothek Goettingen
        xx
        xx Copyright (c) 2020 ICOM-CIDOC for the Data Harvesting and Interchange Working Group.
        xx These are licensed under the Creative Commons Attribution 4.0 International (CC BY 4.0) license.
        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    -->
mmarkus1's avatar
mmarkus1 committed
29

30
31
32
33
34
    <xs:annotation>
        <xs:appinfo>
            <sch:ns uri="http://www.lido-schema.org" prefix="lido"/>
            <sch:ns uri="http://www.w3.org/2002/07/owl#" prefix="owl"/>
            <sch:ns uri="http://www.w3.org/2004/02/skos/core#" prefix="skos"/>
mmarkus1's avatar
mmarkus1 committed
35

36
            <sch:title>Abstract Schematron rules for quality assurance</sch:title>
mmarkus1's avatar
mmarkus1 committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
						<!-- Additional rules for quality assurance. They use XSLT 2.0 features and are provided for additional
	                quality and compliance checks. These can be extended/modified to create application profiles. -->

						<sch:pattern>
							<sch:title>Deprecation Warning: Controlled vocabulary instead of free text</sch:title>
							<sch:p>In upcoming versions of LIDO some element won't allow for free text anymore but will require terms
								taken from a (local) controlled vocabulary. This should improve the interoperability of the data and recall rates
								in aggregating web services.</sch:p>
							<!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
							<sch:rule id="sch_MixedContentInfo" abstract="true">
								<!-- Reports (role "info") when the context element has a direct text node child
								     containing at least one word character; whitespace-only text nodes do not match. -->
								<sch:report role="info" test="text()[matches(., '[\w]')]">
									Deprecation: In upcoming versions of LIDO <sch:name/> will only allow for skos:Concept, lido:conceptID and lido:term as child elements.
									The use of free text will be deprecated.
								</sch:report>
							</sch:rule>
						</sch:pattern>

						<sch:pattern>
							<sch:title>[Data Quality] @pref: Discern preferred and alternative elements</sch:title>
							<sch:p>If there is more than one element holding a @pref, alternatives as well as the preferred element should be indicated.
								This isn't stated clearly in the LIDO v1.0 schema documentation but should be kept in mind when indexing objects; otherwise the preferred
								variant might be unclear to a data user. Also, omitting this attribute contradicts international best practices for retrieval quality.</sch:p>
							<!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
							<sch:rule abstract="true" id="sch_pref">
								<sch:let name="current" value="current()"/>
								<sch:let name="currentName" value="name($current)"/>
								<sch:let name="parent" value="$current/.."/>
								<sch:let name="lang" value="string($current/@xml:lang)"/>
								<!-- All children of the same parent that share the context element's name and either
								     carry the same xml:lang value or, like the context element, carry no xml:lang at all.
								     The context element itself is part of this set. -->
								<sch:let name="siblings" value="$parent/child::*[name(.) = $currentName and (string(@xml:lang) = $lang or not(@xml:lang or $current/@xml:lang))]"/>
								<!-- Reports (role "warn") when there are several such siblings and none of them is marked
								     as preferred and none of them is marked as alternative. Each marker is accepted as plain
								     text or as one of the two terminology URIs. For the alternative marker both plain-text
								     spellings, 'alternative' and 'alternate', are accepted individually: the four options are
								     joined with "or", so a single spelling is enough to count as a marker. -->
								<sch:report test="
									(count($siblings) > 1) and
									not($siblings/@lido:pref = 'preferred' or
									$siblings/@lido:pref = 'http://terminology.lido-schema.org/pref/preferred' or
									$siblings/@lido:pref = 'http://terminology.lido-schema.org/lido00169')
									and
									not($siblings/@lido:pref = 'alternative' or
									$siblings/@lido:pref = 'alternate' or
									$siblings/@lido:pref = 'http://terminology.lido-schema.org/pref/alternative' or
									$siblings/@lido:pref = 'http://terminology.lido-schema.org/lido00170')"
									role="warn">
									Quality: When providing more than one <sch:name/> the preferred and alternative variant(s) should be cleary marked as such via @pref.
								</sch:report>
							</sch:rule>
						</sch:pattern>

						<sch:pattern>
							<sch:title>[Data Quality] @pref: "alternative" instead of "alternate"</sch:title>
							<sch:p>LIDO v1.0 falsely suggests the value 'alternate' for the pref attribute. It is established to use 'alternative' in this context.</sch:p>
							<!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
							<sch:rule id="sch_alternate" abstract="true">
								<!-- Reports (role "warn") when the context element's lido:pref attribute is exactly 'alternate'. -->
								<sch:report role="warn" test="@lido:pref = 'alternate'">
									Quality: Use 'alternative' instead of 'alternate' in this context. Consider changing the attribute's value or using the corresponding LIDO terminology.
								</sch:report>
							</sch:rule>
						</sch:pattern>

						<sch:pattern>
							<sch:title>[Data Quality] xs:dateTime Dates</sch:title>
							<sch:p>Check if a given string complies to the ISO 8601 date convention, i.e.
								an optional '-' to indicate CE or BCE date range followed by a minimum of
								4 digits denoting the year in the proleptic Gregorian calendar, an optional 2-digit month,
								an optional 2-digit day of month, an optional 'T' with a 2-digit hour (as 24-hour clock),
								an optional 2-digit minute and seconds, and an optional time zone designation of either Z for UTC
								or the timezone offset in hours:minutes. Omitting a timezone designation indicates using a
								local time reference. A timestamp should be used for recording the recordMetadataDates.
								This pattern is used for the cases where an element allows for xs:string in LIDO v1.0 while providing a date.</sch:p>
							<!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
							<sch:rule abstract="true" id="sch_DateTime">
								<!-- Warns unless the whole string value of the context element is a date.
								     The expression is anchored at both ends, so leading text such as "circa 1900"
								     no longer slips through on the strength of its tail alone.
								     Components are nested: a day needs a month, an hour needs a day, and
								     minutes/seconds need an hour. Months are 01 to 12 (October included),
								     days 01 to 31, hours 00 to 23 with the leading T applying to all of them.
								     A time zone (Z, or a signed two-digit hour with optional minutes, colon optional)
								     may follow any precision, as before.
								     The expression has to stay on one line: a line break inside the quoted
								     literal would become part of the regular expression. -->
								<sch:assert role="warn" test="matches(., '^-?[0-9]{4,}(-(0[1-9]|1[0-2])(-(0[1-9]|[12][0-9]|3[01])(T([01][0-9]|2[0-3])(:[0-5][0-9]){0,2})?)?)?(Z|[+-][01][0-9](:?[0-5][0-9])?)?$')">
									Quality: The date provided in <sch:name/> should follow the pattern [-]YYYY[Y+][-MM[-DD[Thh[:mm[:ss[Z|(+|-)hh:mm]]]]]]).
								</sch:assert>
							</sch:rule>
						</sch:pattern>
106

mmarkus1's avatar
mmarkus1 committed
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
						<sch:pattern>
							<sch:title>[Data Quality] Avoid Providing Resource Measurements When Using IIIF</sch:title>
							<sch:p>IIIF resources provide information about their measurements in their
								info.json. Therefore it is redundant to also make the resource's measurements
								available in lido:resourceMeasurementsSet.</sch:p>
							<!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
							<sch:rule id="sch_IIIF_Measurements" abstract="true">
								<!-- Passes when the context element has no lido:resourceMeasurementsSet child, or when
								     its lido:type is neither of the two listed terminology URIs. Warns only when a
								     measurements set is present together with one of those two types. -->
								<sch:assert
									test="not(lido:resourceMeasurementsSet)
									or not(@lido:type = 'http://terminology.lido-schema.org/lido00911'
									or @lido:type = 'http://terminology.lido-schema.org/lido00912')"
									role="warn"> Quality: Do not set lido:resourceMeasurementsSet when providing a IIIF resource.
									Resource measurements are available in the resource's info.json.
								</sch:assert>
							</sch:rule>
						</sch:pattern>
123

124
125
126
127
128
129
130
131
132
133
            <sch:pattern>
                <sch:title>[Data Quality] Use integer values for measurementValue</sch:title>
                <sch:p>Check if measurementValue contains a decimal fraction or other non numerals as the recommendation is to use integer values and appropriate measurementUnit.</sch:p>
                <!-- Abstract rule: has no context of its own and only takes effect where another rule extends it. -->
                <sch:rule abstract="true" id="sch_rule_measurementValue">
                    <!-- Warns unless the string value of the context element consists of one or more
                         ASCII digits and nothing else. At least one digit is required, so an empty
                         element is flagged as well: an empty string is not an integer value. -->
                    <sch:assert role="warn" test="matches(., '^[0-9]+$')">
                    Quality: Element should contain integer value for <sch:name/> for international interoperability.
                    </sch:assert>
                </sch:rule>
            </sch:pattern>

134
135
136
        </xs:appinfo>
    </xs:annotation>
</xs:schema>