<?xml version="1.0" encoding="UTF-8"?>
<schema name="gfl" version="1.5">

  <!-- Every document is identified by the value of its "id" field. -->
  <uniqueKey>id</uniqueKey>

  <!-- Boolean type. The attributes listed here act as defaults for every field of this type. -->
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"
      indexed="true"
      stored="true"
      required="false"
      multiValued="false" />
  <!-- Date type based on DateRangeField. The attributes listed here act as defaults
       for every field of this type. -->
  <fieldType name="date" class="solr.DateRangeField"
      indexed="true"
      stored="true"
      required="false"
      multiValued="false" />
  <!-- Integer type (TrieIntField); single-valued, indexed and stored by default. -->
  <fieldType name="int"
      class="solr.TrieIntField"
      precisionStep="0"
      positionIncrementGap="0"
      indexed="true"
      stored="true"
      required="false"
      multiValued="false"/>
  <!-- Long integer type (TrieLongField); declares no per-type field defaults. -->
  <fieldType name="long"
      class="solr.TrieLongField"
      precisionStep="0"
      positionIncrementGap="0"/>
  <!-- Verbatim (untokenized) string type; single-valued, indexed and stored by default. -->
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="false"
      indexed="true"
      stored="true"
      required="false"
      multiValued="false" />
    <!-- Case-insensitive string: the whole value is kept as one token and lower-cased.
         The analyzer is declared without a type so that the same chain is used
         explicitly at both index time and query time. -->
    <fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
        <analyzer>
            <tokenizer class="solr.KeywordTokenizerFactory"/>
            <filter class="solr.LowerCaseFilterFactory"/>
        </analyzer>
    </fieldType>

  <!-- Full-text type: splits on the punctuation/whitespace character class given in the
       tokenizer pattern and lower-cases the tokens. HTML markup is stripped at index time
       only. Term vectors with positions and offsets are kept. -->
  <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"
      indexed="true"
      stored="true"
      required="false"
      multiValued="true"
      termVectors="true"
      termPositions="true"
      termOffsets="true">
    <analyzer type="index">
      <charFilter class="solr.HTMLStripCharFilterFactory" />
      <tokenizer class="solr.PatternTokenizerFactory" pattern="[‒&amp;&lt;&gt;&quot;&apos;\p{Z}\s′`″”∣%«»‛\$⅓⅙⅔·⅕#˄˚\{\}\\¼¾©@‚°=½§…℔*₰¶⸗˺˹„“+–\/?!;›‹\.,’·‘:]+" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.PatternTokenizerFactory" pattern="[‒&amp;&lt;&gt;&quot;&apos;\p{Z}\s′`″”∣%«»‛\$⅓⅙⅔·⅕#˄˚\{\}\\¼¾©@‚°=½§…℔*₰¶⸗˺˹„“+–\/?!;›‹\.,’·‘:]+" />
      <filter class="solr.LowerCaseFilterFactory" />
    </analyzer>
  </fieldType>

  <!-- Unique document identifier (referenced by uniqueKey) -->
  <field name="id" type="string" required="true"/>

  <!-- Document kind: 'article', 'page', 'entity' or 'literature' -->
  <field name="doctype" type="string" required="true"/>

  <!-- for doctype 'entity' -->
  <field name="entity_name" type="string_ci" multiValued="false"/>

  <!-- for doctype 'literature' -->
  <field name="uri" type="string" multiValued="true"/>
  <field name="literature_author" type="string" multiValued="true"/>
  <field name="editor" type="string" multiValued="true"/>
  <field name="edition" type="string" multiValued="true"/>
  <field name="pub_place" type="string" multiValued="true"/>
  <field name="publisher" type="string" multiValued="true"/>
  <field name="pub_date" type="string" multiValued="true"/>
  <field name="idno" type="string"/>
  <field name="refs" type="string" multiValued="true"/>

   <!-- for doctype 'page' -->
   <field name="article_id" type="string" />
   <field name="page_number" type="int" />
   <field name="html_page" type="text_de" multiValued="false" />
   <field name="image_ids" type="string" multiValued="true"/>
   <field name="image_urls" type="string" multiValued="true"/>

   <!-- for doctype 'article' -->
  <field name="number_of_pages" type="int" />
  <field name="language" type="string" multiValued="true" />
  <field name="license" type="string" />
  <field name="fulltext" type="text_de" multiValued="false" />
  <field name="fulltext_html" type="text_de" multiValued="false" />
  <field name="short_title" type="text_de" multiValued="false" />
  <field name="title" type="text_de" multiValued="false" />
  <field name="author" type="text_de" multiValued="false" />
  <field name="recipient" type="text_de" multiValued="false" />
  <field name="origin_place" type="text_de" multiValued="false" />
  <field name="destination_place" type="text_de" multiValued="false" />
  <field name="origin_date" type="date" />
  <field name="source_description" type="text_de" multiValued="true" />
  <field name="gnd_keyword" type="text_de" multiValued="true" />
  <field name="free_keyword" type="text_de" multiValued="true" />
  <field name="note_comment" type="text_de" multiValued="true" />
  <field name="image_id" type="string" multiValued="true"/>
  <field name="type_refs" type="string" multiValued="true"/>
  <!-- NOTE(review): "tartget" looks like a misspelling of "target". The name is kept
       because indexing clients and queries address the field by this exact name;
       confirm with them before renaming. -->
  <field name="tartget" type="string" multiValued="true"/>
  <field name="ref_refs" type="string" multiValued="true"/>
  <field name="bibl_refs" type="string" multiValued="true"/>
  <field name="literature_refs" type="string" multiValued="true"/>

  <!-- Catch-all search field that collects the article header fields via copyField.
       copyField only defines the attributes source, dest and maxChars; whether the
       destination is multi-valued is decided by the destination field itself. -->
  <field name="all_header_fields" type="text_de" multiValued="true" />
  <copyField source="id" dest="all_header_fields" />
  <copyField source="short_title" dest="all_header_fields" />
  <copyField source="title" dest="all_header_fields" />
  <copyField source="author" dest="all_header_fields" />
  <copyField source="recipient" dest="all_header_fields" />
  <copyField source="origin_place" dest="all_header_fields" />
  <copyField source="destination_place" dest="all_header_fields" />
  <copyField source="origin_date" dest="all_header_fields" />
  <copyField source="source_description" dest="all_header_fields" />
  <copyField source="gnd_keyword" dest="all_header_fields" />
  <copyField source="free_keyword" dest="all_header_fields" />

  <!-- Solr needs string fields to sort correctly. Each *_sort field is a single-valued
       string copy of its single-valued text counterpart; a sort field must not be
       multi-valued. -->
  <field name="author_sort" type="string" />
  <copyField source="author" dest="author_sort" />
  <field name="recipient_sort" type="string" />
  <copyField source="recipient" dest="recipient_sort" />
  <field name="origin_place_sort" type="string" />
  <copyField source="origin_place" dest="origin_place_sort" />
  <field name="destination_place_sort" type="string" />
  <copyField source="destination_place" dest="destination_place_sort" />
  <field name="title_sort" type="string" />
  <copyField source="title" dest="title_sort" />
  <field name="short_title_sort" type="string" />
  <copyField source="short_title" dest="short_title_sort" />
  <!-- Some dates consist of a year only (no month or day), so the 'date' field itself
       cannot be used for sorting; a string copy is sorted instead. -->
  <field name="origin_date_sort" type="string" />
  <copyField source="origin_date" dest="origin_date_sort" />

  <!-- Solr needs string fields for facets: each *_facet field is a verbatim string copy
       of the corresponding analyzed text field. -->
  <field name="author_facet" type="string" multiValued="true"/>
  <copyField source="author" dest="author_facet" />
  <field name="recipient_facet" type="string" />
  <copyField source="recipient" dest="recipient_facet" />
  <field name="origin_place_facet" type="string" />
  <copyField source="origin_place" dest="origin_place_facet" />
  <field name="destination_place_facet" type="string" />
  <copyField source="destination_place" dest="destination_place_facet" />

  <!-- Version field used by the transaction log -->
  <field name="_version_"
      type="long"
      indexed="true"
      stored="true"/>
  <!-- Root field that enables nested documents -->
  <field name="_root_"
      type="string"
      indexed="true"
      stored="false"/>

</schema>