elastic.xqm 9.77 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
xquery version "3.1";

module namespace elastic="http://elastic.io";

import module namespace config="https://sade.textgrid.de/ns/config" at "config.xqm";

declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace xhtml="http://www.w3.org/1999/xhtml";
declare namespace http="http://expath.org/ns/http-client";

(: The "elastic" prefix is bound by this file's module namespace declaration.
   Declaring the same prefix a second time in the prolog is a static error
   (err:XQST0033) on conforming XQuery processors, so it is not repeated. :)

(:~ Base URL of the Elasticsearch HTTP endpoint that request URLs in this module start with. :)
declare variable $elastic:uri := 'http://127.0.0.1:9200';

(:~ Prefix placed in front of index names when request URLs are built. :)
declare variable $elastic:prefix := 'at_';

(:~
 : Runs a search request against one index and returns the decoded reply.
 :
 : The query is serialized with elastic:to_json and POSTed to the "_search"
 : endpoint of the index named $elastic:prefix followed by $index.
 : The HTTP status of the reply is not inspected by this function.
 :
 : @param $index index name without the module prefix
 : @param $query value that is serialized to JSON and sent as the request body
 : @return the result of parse-json applied to the base64-decoded second
 :         item of the HTTP response
 :)
declare function elastic:search($index, $query) {
    let $request :=
        <http:request method="POST" href="{$elastic:uri}/{$elastic:prefix}{$index}/_search">
            <http:body media-type="application/json" method="text">{elastic:to_json($query)}</http:body>
        </http:request>
    let $reply := http:send-request($request)
    (: the second item of the reply is the body; it is decoded as base64 before parsing :)
    return parse-json(util:base64-decode($reply[2]))
};

(:~
 : Deletes the index named $elastic:prefix followed by $index.
 :
 : A 404 reply is accepted as success: the index is already absent, which is
 : the state the caller asked for. This lets elastic:setup() run against an
 : Elasticsearch instance on which the indices have never been created.
 : Any other status outside 200-299 raises the error produced by
 : elastic:require_ok.
 :
 : @param $index index name without the module prefix
 : @return the unmodified result of http:send-request
 :)
declare function elastic:drop-index($index) {
    let $response := http:send-request(
        <http:request method="DELETE" href="{$elastic:uri}/{$elastic:prefix}{$index}" />
    )
    let $status := xs:integer($response[1]/@status)
    let $ok :=
        (: a missing index is not a failure for a drop operation :)
        if ($status eq 404) then fn:true()
        else elastic:require_ok($response)
    return $response
};

(:~
 : Creates the index named $elastic:prefix followed by $index.
 :
 : $data is serialized with elastic:to_json and sent as the body of a PUT
 : request to the index URL. The reply is passed through elastic:require_ok,
 : which raises an error for a status outside 200-299.
 :
 : @param $index index name without the module prefix
 : @param $data  value serialized to JSON and sent as the request body
 : @return the unmodified result of http:send-request
 :)
declare function elastic:create-index($index, $data) {
    let $payload := elastic:to_json($data)
    let $request :=
        <http:request method="PUT" href="{$elastic:uri}/{$elastic:prefix}{$index}">
            <http:body media-type="application/json" method="text">{$payload}</http:body>
        </http:request>
    let $reply := http:send-request($request)
    let $checked := elastic:require_ok($reply)
    return $reply
};

(:~
 : Adds a document to an index without specifying a document id.
 :
 : $doc is serialized with elastic:to_json and POSTed to the "_doc" endpoint
 : of the index named $elastic:prefix followed by $index. The reply is passed
 : through elastic:require_ok, which raises an error for a status outside
 : 200-299.
 :
 : @param $index index name without the module prefix
 : @param $doc   value serialized to JSON and sent as the request body
 : @return the unmodified result of http:send-request
 :)
declare function elastic:index-doc($index, $doc) {
    let $body_text := elastic:to_json($doc)
    let $request :=
        <http:request method="POST" href="{$elastic:uri}/{$elastic:prefix}{$index}/_doc">
            <http:body media-type="application/json" method="text">{$body_text}</http:body>
        </http:request>
    let $result := http:send-request($request)
    let $verified := elastic:require_ok($result)
    return $result
};

(:~
 : Adds a document to an index under an explicit document id.
 :
 : $doc is serialized with elastic:to_json and POSTed to "_doc/<id>" of the
 : index named $elastic:prefix followed by $index. The id is percent-encoded
 : with encode-for-uri before it is placed in the URL path, so ids containing
 : characters such as "/", "?", "#" or spaces still address a single document.
 : The reply is passed through elastic:require_ok, which raises an error for
 : a status outside 200-299.
 :
 : @param $index index name without the module prefix
 : @param $id    document id
 : @param $doc   value serialized to JSON and sent as the request body
 : @return the unmodified result of http:send-request
 :)
declare function elastic:index-doc($index, $id, $doc) {
    let $json := elastic:to_json($doc)
    (: escape the id so it cannot alter the structure of the request URL :)
    let $path_id := fn:encode-for-uri(string($id))
    let $response := http:send-request(
        <http:request method="POST" href="{$elastic:uri}/{$elastic:prefix}{$index}/_doc/{$path_id}">
            <http:body media-type="application/json" method="text">{$json}</http:body>
        </http:request>
    )
    let $ok := elastic:require_ok($response)
    return $response
};

Moritz Schepp's avatar
Moritz Schepp committed
69
70
71
72
(:~
 : Sends a GET request to the "_refresh" endpoint directly under
 : $elastic:uri (no index name and no module prefix in the URL).
 : The reply is passed through elastic:require_ok, which raises an error for
 : a status outside 200-299.
 :
 : @return the unmodified result of http:send-request
 :)
declare function elastic:refresh() {
    let $request := <http:request method="GET" href="{$elastic:uri}/_refresh" />
    let $reply := http:send-request($request)
    let $checked := elastic:require_ok($reply)
    return $reply
};

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
(:~
 : Builds the search document for one register person and adds it to the
 : "people" index.
 :
 : The document carries the @xml:id, the German ("de") and French ("fra")
 : tei:persName values, and per-language search data taken from every child
 : element with a matching @xml:lang.
 :
 : The search data is atomized with data(), as in elastic:index-place and
 : elastic:index-work, so the map holds atomic values rather than text nodes
 : and text inside nested markup is included.
 :
 : @param $person element with an @xml:id and language-tagged children
 :                (called with tei:person elements by elastic:setup)
 : @return the result of elastic:index-doc
 :)
declare function elastic:index-person($person) {
    let $doc := map {
        "id": string($person/@xml:id),
        "name": map {
            "de": data($person/tei:persName[@xml:lang='de']),
            "fr": data($person/tei:persName[@xml:lang='fra'])
        },
        "search_data": map {
            "de": data($person/*[@xml:lang='de']),
            "fr": data($person/*[@xml:lang='fra'])
        }
    }
    return elastic:index-doc('people', $doc)
};

(:~
 : Builds the search document for one register place and adds it to the
 : "places" index.
 :
 : The document carries the @xml:id, the German ("de") and French ("fra")
 : tei:placeName values, and per-language search data atomized from every
 : child element with a matching @xml:lang.
 :
 : @param $place element with an @xml:id and language-tagged children
 :               (called with tei:place elements by elastic:setup)
 : @return the result of elastic:index-doc
 :)
declare function elastic:index-place($place) {
    let $doc := map {
        "id": string($place/@xml:id),
        "name": map {
            "de": data($place/tei:placeName[@xml:lang='de']),
            "fr": data($place/tei:placeName[@xml:lang='fra'])
        },
        "search_data": map {
            "de": data($place/*[@xml:lang='de']),
            "fr": data($place/*[@xml:lang='fra'])
        }
    }
    return elastic:index-doc('places', $doc)
};

(:~
 : Builds the search document for one register work and adds it to the
 : "works" index.
 :
 : The document carries the @xml:id, the German ("de") and French ("fra")
 : tei:name values, and per-language search data atomized from every child
 : element with a matching @xml:lang.
 :
 : @param $work element with an @xml:id and language-tagged children
 :              (called with tei:item elements by elastic:setup)
 : @return the result of elastic:index-doc
 :)
declare function elastic:index-work($work) {
    let $names := map {
        "de": data($work/tei:name[@xml:lang='de']),
        "fr": data($work/tei:name[@xml:lang='fra'])
    }
    let $search := map {
        "de": data($work/*[@xml:lang='de']),
        "fr": data($work/*[@xml:lang='fra'])
    }
    return elastic:index-doc(
        'works',
        map {
            "id": string($work/@xml:id),
            "name": $names,
            "search_data": $search
        }
    )
};

(:~
 : Builds the search document for one rendered edition page and adds it to
 : the "edition_pages" index.
 :
 : The edition key is the second token of the text of the
 : xhtml:span[@id='tei-meta-textGridURI'] element after splitting on ":" and
 : "." (presumably a TextGrid URI such as "textgrid:<edition>.<rev>" -
 : confirm against the templates). The page number is the integer value of
 : the xhtml:span[@class='pb'] text; it is computed once and used both in
 : the document id and in the "page_number" field.
 :
 : @param $page   node containing the rendered page markup
 : @param $locale locale stored in the document's "locale" field
 : @return the result of elastic:index-doc
 :)
declare function elastic:index-edition-page($page, $locale) {
    let $uri := string($page//xhtml:span[@id='tei-meta-textGridURI']/text())
    let $edition := fn:tokenize($uri, '[:\.]')[2]
    let $page_number := xs:integer($page//xhtml:span[@class='pb']/text())
    let $doc := map {
        "id": concat('edition-', $edition, '-', $page_number),
        "edition": $edition,
        "page_number": $page_number,
        "locale": $locale,
        "name": string($page//xhtml:h4[starts-with(@id, 'tei-title-main-')][1]),
        "search_data": fn:normalize-space(data($page))
    }
    return elastic:index-doc('edition_pages', $doc)
};

(:~
 : Builds the search document for one wiki page and adds it to the
 : "wiki_pages" index under the document id "<id>_<locale>".
 :
 : Id and locale are both taken from the page's base URI: the sixth
 : "/"-separated token of the URI is split on "_" and "."; the first part is
 : the id and the second part is the locale.
 :
 : NOTE: the $locale argument is not used; the locale derived from the base
 : URI is what gets stored and what goes into the document id.
 :
 : @param $page   node whose base-uri identifies the wiki document
 : @param $locale ignored (see note above)
 : @return the result of the three-argument elastic:index-doc
 :)
declare function elastic:index-wiki-page($page, $locale) {
    let $page_uri := string(base-uri($page))
    let $name_parts := fn:tokenize(fn:tokenize($page_uri, '/')[6], '[_\.]')
    let $page_id := $name_parts[1]
    let $uri_locale := $name_parts[2]
    return elastic:index-doc(
        'wiki_pages',
        $page_id || '_' || $uri_locale,
        map {
            "uri": $page_uri,
            "id": $page_id,
            "locale": $uri_locale,
            "name": data($page//h1),
            "search_data": fn:normalize-space(data($page))
        }
    )
};

Moritz Schepp's avatar
Moritz Schepp committed
155
156
157
158
159
160
161
162
163
164
165
166
(:~
 : Checks the status of an HTTP response and raises an error when it lies
 : outside the range 200-299.
 :
 : The status is read from the @status attribute of the first response item.
 : On failure the second response item is base64-decoded and appended to the
 : error message.
 :
 : @param $response result sequence of http:send-request
 : @return true() when the status is not below 200 and not above 299
 : @error ProdNumReq in namespace http://datypic.com/err, with the status
 :        code and the decoded response body in the description
 :)
declare function elastic:require_ok($response) {
    let $status := xs:integer($response[1]/@status)
    let $out_of_range := $status < 200 or $status > 299
    return
        if (not($out_of_range)) then
            fn:true()
        else
            let $reply_text := util:base64-decode($response[2])
            let $error_name := QName('http://datypic.com/err', 'ProdNumReq')
            return fn:error(
                $error_name,
                "(" || string($status) || ") elasticsearch responded with:" || $reply_text
            )
};

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
(:~
 : Serializes a value to a JSON string using fn:serialize with the "json"
 : output method.
 :
 : @param $data value to serialize (maps are passed in by this module)
 : @return the JSON text
 :)
declare function elastic:to_json($data) {
    fn:serialize($data, map { 'method': 'json' })
};

(:~
 : Rebuilds all search indices from the data stored under $config:app-root.
 :
 : Steps, in the order they appear in the result array:
 :   1. drop the "people", "places", "works", "wiki_pages" and
 :      "edition_pages" indices,
 :   2. create them again with their mappings,
 :   3. index every tei:person, tei:place and tei:item found in the
 :      "templates/register" collection,
 :   4. index every xhtml:div document element of the German and French
 :      edition collections,
 :   5. index every div[@id='wiki'] in the "docs" collection, ordered by the
 :      revision number parsed from the file name (presumably so that the
 :      highest revision is written last - confirm),
 :   6. refresh.
 :
 : The register collection and the index definition shared by the three
 : register indices are each bound once and reused.
 :
 : NOTE(review): collection() is called with several arguments, which is not
 : the standard fn:collection signature; this looks like it relies on the
 : host XQuery processor accepting that form - confirm.
 :
 : @return an array with one member per step listed above, holding the
 :         results of the respective calls
 :)
declare function elastic:setup() {
    let $editions_de := collection(
        concat($config:app-root, '/templates/34zmq'), (: harrach :)
        concat($config:app-root, '/templates/34zs7'), (: sturm :)
        concat($config:app-root, '/templates/3ptwg'), (: corfey :)
        concat($config:app-root, '/templates/3qr4f'), (: neumann :)
        concat($config:app-root, '/templates/34znb'), (: pitzler :)
        concat($config:app-root, '/templates/3c0m2')  (: knesebeck :)
    )
    let $editions_fr := collection(
        concat($config:app-root, '/templates/3czfj'), (: harrach :)
        concat($config:app-root, '/templates/3q4rq'), (: sturm :)
        concat($config:app-root, '/templates/3r0fv'), (: corfey :)
        concat($config:app-root, '/templates/3r3nn'), (: neumann :)
        concat($config:app-root, '/templates/350mg'), (: pitzler :)
        concat($config:app-root, '/templates/3czn9')  (: knesebeck :)
    )
    (: source of all person, place and work entries :)
    let $register := collection(concat($config:app-root, '/templates/register'))
    let $register_mappings := map {
        "_doc": map {
            "properties": map {
                "id": map {"type": "keyword", "index": fn:false()},
                "name": map {
                    "properties": map {
                        "de": map {"type": "text", "index": fn:false()},
                        "fr": map {"type": "text", "index": fn:false()}
                    }
                },
                "search_data": map {
                    "properties": map {
                        "de": map {"type": "text"},
                        "fr": map {"type": "text"}
                    }
                }
            }
        }
    }
    (: index definition shared by the people, places and works indices :)
    let $register_index := map {"mappings": $register_mappings}
    let $r := [
        elastic:drop-index("people"),
        elastic:drop-index("places"),
        elastic:drop-index("works"),
        elastic:drop-index("wiki_pages"),
        elastic:drop-index("edition_pages"),

        elastic:create-index("people", $register_index),
        elastic:create-index("places", $register_index),
        elastic:create-index("works", $register_index),
        elastic:create-index("wiki_pages", map {"mappings": map {
            "_doc": map {
                "properties": map {
                    "id": map {"type": "keyword", "index": fn:false()},
                    "locale": map {"type": "keyword"},
                    "name": map {"type": "text"},
                    "search_data": map {"type": "text"}
                }
            }
        }}),
        elastic:create-index("edition_pages", map {"mappings": map {
            "_doc": map {
                "properties": map {
                    "id": map {"type": "keyword", "index": fn:false()},
                    "edition": map {"type": "keyword", "index": fn:false()},
                    "page_number": map {"type": "integer"},
                    "locale": map {"type": "keyword"},
                    "name": map {"type": "text"},
                    "search_data": map {"type": "text"}
                }
            }
        }}),

        for $doc in $register//tei:person
            return elastic:index-person($doc),

        for $doc in $register//tei:place
            return elastic:index-place($doc),

        for $doc in $register//tei:item
            return elastic:index-work($doc),

        for $doc in $editions_de/xhtml:div
            return elastic:index-edition-page($doc, "de"),

        for $doc in $editions_fr/xhtml:div
            return elastic:index-edition-page($doc, "fr"),

        for $doc in collection(concat($config:app-root, '/docs'))//div[@id='wiki']
            let $uri := string(base-uri($doc))
            let $doc_name := fn:tokenize($uri, '/')[6]
            let $parts := fn:tokenize($doc_name, '[_\.]')
            let $locale := $parts[2]
            (: third file-name part with the literal "rev" removed, as an integer :)
            let $revision := xs:integer(replace($parts[3], 'rev', ''))
            order by $revision
            return elastic:index-wiki-page($doc, $locale),

        elastic:refresh()
    ]

    return $r
};