Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ARCHITRAVE
SADE
Commits
e34aea9d
Commit
e34aea9d
authored
Sep 23, 2020
by
Moritz Schepp
Browse files
implement elasticsearch indexing and triggers
parent
621043f3
Changes
6
Hide whitespace changes
Inline
Side-by-side
collection.xconf
View file @
e34aea9d
<?xml version="1.0" encoding="UTF-8"?>
<collection
xmlns=
"http://exist-db.org/collection-config/1.0"
>
<index
xmlns:tei=
"http://www.tei-c.org/ns/1.0"
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
xmlns:xhtml=
"http://www.w3.org/1999/xhtml"
>
<lucene>
<analyzer
class=
"org.apache.lucene.analysis.standard.StandardAnalyzer"
/>
<text
match=
"//xhtml:div"
/>
<text
qname=
"tei:person"
>
<field
name=
"name_de"
expression=
"tei:persName[@xml:lang='de']"
/>
<field
name=
"name_fr"
expression=
"tei:persName[@xml:lang='fra']"
/>
<field
name=
"nationality_de"
expression=
"tei:nationality[@xml:lang='de']"
/>
<field
name=
"nationality_fr"
expression=
"tei:nationality[@xml:lang='fra']"
/>
<field
name=
"occupation_de"
expression=
"tei:occupation[@xml:lang='de']"
/>
<field
name=
"occupation_fr"
expression=
"tei:occupation[@xml:lang='fra']"
/>
<field
name=
"note_de"
expression=
"tei:note[@xml:lang='de']"
/>
<field
name=
"note_fr"
expression=
"tei:note[@xml:lang='fra']"
/>
<field
name=
"birth"
expression=
"tei:birth"
/>
<field
name=
"death"
expression=
"tei:death"
/>
</text>
<text
qname=
"tei:place"
>
<field
name=
"name_de"
expression=
"tei:placeName[@xml:lang='de']"
/>
<field
name=
"name_fr"
expression=
"tei:placeName[@xml:lang='fra']"
/>
<field
name=
"note_de"
expression=
"tei:note[@xml:lang='de']"
/>
<field
name=
"note_fr"
expression=
"tei:note[@xml:lang='fra']"
/>
<field
name=
"artist"
expression=
"tei:note[@tei:type='artist']"
/>
</text>
<text
qname=
"tei:item"
>
<field
name=
"name_de"
expression=
"tei:name[@xml:lang='de']"
/>
<field
name=
"name_fr"
expression=
"tei:name[@xml:lang='fra']"
/>
<field
name=
"date_de"
expression=
"tei:date[@xml:lang='de']"
/>
<field
name=
"date_fr"
expression=
"tei:date[@xml:lang='fra']"
/>
<field
name=
"note_de"
expression=
"tei:note[@xml:lang='de']"
/>
<field
name=
"note_fr"
expression=
"tei:note[@xml:lang='fra']"
/>
<field
name=
"artist"
expression=
"tei:note[@tei:type='artist']/persName"
/>
<field
name=
"location"
expression=
"tei:location/placeName"
/>
</text>
</lucene>
<index
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
>
<fulltext
default=
"none"
attributes=
"false"
/>
</index>
<triggers>
<trigger
class=
"org.exist.extensions.exquery.restxq.impl.RestXqTrigger"
/>
...
...
helpers/redindex.xq
View file @
e34aea9d
xquery
version
"3.1"
;
xmldb:reindex
(
'/db/apps/sade-architrave'
)
\ No newline at end of file
(:xmldb:reindex('/db/apps/sade-architrave'):)
import
module
namespace
elastic
=
"http://elastic.io"
at
"elastic.xqm"
;
elastic:setup
()
modules/api.xq
View file @
e34aea9d
xquery
version
"3.1"
;
import
module
namespace
config
=
"https://sade.textgrid.de/ns/config"
at
"config.xqm"
;
import
module
namespace
kwic
=
"http://exist-db.org/xquery/kwic"
;
(:import module namespace config="https://sade.textgrid.de/ns/config" at "config.xqm";:)
(:import module namespace kwic="http://exist-db.org/xquery/kwic";:)
import
module
namespace
elastic
=
"http://elastic.io"
at
"elastic.xqm"
;
declare
namespace
output
=
"http://www.w3.org/2010/xslt-xquery-serialization"
;
declare
namespace
tei
=
"http://www.tei-c.org/ns/1.0"
;
declare
namespace
xhtml
=
"http://www.w3.org/1999/xhtml"
;
(:
declare namespace tei="http://www.tei-c.org/ns/1.0";
:)
(:
declare namespace xhtml="http://www.w3.org/1999/xhtml";
:)
declare
option
output:method
"json"
;
declare
option
output:media-type
"application/json"
;
declare
variable
$action
:=
request:get-parameter
(
'action'
,
'search'
);
(: :declare variable $terms := request:get-parameter('terms', '*:*');:)
declare
variable
$terms
:=
request:get-parameter
(
'terms'
,
''
);
declare
variable
$page
:=
xs:integer
(
request:get-parameter
(
'page'
,
1
));
declare
variable
$per_page
:=
xs:integer
(
request:get-parameter
(
'per_page'
,
10
));
declare
variable
$locale
:=
request:get-parameter
(
'locale'
,
'any'
);
declare
variable
$app-root
:=
'/db/apps/sade-architrave'
;
(: Full-text search over the rendered edition pages via the Lucene index.
 :
 : Picks the edition collections for the requested $locale ('de', 'fr', or
 : anything else for both), runs ft:query with $terms against every
 : xhtml:div directly below the documents in those collections and turns
 : each hit into a result map. The maps are ordered by title and page number
 : and handed to local:paginate with summaries requested.
 :
 : Reads the module-level $locale and $terms as well as $config:app-root.
 :)
declare function local:search-editions-lucene() {
    (: German edition collections :)
    let $editions_de := collection(
        concat($config:app-root, '/templates/34zmq'), (: harrach :)
        concat($config:app-root, '/templates/34zs7'), (: sturm :)
        concat($config:app-root, '/templates/3ptwg'), (: corfey :)
        concat($config:app-root, '/templates/3qr4f'), (: neumann :)
        concat($config:app-root, '/templates/34znb'), (: pitzler :)
        concat($config:app-root, '/templates/3c0m2')  (: knesebeck :)
    )
    (: French edition collections :)
    let $editions_fr := collection(
        concat($config:app-root, '/templates/3czfj'), (: harrach :)
        concat($config:app-root, '/templates/3q4rq'), (: sturm :)
        concat($config:app-root, '/templates/3r0fv'), (: corfey :)
        concat($config:app-root, '/templates/3r3nn'), (: neumann :)
        concat($config:app-root, '/templates/350mg'), (: pitzler :)
        concat($config:app-root, '/templates/3czn9')  (: knesebeck :)
    )
    (: For "any" locale the two document sets are simply concatenated, so
     : the twelve collection paths do not have to be listed a second time.
     : The path step below returns the nodes in document order either way. :)
    let $editions :=
        if ($locale eq 'de') then $editions_de
        else if ($locale eq 'fr') then $editions_fr
        else ($editions_de, $editions_fr)
    let $pages := $editions/xhtml:div
    let $results :=
        for $hit in $pages[ft:query(., $terms)]
        let $textgrid_uri := string($hit//xhtml:span[@id = 'tei-meta-textGridURI']/text())
        (: second token when the URI is split on ':' and '.' :)
        let $edition := fn:tokenize($textgrid_uri, '[:\.]')[2]
        let $page_number := xs:integer($hit//xhtml:span[@class = 'pb']/text())
        let $text := fn:normalize-space(data($hit))
        let $name := string($hit//xhtml:h4[starts-with(@id, 'tei-title-main-')][1])
        let $score as xs:float := ft:score($hit)
        order by $name, $page_number
        return
            map {
                'type': 'edition',
                'id': concat('edition-', $edition, '-', $page_number),
                'name': $name,
                'edition': $edition,
                'page': $page_number,
                'text': $text,
                'score': $score,
                (: the matching node itself; local:paginate is asked for summaries :)
                'hit': $hit
            }
    return local:paginate($results, fn:true())
};
(: Wraps every wiki page in an artificial <arch-wiki-page> element that
 : carries the page's id, locale, signature ("<id>-<locale>") and revision
 : as attributes, so later code can group and filter on those values.
 :
 : The values come from the document name, which is taken as the 6th
 : '/'-separated segment of the page's base URI and split on '_' and '.':
 : part 1 is the id, part 2 the locale, and part 3 (with 'rev' removed and
 : cast to an integer) the revision. :)
declare function local:wrap-wiki-pages() {
    for $wiki in collection(concat($config:app-root, '/docs'))//div[@id = 'wiki']
    let $segments := fn:tokenize(string(base-uri($wiki)), '/')
    let $name-parts := fn:tokenize($segments[6], '[_\.]')
    let $wiki-id := $name-parts[1]
    let $lang := $name-parts[2]
    let $rev := xs:integer(replace($name-parts[3], 'rev', ''))
    return
        <arch-wiki-page wiki-id="{$wiki-id}" locale="{$lang}" signature="{$wiki-id}-{$lang}" revision="{$rev}">{$wiki}</arch-wiki-page>
};
(: Returns only the newest revision of each wiki page: the wrapped pages
 : from local:wrap-wiki-pages are grouped by their signature attribute and,
 : per signature, only the wrappers with the highest revision are kept. :)
declare function local:recent-wiki-content() {
    let $wrapped := local:wrap-wiki-pages()
    for $signature in distinct-values($wrapped/@signature)
    let $candidates := $wrapped[@signature = $signature]
    (: revisions sorted numerically, newest first; take the top one :)
    let $newest := (
        for $rev in $candidates/@revision
        order by xs:integer($rev) descending
        return $rev
    )[1]
    return $candidates[@revision = $newest]
};
(: doesn't work because the search base is generic as they are wrapped documents
: so the index doesn't get triggered on them and ft:query simply returns the
: empty sequence :)
(: :declare function local:search-pages-lucene() {
let $wikis := local:recent-wiki-content()
let $hits := $wikis[ft:query(., $terms)]
let $results :=
for $hit in $hits
let $score as xs:float := ft:score($hit)
let $id := string($hit/@id)
let $l := string($hit/@locale)
let $revision := xs:integer($hit/@revision)
let $text := fn:normalize-space(data($hit))
let $title := string(($hit//h1)[1])
let $localeMatch :=
local:locale(fn:false()) eq 'any' or
$l eq local:locale(fn:false())
where $localeMatch
return
map {
'type': 'page',
'id': concat('page-', $id, '-', $l, '-', $revision),
'name': $title,
'confluence_id': $id,
'locale': $l,
'revision': $revision,
'text': $text,
'title': $title,
'hit': $hit,
'score': $score
}
return local:paginate($results, fn:true())
};
:)
declare
function
local:search-pages
()
{
let
$results
:=
for
$page
in
local:recent-wiki-content
()
let
$id
:=
$page
/
@id
let
$l
:=
string
(
$page
/
@locale
)
let
$revision
:=
xs:integer
(
$page
/
@revision
)
let
$text
:=
fn:normalize-space
(
data
(
$page
))
let
$title
:=
string
((
$page
//
h1
)[
1
])
let
$localeMatch
:=
local:locale
(
fn:false
())
eq
'any'
or
$l
eq
local:locale
(
fn:false
())
where
fn:matches
(
$text
,
$terms
,
'i'
)
and
$localeMatch
return
map
{
'type'
:
'page'
,
'id'
:
concat
(
'page-'
,
$id
,
'-'
,
$l
,
'-'
,
$revision
)
,
'name'
:
$title
,
'confluence_id'
:
$id
,
'locale'
:
$l
,
'revision'
:
$revision
,
'text'
:
$text
,
'title'
:
$title
declare
function
local:search-editions-elastic
()
{
let
$filters
:=
if
(
$locale
eq
'any'
)
then
[]
else
[
map
{
"term"
:
map
{
"locale"
:
$locale
}}
]
let
$response
:=
elastic:search
(
"edition_pages"
,
map
{
"from"
:
(
$page
-
1
)
*
$per_page
,
"size"
:
$per_page
,
"query"
:
map
{
"bool"
:
map
{
"must"
:
[
map
{
"query_string"
:
map
{
"query"
:
$terms
}
}
]
,
"filter"
:
$filters
}
return
local:paginate
(
$results
,
fn:false
())
};
declare
function
local:search-people-lucene
()
{
let
$people
:=
collection
(
concat
(
$config:app-root
,
'/templates/register'
))
//
tei:person
let
$query
:=
if
(
$locale
eq
'any'
)
then
$terms
else
fn:string-join
(
(
'name_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'nationality_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'occupation_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'note_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'birth:('
,
$terms
,
') OR '
,
'death:('
,
$terms
,
')'
)
,
''
)
let
$hits
:=
$people
[
ft:query
(
.,
$query
)]
let
$l
:=
if
(
$locale
eq
'any'
)
then
'de'
else
local:locale
()
let
$results
:=
for
$hit
in
$hits
let
$id
:=
string
(
$hit
/
@xml:id
)
let
$name
:=
string
(
$hit
/
tei:persName
[
@xml:lang
=
$l
])
return
map
{
'type'
:
'person'
,
'id'
:
concat
(
'person-'
,
$id
)
,
'name'
:
$name
,
'textgrid_id'
:
$id
,
'de'
:
ft:get-field
(
$hit
,
'de'
)
}
,
"highlight"
:
map
{
"fragment_size"
:
100
,
"fields"
:
map
{
"search_data"
:
map
{}
}
return
local:paginate
(
$results
,
fn:false
())
};
(:
declare function local:search-people() {
let $persons := doc(concat($config:app-root, '/templates/register/persons.xml'))
let $l :=
if (local:locale() eq 'any') then 'de'
else local:locale()
let $results :=
for $person in $persons//tei:listPerson/tei:person
let $id := string($person/@xml:id)
let $name := string($person/tei:persName[@xml:lang=$l])
where fn:matches($name, $terms, 'i')
order by $name
return
map {
'type': 'person',
'id': concat('person-', $id),
'name': $name,
'textgrid_id': $id
}
return
local:paginate($results, fn:false())
};
:)
declare
function
local:search-works-lucene
()
{
let
$people
:=
collection
(
concat
(
$config:app-root
,
'/templates/register'
))
//
tei:item
let
$query
:=
if
(
$locale
eq
'any'
)
then
$terms
else
fn:string-join
(
(
'name_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'date_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'note_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'artist:('
,
$terms
,
') OR '
,
'location:('
,
$terms
,
')'
)
,
''
)
let
$hits
:=
$people
[
ft:query
(
.,
$query
)]
let
$l
:=
if
(
$locale
eq
'any'
)
then
'de'
else
local:locale
()
let
$results
:=
for
$hit
in
$hits
let
$id
:=
string
(
$hit
/
@xml:id
)
let
$name
:=
string
(
$hit
/
tei:name
[
@xml:lang
=
$l
])
return
map
{
'type'
:
'work'
,
'id'
:
concat
(
'work-'
,
$id
)
,
'name'
:
$name
,
'textgrid_id'
:
$id
}
})
return
$response
};
declare
function
local:search-pages-elastic
()
{
let
$filters
:=
if
(
$locale
eq
'any'
)
then
[]
else
[
map
{
"term"
:
map
{
"locale"
:
$locale
}}
]
let
$response
:=
elastic:search
(
"wiki_pages"
,
map
{
"from"
:
(
$page
-
1
)
*
$per_page
,
"size"
:
$per_page
,
"query"
:
map
{
"bool"
:
map
{
"must"
:
[
map
{
"query_string"
:
map
{
"query"
:
$terms
}
}
]
,
"filter"
:
$filters
}
return
local:paginate
(
$results
,
fn:false
())
};
(:
declare function local:search-works() {
let $works := doc(concat($config:app-root, '/templates/register/works.xml'))
let $l :=
if (local:locale() eq 'any') then 'de'
else local:locale()
let $results :=
for $work in $works//tei:list[@type='artworks']/tei:item
let $id := string($work/@xml:id)
let $name := string($work/tei:name[@xml:lang=$l])
where fn:matches($name, $terms, 'i')
order by $name
return
map {
'type': 'work',
'id': concat('work-', $id),
'name': $name,
'textgrid_id': $id
}
,
"highlight"
:
map
{
"fragment_size"
:
100
,
"fields"
:
map
{
"search_data"
:
map
{}
}
return
local:paginate($results, fn:false())
}
})
return
$response
};
:)
declare
function
local:search-places-lucene
()
{
let
$people
:=
collection
(
concat
(
$config:app-root
,
'/templates/register'
))
//
tei:place
let
$query
:=
if
(
$locale
eq
'any'
)
then
$terms
declare
function
local:search-register-elastic
(
$type
)
{
let
$fields
:=
if
(
$locale
eq
'de'
)
then
[
'search_data.de'
]
else
if
(
$locale
eq
'fr'
)
then
[
'search_data.fr'
]
else
fn:string-join
(
(
'name_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'note_'
,
$locale
,
':('
,
$terms
,
') OR '
,
'artist:('
,
$terms
,
')'
)
,
''
)
let
$hits
:=
$people
[
ft:query
(
.,
$query
)]
let
$l
:=
if
(
$locale
eq
'any'
)
then
'de'
else
local:locale
()
let
$results
:=
for
$hit
in
$hits
let
$id
:=
string
(
$hit
/
@xml:id
)
let
$name
:=
string
(
$hit
/
tei:placeName
[
@type
=
'current'
and
@xml:lang
=
$l
])
let
$description
:=
string
(
$hit
/
tei:note
[
@type
=
'description'
and
@xml:lang
=
$l
])
return
map
{
'type'
:
'place'
,
'id'
:
concat
(
'place-'
,
$id
)
,
'name'
:
$name
,
'description'
:
$description
,
'textgrid_id'
:
$id
[
'search_data.de'
,
'search_data.fr'
]
let
$response
:=
elastic:search
(
$type
,
map
{
"from"
:
(
$page
-
1
)
*
$per_page
,
"size"
:
$per_page
,
"query"
:
map
{
"query_string"
:
map
{
"query"
:
$terms
,
"fields"
:
$fields
}
return
local:paginate
(
$results
,
fn:false
())
};
(:
declare function local:search-places() {
let $places := doc(concat($config:app-root, '/templates/register/places.xml'))
let $l :=
if (local:locale() eq 'any') then 'de'
else local:locale()
let $results :=
for $place in $places//tei:listPlace/tei:place
let $id := string($place/@xml:id)
let $name := string($place/tei:placeName[@type='current' and @xml:lang=$l])
let $description := string($place/tei:note[@type='description'])
where
fn:matches($name, $terms, 'i') or
fn:matches($description, $terms, 'i')
order by $name
return
map {
'type': 'place',
'id': concat('place-', $id),
'name': $name,
'description': $description,
'textgrid_id': $id
}
,
"highlight"
:
map
{
"fragment_size"
:
100
,
"fields"
:
map
{
"search_data.de"
:
map
{}
,
"search_data.fr"
:
map
{}
}
return
local:paginate($results, fn:false())
};
:)
(: Cuts the requested page out of a result sequence and returns it together
 : with paging metadata.
 :
 : Reads the module-level $page and $per_page. The requested page is clamped
 : to the last available page.
 :
 : $results        sequence of result maps
 : $add_summaries  when true, every returned map gets a 'summary' entry
 :                 built by kwic:summarize from its 'hit' entry, and 'hit'
 :                 itself is replaced by the empty sequence
 :
 : Returns a map with the keys 'results' (always an array), 'total',
 : 'page', 'per_page' and 'pages'. :)
declare function local:paginate($results, $add_summaries) {
    let $total := fn:count($results)
    let $pages := fn:ceiling(xs:double($total) div $per_page)
    let $new_page := fn:min(($pages, $page))
    let $limited :=
        for $result in subsequence($results, ($new_page - 1) * $per_page + 1, $per_page)
        return
            if ($add_summaries eq fn:true()) then
                let $summary := kwic:summarize($result('hit'), <config width="80"/>)
                (: map:put always replaces an existing entry, so blanking
                 : 'hit' does not depend on how map:merge resolves
                 : duplicate keys (the XQuery 3.1 default is use-first) :)
                return
                    $result
                        => map:put('summary', $summary)
                        => map:put('hit', ())
            else
                $result
    return
        map {
            (: wrap the page slice itself in an array: the previous check on
             : $total left a one-item last page of a longer result list
             : unwrapped :)
            'results': array { $limited },
            'total': $total,
            'page': $new_page,
            'per_page': $per_page,
            'pages': $pages
        }
};
(: Returns the request locale with 'fr' mapped to 'fra'; any other value of
 : the module-level $locale is returned unchanged. :)
declare function local:locale() {
    switch ($locale)
        case 'fr' return 'fra'
        default return $locale
};
declare
function
local:locale
(
$fix_fra
as
xs:boolean
)
{
if
(
$fix_fra
eq
fn:true
())
then
local:locale
()
else
$locale
})
return
$response
};
declare
function
local:error
(
$message
as
xs:string
,
$status
as
xs:integer
)
{
...
...
@@ -410,15 +111,15 @@ declare function local:error($message as xs:string, $status as xs:integer) {
declare
function
local:route
()
{
if
(
$action
eq
'search-people'
)
then
local:search-
people-lucene
(
)
local:search-
register-elastic
(
'people'
)
else
if
(
$action
eq
'search-works'
)
then
local:search-
works-lucene
(
)
local:search-
register-elastic
(
'works'
)
else
if
(
$action
eq
'search-places'
)
then
local:search-
places-lucene
(
)
local:search-
register-elastic
(
'places'
)
else
if
(
$action
eq
'search-editions'
)
then
local:search-editions-
lucene
()
local:search-editions-
elastic
()
else
if
(
$action
eq
'search-pages'
)
then
local:search-pages
()
local:search-pages
-elastic
()
else
local:error
(
'unknown action'
,
400
)
};
...
...
modules/elastic.xqm
0 → 100644
View file @
e34aea9d
xquery version "3.1";
(: The module declaration already binds the "elastic" prefix; binding the
 : same prefix again with "declare namespace" is a static error
 : (err:XQST0033) per the XQuery specification, so it is declared only here. :)
module namespace elastic="http://elastic.io";
import module namespace config="https://sade.textgrid.de/ns/config" at "config.xqm";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace xhtml="http://www.w3.org/1999/xhtml";
declare namespace http="http://expath.org/ns/http-client";
(: base URL used to build every request href in this module :)
declare variable $elastic:uri := 'http://127.0.0.1:9200';
(: string placed directly in front of the index name in request hrefs :)
declare variable $elastic:prefix := 'at_';
(: POSTs $query, serialized with elastic:to_json, to the _search endpoint
 : of the prefixed index $index and returns the parsed response.
 :
 : The second item of the http:send-request result is base64-decoded and
 : then parsed with parse-json. :)
declare function elastic:search($index, $query) {
    let $request :=
        <http:request method="POST" href="{$elastic:uri}/{$elastic:prefix}{$index}/_search">
            <http:body media-type="application/json" method="text">{elastic:to_json($query)}</http:body>
        </http:request>
    return
        http:send-request($request)[2]
            => util:base64-decode()
            => parse-json()
};
(: Sends a DELETE request for the prefixed index $index and returns the
 : result of http:send-request unchanged. :)
declare function elastic:drop-index($index) {
    http:send-request(
        <http:request method="DELETE" href="{$elastic:uri}/{$elastic:prefix}{$index}"/>
    )
};
(: Sends a PUT request for the prefixed index $index with $data, serialized
 : by elastic:to_json, as the JSON body. Returns the result of
 : http:send-request unchanged. :)
declare function elastic:create-index($index, $data) {
    let $request :=
        <http:request method="PUT" href="{$elastic:uri}/{$elastic:prefix}{$index}">
            <http:body media-type="application/json" method="text">{elastic:to_json($data)}</http:body>
        </http:request>
    return http:send-request($request)
};