Commit f704ce6e authored by Moritz Schepp
Browse files

Adds custom French analyzer, changes indexing data

parent 9ef5e4a6
......@@ -91,15 +91,14 @@ declare function elastic:index-person($person) {
"fr": data($person/tei:persName[@xml:lang='fra'])
},
"search_data": map {
"de": $person/*[@xml:lang='de']/text(),
"fr": $person/*[@xml:lang='fra']/text()
"de": data($person/*[@xml:lang='de'] | $person/*[not(@xml:lang)]),
"fr": data($person/*[@xml:lang='fra'] | $person/*[not(@xml:lang)])
}
}
return elastic:index-doc('people', $doc)
};
declare function elastic:index-place($place) {
let $x := fn:trace($place, "Bla")
let $doc := map {
"id": string($place/@xml:id),
"name": map {
......@@ -133,13 +132,14 @@ declare function elastic:index-edition-page($page, $locale) {
let $uri := string($page//xhtml:span[@id='tei-meta-textGridURI']/text())
let $edition := fn:tokenize($uri, '[:\.]')[2]
let $page_number := xs:integer($page//xhtml:span[@class='pb']/text())
let $body := data($page/xhtml:div[@class='body'])
let $doc := map {
"id": concat('edition-', $edition, '-', $page_number),
"edition": $edition,
"page_number": xs:integer($page//xhtml:span[@class='pb']/text()),
"locale": $locale,
"name": string($page//xhtml:h4[starts-with(@id, 'tei-title-main-')][1]),
"search_data": fn:normalize-space(data($page))
"search_data": fn:normalize-space($body)
}
return elastic:index-doc('edition_pages', $doc)
};
......@@ -155,7 +155,7 @@ declare function elastic:index-wiki-page($page, $locale) {
"uri": $uri,
"id": $id,
"locale": $locale,
"name": data($page//h1),
"name": data($page//h1[1]),
"search_data": fn:normalize-space(data($page))
}
return elastic:index-doc('wiki_pages', $elastic_doc_id, $doc)
......@@ -195,20 +195,54 @@ declare function elastic:setup() {
concat($config:app-root, '/templates/350mg'), (: pitzler :)
concat($config:app-root, '/templates/3czn9') (: knesebeck :)
)
let $settings := map {
"analysis": map {
"filter": map {
"french_elision": map {
"type": "elision",
"articles_case": fn:true(),
"articles": [
"l", "m", "t", "qu", "n", "s",
"j", "d", "c", "jusqu", "quoiqu",
"lorsqu", "puisqu"
]
},
"french_stop": map {
"type": "stop",
"stopwords": "_french_"
},
"french_stemmer": map {
"type": "stemmer",
"language": "light_french"
}
},
"analyzer": map {
"at_french": map {
"tokenizer": "standard",
"filter": [
"french_elision",
"lowercase",
"asciifolding",
"french_stop"
]
}
}
}
}
let $register_mappings := map {
"_doc": map {
"properties": map {
"id": map {"type": "keyword", "index": fn:false()},
"name": map {
"properties": map {
"de": map {"type": "text", "index": fn:false()},
"fr": map {"type": "text", "index": fn:false()}
"de": map {"type": "text"},
"fr": map {"type": "text", "analyzer": "at_french"}
}
},
"search_data": map {
"properties": map {
"de": map {"type": "text"},
"fr": map {"type": "text"}
"fr": map {"type": "text", "analyzer": "at_french"}
}
}
}
......@@ -221,28 +255,28 @@ declare function elastic:setup() {
elastic:drop-index("wiki_pages"),
elastic:drop-index("edition_pages"),
elastic:create-index("people", map {"mappings": $register_mappings}),
elastic:create-index("places", map {"mappings": $register_mappings}),
elastic:create-index("works", map {"mappings": $register_mappings}),
elastic:create-index("wiki_pages", map {"mappings": map {
elastic:create-index("people", map {"settings": $settings, "mappings": $register_mappings}),
elastic:create-index("places", map {"settings": $settings, "mappings": $register_mappings}),
elastic:create-index("works", map {"settings": $settings, "mappings": $register_mappings}),
elastic:create-index("wiki_pages", map {"settings": $settings, "mappings": map {
"_doc": map {
"properties": map {
"id": map {"type": "keyword", "index": fn:false()},
"locale": map {"type": "keyword"},
"name": map {"type": "text"},
"search_data": map {"type": "text"}
"name": map {"type": "text", "analyzer": "at_french"},
"search_data": map {"type": "text", "analyzer": "at_french"}
}
}
}}),
elastic:create-index("edition_pages", map {"mappings": map {
elastic:create-index("edition_pages", map {"settings": $settings, "mappings": map {
"_doc": map {
"properties": map {
"id": map {"type": "keyword", "index": fn:false()},
"edition": map {"type": "keyword", "index": fn:false()},
"page_number": map {"type": "integer"},
"locale": map {"type": "keyword"},
"name": map {"type": "text"},
"search_data": map {"type": "text"}
"name": map {"type": "text", "analyzer": "at_french"},
"search_data": map {"type": "text", "analyzer": "at_french"}
}
}
}}),
......
......@@ -21,6 +21,10 @@
"webpack-cli": "^3.3.10",
"webpack-dev-server": "^3.9.0"
},
"scripts": {
"dev": "webpack-dev-server",
"build": "webpack-cli --mode=production"
},
"dependencies": {
"react-router-dom": "^5.1.2"
}
......
......@@ -45,6 +45,7 @@ export class ArchSearchResult extends React.Component {
}
openResult() {
console.log('source', this.props.data._source)
if (this.isType('person')) {openPersName(this.props.data._source.id)}
if (this.isType('work')) {openArtWork(this.props.data._source.id)}
if (this.isType('place')) {openPlaceName(this.props.data._source.id)}
......@@ -69,14 +70,20 @@ export class ArchSearchResult extends React.Component {
if (this.isType('register')) {
const d = this.props.data
console.log(d, locale())
const hl =
d.highlight[`name.${locale()}`] ||
d._source.name[locale()] ||
d.highlight[`name.${otherLocale()}`] ||
d._source.name[otherLocale()] ||
[]
let result = hl[0] ||
d._source.name[locale()] ||
d._source.name[otherLocale()]
let result = hl
// let result = hl[0] ||
// d._source.name[locale()] ||
// d._source.name[otherLocale()]
// it's an array if there are e.g. historical names (or empty tags for
// historical names)
......
......@@ -67,7 +67,7 @@ export default class ArchSearchResults extends React.Component {
<ul className="nav nav-pills nav-fill">
<TabSwitcher
tab="editions"
label={tcap('editions')}
label={tcap('edition_pages')}
active={tab}
clickHandler={(e) => this.tabTo(e, 'editions')}
total={this.state.results.editions.hits.total}
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment