diff --git a/README.md b/README.md index 48a9fbc9006f36d7ffc003a280875600165961f4..949f1dabbeff50a93f6747f441941c2c400cc2f6 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # bdn:IntermediateFormat.v2 - eXistDB Application -This repos contains the BdN IntermediateFormat-conversion Application to produce several kinds of intermediate formats of the XML-Data of the DFG-Project "Bibliothek der Neologie" +This repository contains the BdN IntermediateFormat-conversion application to produce several kinds of intermediate formats of the XML-Data of the DFG-Project "Bibliothek der Neologie" # Requirements @@ -8,29 +8,9 @@ This repos contains the BdN IntermediateFormat-conversion Application to produce # Setup -- Call `ant` to build the app as `build/interformat.{VERSION}.xar` +- call `ant` to build the app as `build/interformat.{VERSION}.xar` - integrate it into your eXistDB instance -# Modules -## modules/intermediate_format/identification.xqm - - This is the main module integrating the main conversion and node identification functions - - place it in your app modules path: "/modules/intermediate_format/inter_form.xqm" - -## modules/intermediate_format/markerset.xqm - - Functions to collect and construct markers - - place it in your app modules path: "/modules/intermediate_format/markerset.xqm" - -## modules/intermediate_format/preprocessing.xqm - - Contains the preprocessing routine - - place it in your app modules path: "/modules/intermediate_format/preprocessing.xqm" - -## modules/intermediate_format/whitespace-handling.xqm - - Functions for whitespace-handling - - place it in your app modules path: "/modules/intermediate_format/whitespace-handling.xqm" - -## rest/intermediate_function.xql - - This is the REST script running the conversion on a given document within eXist-DB - - place it in your app somewhere or as suggested here in a subfolder /rest # Use - call the IntermediateFormat API with GET -Parameter `resource`: 
http://localhost:{PORT}/exist/apps/interformat/rest/convert?resource={RESOURCE-URI} @@ -38,33 +18,33 @@ This repos contains the BdN IntermediateFormat-conversion Application to produce - call intermediate_format.xql via REST with the GET-Parameter `resource`: http://localhost:{PORT}/exist/rest/apps/interformat/rest/intermediate_format.xql?resource={RESOURCE-URI} - the resource must be from your eXist-instance context - - If you like to store the result add method=store as get-parameter - + - if you would like to store the result, add `method=store` as a GET parameter + # Changes of the Intermediate Format - note: All changes are done (not quite right) in the tei-namespace! -## "editorialNotes Section" +## "editorialNotes section" - Section as last child of tei:TEI where all note[@type="editorial"] are collected during the preprocessing. Every note[@type="editorial"] is thus ignored in its original context ## "aligned" - new element; all tei:hi[@rend ="right-aligned" or @rend="center-aligned"] are converted to aligned[@rend ="right-aligned" or @rend="center-aligned"] - - name: "aligned", + - name: "aligned", - attributes: same as tei:hi in original data ## "seg[@type='item' or @type='head' or @type='row']" vs tei:item or tei:head or tei:row - conversion of seg[@type='item' or @type=''head or @type='row'] into tei:item or tei:head or tei:row ## "rdgMarker" - - new element representing Siglae + - new element representing scribal abbreviations (sigla) - name: "rdgMarker" - - attributes: @wit(Witness without '#'), @ref(Reference to @id of tei:lem or tei:rdg without '#'), @type(same as tei:rdg), @mark('open' or 'close'), @context(Context of the Marker - "rdg" or "lem") + - attributes: @wit (witness without '#'), @ref (reference to @id of tei:lem or tei:rdg without '#'), @type (same as tei:rdg), @mark ('open' or 'close'), @context (context of the marker - "rdg" or "lem") ## "tei:rdg" or "tei:lem" - - new attribute: @id(generated id during the preprocessing, serving as referenced id 
by the rdgMarkers) + - new attribute: @id (generated ID during the preprocessing, serving as referenced id by the rdgMarkers) ## "tei:pb[@break="no"]" - - new attribute: @break="no"(In cases a tei:pb is directly preceeded or followed by a character not self whitespace) + - new attribute: @break="no" (In cases where a tei:pb is directly preceded or followed by a non-whitespace character) ## text() and whitespace - - All whitespaces in text() are replaced by NON-BREAKING SPACE (U+00A0,  ) to save whitespace during the processing + - All whitespaces in text() are replaced by @ to save whitespace during the processing diff --git a/modules/intermediate_format/markerset.xqm b/modules/intermediate_format/markerset.xqm index 4a9f0fc33f03b3c79714ec95d932b7da4bb939fe..c260317fc2cc69279908dbf259c10c759cb9816f 100644 --- a/modules/intermediate_format/markerset.xqm +++ b/modules/intermediate_format/markerset.xqm @@ -17,7 +17,7 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; (:~ : markerset:collect-markers() : This function collect markers for a given reading. - : It destinguishes tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collect itself. + : It distinguishes tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collects itself. 
: : @param $reading the reading node to collect readings for : @return node() representing a markerset of readings for the given node @@ -31,9 +31,9 @@ declare function markerset:collect-markers let $markers := ( if ($reading[self::lem]) then ( - (: In case of tei:lem ignore all siglae for types "typo_corr", "invisible-ref", "varying-target" :) + (: In case of tei:lem ignore all sigla for types "typo-correction", "invisible-ref", "varying-target" :) attribute {"count"}{count($reading/following-sibling::rdg)}, - for $sibling in $reading/following-sibling::rdg[ not(@type="typo_corr" or @type="invisible-ref" or @type="varying-target") ] + for $sibling in $reading/following-sibling::rdg[ not(@type="typo-correction" or @type="invisible-ref" or @type="varying-target") ] return( element {name($sibling)} { $sibling/@*, @@ -42,22 +42,10 @@ declare function markerset:collect-markers ) ) else if ($reading[self::rdg]) then ( - (: Preparing "Leittextwechsel" :) - if ($reading[@type = "ppl" or @type = "pp"][descendant::lem[@wit]]) then ( - let $children-readings := $reading/descendant::rdg - return( - element {name($reading)} { - $reading/@*, - attribute {"context"}{"changed-lem ", data($children-readings/@wit)} - } - ) - ) - else ( element {name($reading)} { $reading/@*, attribute {"context"}{"rdg"} } - ) ) else () ) diff --git a/modules/intermediate_format/preprocessing.xqm b/modules/intermediate_format/preprocessing.xqm index 436c3e69a985ab0217c3e18e9895e79d2bb15b6d..8a82f358d566a8cbee4ecb8e9c2015013c8fd29e 100644 --- a/modules/intermediate_format/preprocessing.xqm +++ b/modules/intermediate_format/preprocessing.xqm @@ -114,7 +114,7 @@ declare function pre:default-element (if($following-node[matches(., "[\s\n\r\t]") and normalize-space(.) 
= ""] and $following-sibling[self::ref or self::app or self::hi or self::bibl or self::foreign or self::choice or self::milestone or self::persName - or self::choice or self::index or self::seg] + or self::choice or self::index or self::seg or self::ptr] and not($node[self::index])) then attribute {"break-after"}{"yes"} diff --git a/modules/intermediate_format/whitespace-handling.xqm b/modules/intermediate_format/whitespace-handling.xqm index 485d513884266e8ef99970f53145cc9647098ae8..cd2a09e31fb4961478fea7d19041b28b052ba5ea 100644 --- a/modules/intermediate_format/whitespace-handling.xqm +++ b/modules/intermediate_format/whitespace-handling.xqm @@ -9,7 +9,6 @@ xquery version "3.0"; : @author Uwe Sikora :) module namespace whitespace="http://bdn-edition.de/intermediate_format/whitespace_handling"; -import module namespace pre="http://bdn-edition.de/intermediate_format/preprocessing" at "preprocessing.xqm"; declare default element namespace "http://www.tei-c.org/ns/1.0"; @@ -31,7 +30,6 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; declare function whitespace:text ( $text as text()*, $escape-char as xs:string? ) as text()* { - let $normalized := normalize-space($text) let $whitespace-node := $text[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] let $single-whitespace-between-nodes := $text = ' ' return @@ -61,7 +59,7 @@ declare function whitespace:text : @author Uwe Sikora :) declare function whitespace:escape-text - ( $text, $escape as xs:string ) as text()* { + ($text as text()*, $escape as xs:string) as text()* { text {replace($text, '[\s]+', $escape)} };