Merge pull request #15 from MRodz/master

Leittextwechsel + a bit of documentation

Merge pull request #15 from MRodz/master
589c2146 · uwe · GitHub · 1be098f6 · bfc4bd1c · 589c2146
Unverified Commit 589c2146 authored 6 years ago by uwe Committed by GitHub 6 years ago
--- a/README.md
+++ b/README.md
 # bdn:IntermediateFormat.v2 - eXistDB Application
-This repos contains the BdN IntermediateFormat-conversion Application to produce several kinds of intermediate formats of the XML-Data of the DFG-Project "Bibliothek der Neologie" 
+This repository contains the BdN IntermediateFormat-conversion application to produce several kinds of intermediate formats of the XML-Data of the DFG-Project "Bibliothek der Neologie"
 # Requirements
@@ -8,29 +8,9 @@ This repos contains the BdN IntermediateFormat-conversion Application to produce
 # Setup
- Call `ant` to build the app as `build/interformat.{VERSION}.xar` 
+- call `ant` to build the app as `build/interformat.{VERSION}.xar`
 - integrate it into your eXistDB instance
-# Modules
-## modules/intermediate_format/identification.xqm
-  - This is the main module integrating the main conversion and node identification functions
-  - place it in your app modules path: "/modules/intermediate_format/inter_form.xqm" 
-## modules/intermediate_format/markerset.xqm
-  - Functions to collect and construct markers 
-  - place it in your app modules path: "/modules/intermediate_format/markerset.xqm"
-## modules/intermediate_format/preprocessing.xqm
-  - Contains the preprocessing routine 
-  - place it in your app modules path: "/modules/intermediate_format/preprocessing.xqm" 
-## modules/intermediate_format/whitespace-handling.xqm
-  - Functions for whitespace-handling
-  - place it in your app modules path: "/modules/intermediate_format/whitespace-handling.xqm"
-## rest/intermediate_function.xql
-  - This is the REST script running the conversion on a given document within eXist-DB
-  - place it in your app somewhere or as suggested here in a subfolder /rest
 # Use
  - call the IntermediateFormat API with the GET parameter `resource`: http://localhost:{PORT}/exist/apps/interformat/rest/convert?resource={RESOURCE-URI}
@@ -38,33 +18,33 @@ This repos contains the BdN IntermediateFormat-conversion Application to produce
  - call intermediate_format.xql via REST with the GET-Parameter `resource`: http://localhost:{PORT}/exist/rest/apps/interformat/rest/intermediate_format.xql?resource={RESOURCE-URI}
  - the resource must be from your eXist-instance context
-  - If you like to store the result add method=store as get-parameter
+  - if you would like to store the result, add `method=store` as a GET parameter
 # Changes of the Intermediate Format
  - note: All changes are done (not quite right) in the tei-namespace!
-## "editorialNotes Section" 
+## "editorialNotes section"
  - Section as last child of tei:TEI where all note[@type="editorial"] are collected during the preprocessing. Every note[@type="editorial"] is thus ignored in its original context
 ## "aligned"
  - new element; all tei:hi[@rend ="right-aligned" or @rend="center-aligned"] are converted to aligned[@rend ="right-aligned" or @rend="center-aligned"]
-  - name: "aligned", 
+  - name: "aligned",
  - attributes: same as tei:hi in original data
 ## "seg[@type='item' or @type='head' or @type='row']" vs tei:item or tei:head or tei:row
  - conversion of seg[@type='item' or @type='head' or @type='row'] into tei:item or tei:head or tei:row
 ## "rdgMarker"
-  - new element representing Siglae
+  - new element representing scribal abbreviations (sigla)
  - name: "rdgMarker"
-  - attributes: @wit(Witness without '#'), @ref(Reference to @id of tei:lem or tei:rdg without '#'), @type(same as tei:rdg), @mark('open' or 'close'), @context(Context of the Marker - "rdg" or "lem") 
+  - attributes: @wit (witness without '#'), @ref (reference to @id of tei:lem or tei:rdg without '#'), @type (same as tei:rdg), @mark ('open' or 'close'), @context (context of the marker - "rdg" or "lem")
 ## "tei:rdg" or "tei:lem"
-  - new attribute: @id(generated id during the preprocessing, serving as referenced id by the rdgMarkers)
+  - new attribute: @id (ID generated during the preprocessing, serving as the ID referenced by the rdgMarkers)
 ## "tei:pb[@break="no"]"
-  - new attribute: @break="no"(In cases a tei:pb is directly preceeded or followed by a character not self whitespace)
+  - new attribute: @break="no" (in cases where a tei:pb is directly preceded or followed by a character that is not itself whitespace)
 ## text() and whitespace
-  - All whitespaces in text() are replaced by NON-BREAKING SPACE (U+00A0, &#160) to save whitespace during the processing
+  - All whitespace characters in text() are replaced by @ to preserve whitespace during the processing
--- a/modules/intermediate_format/markerset.xqm
+++ b/modules/intermediate_format/markerset.xqm
@@ -17,7 +17,7 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
 (:~
 : markerset:collect-markers()
 : This function collects markers for a given reading.
- : It destinguishes tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collect itself.
+ : It distinguishes tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collects itself.
 :
 : @param $reading the reading node to collect readings for
 : @return node() representing a markerset of readings for the given node
@@ -31,9 +31,9 @@ declare function markerset:collect-markers
    let $markers := (
        if ($reading[self::lem]) then (
-            (: In case of tei:lem ignore all siglae for types "typo_corr", "invisible-ref", "varying-target" :)
+            (: In case of tei:lem ignore all sigla for types "typo-correction", "invisible-ref", "varying-target" :)
            attribute {"count"}{count($reading/following-sibling::rdg)},
-            for $sibling in $reading/following-sibling::rdg[ not(@type="typo_corr" or @type="invisible-ref" or @type="varying-target") ]
+            for $sibling in $reading/following-sibling::rdg[ not(@type="typo-correction" or @type="invisible-ref" or @type="varying-target") ]
            return(
                element {name($sibling)} {
                    $sibling/@*,
@@ -42,22 +42,10 @@ declare function markerset:collect-markers
            )
        )
        else if ($reading[self::rdg]) then (
-            (: Preparing "Leittextwechsel" :)
-            if ($reading[@type = "ppl" or @type = "pp"][descendant::lem[@wit]]) then (
-                let $children-readings := $reading/descendant::rdg
-                return(
-                    element {name($reading)} {
-                        $reading/@*,
-                        attribute {"context"}{"changed-lem ", data($children-readings/@wit)}
-                    }
-                )
-            ) 
-            else (
                element {name($reading)} {
                    $reading/@*,
                    attribute {"context"}{"rdg"}
                }
-            )
        )
        else ()
    )

--- a/modules/intermediate_format/preprocessing.xqm
+++ b/modules/intermediate_format/preprocessing.xqm
@@ -114,7 +114,7 @@ declare function pre:default-element
            (if($following-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
            and $following-sibling[self::ref or self::app or self::hi or self::bibl
            or self::foreign or self::choice or self::milestone or self::persName
-            or self::choice or self::index or self::seg]
+            or self::choice or self::index or self::seg or self::ptr]
            and not($node[self::index]))
            then
                attribute {"break-after"}{"yes"}

--- a/modules/intermediate_format/whitespace-handling.xqm
+++ b/modules/intermediate_format/whitespace-handling.xqm
@@ -9,7 +9,6 @@ xquery version "3.0";
 : @author Uwe Sikora
 :)
 module namespace whitespace="http://bdn-edition.de/intermediate_format/whitespace_handling";
-import module namespace pre="http://bdn-edition.de/intermediate_format/preprocessing" at "preprocessing.xqm";
 declare default element namespace "http://www.tei-c.org/ns/1.0";
@@ -31,7 +30,6 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
 declare function whitespace:text
    ( $text as text()*, $escape-char as xs:string? ) as text()* {
-    let $normalized := normalize-space($text)
    let $whitespace-node := $text[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
    let $single-whitespace-between-nodes := $text = ' '
    return
@@ -61,7 +59,7 @@ declare function whitespace:text
 : @author Uwe Sikora
 :)
 declare function whitespace:escape-text
-    ( $text, $escape as xs:string ) as text()* {
+    ($text as text()*, $escape as xs:string) as text()* {
    text {replace($text, '[\s]+', $escape)}
 };