diff --git a/.gitignore b/.gitignore index bef899f0e9cb22305e31d08965ad150f349752f0..60935295ff9f81f251d46e1aae79076d3e645987 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /archive /xml +.DS_Store +.com.apple.timemachine.supported diff --git a/README.md b/README.md index 050f90121423016b0ea70db7110ad58e0c1c8fee..d5fa45c6e52b9f025421b52f9ae704557bb488a2 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,36 @@ # bdn:IntermediateFormat Scripts to convert bdn-TEI into an intermediate-format dealing with reading markers +# Notes in Advance + - The directory "stable_old" contains the old version and is kept for documentation purposes only + - The directory "oxygen" contains files for development in oXygen + - The directories "modules" and "rest" hold all files of the new Intermediate-Format version + # setup and description -## stable/modules/intermediate_format/inter_form.xqm - - This is the main module integrating the conversion functions +## modules/intermediate_format/identification.xqm + - This is the main module integrating the main conversion and node identification functions - - place it in your app modules path: "/modules/intermediate_format/inter_form.xqm" + - place it in your app modules path: "/modules/intermediate_format/identification.xqm" -## stable/modules/string.xqm - - This is the a helper module dealing with strings - - place it in your app modules path: "/modules/string.xqm" +## modules/intermediate_format/markerset.xqm + - Functions to collect and construct markers + - place it in your app modules path: "/modules/intermediate_format/markerset.xqm" + +## modules/intermediate_format/preprocessing.xqm + - Contains the preprocessing routine + - place it in your app modules path: "/modules/intermediate_format/preprocessing.xqm" + +## modules/intermediate_format/whitespace-handling.xqm + - Functions for whitespace-handling + - place it in your app modules path: "/modules/intermediate_format/whitespace-handling.xqm" -## stable/rest/intermediate_function.xql - - This is the a conversion script running the conversion on a given document - 
place it in your app somewhere or as suggested here: "/rest/intermediate_function.xql"
+## rest/intermediate_format.xql
+ - This is the REST script running the conversion on a given document within eXist-DB
+ - place it in your app somewhere or as suggested here in a subfolder /rest
 # running the conversion
- - call intermediate_function.xql via REST with the GET-Parameter "path"
+ - call intermediate_format.xql via REST with the GET-Parameter "path"
- - "path" must be a XML-URI existing in your app context (There is no exitence check yet)
+ - "path" must be a XML-URI existing in your app context (There is no existence check yet)
  - wait
 # Sample call
-http://localhost:8080/exist/rest/apps/bdn/rest/intermediate_format.xql?path=/db/apps/bdn/data/samples/griesbach_full.xml
+http://localhost:8080/exist/rest/apps/YOUR_APP/rest/intermediate_format.xql?path=/db/apps/bdn/data/samples/griesbach_full.xml
diff --git a/modules/intermediate_format/identification.xqm b/modules/intermediate_format/identification.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..ef9757f63fb86948ac13575dd455f256a8552967
--- /dev/null
+++ b/modules/intermediate_format/identification.xqm
@@ -0,0 +1,264 @@
+xquery version "3.0";
+
+module namespace ident="http://bdn.edition.de/intermediate_format/identification";
+import module namespace markerset = "http://bdn.edition.de/intermediate_format/markerset" at "markerset.xqm";
+
+declare default element namespace "http://www.tei-c.org/ns/1.0";
+
+
+(: Element names that ident:is-or-are-ble() treats as block-level elements (BLEs). :)
+declare variable $ident:blocklevel-elements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note');
+
+(:~
+ : General comparison of two atomic sequences.
+ :
+ : @param $values the values to look for
+ : @param $sequence the values to compare against
+ : @return true() if at least one item of $values equals at least one item of $sequence
+ :)
+declare function ident:in-sequence
+    ( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType*) as xs:boolean {
+
+    $values = $sequence
+};
+
+(:~
+ : Checks names against $ident:blocklevel-elements.
+ :
+ : @param $values element names to test
+ : @return true() if at least one of the names is listed in $ident:blocklevel-elements
+ :)
+declare function ident:is-or-are-ble
+    ( $values as xs:anyAtomicType* ) as xs:boolean {
+
+    $values = $ident:blocklevel-elements
+};
+
+
+(:~
+ : Collects the chain of first children below $node: its first child node,
+ : that node's first child node, and so on until a node has no children.
+ : A tei:app on the path is handled specially (see the inline comments).
+ :
+ : @param $node the node to start from (not part of the result)
+ : @return the nodes on the first-child path in document order, or () if $node has no children
+ :)
+declare function ident:first-descendants-path
+    ($node as node()?) as node()* {
+
+    let $first-child := (
+        let $target := $node/child::node()[1]
+        return
+            (: PATH CONTROL for the first-descendants-path :)
+
+            (: IN CASE there is an tei:app, be ready to change the path to
+               the first tei:rdg[ppl, ptl] and its first child::node()! :)
+            if ( $target[self::app] ) then (
+
+                (: If tei:app has an empty (or no) tei:lem, skip the tei:app and continue
+                   with the first child node of its first tei:rdg[ppl, ptl] :)
+                if ( empty($target/child::lem/node()) ) then (
+                    $target/child::rdg[@type eq "ppl" or @type eq "ptl"][1]/node()[1]
+                )
+
+                (: If tei:app has no empty tei:lem
+                   follow the normal path from tei:app :)
+                else (
+                    $target
+                )
+            )
+
+            (: If there is no tei:app proceed on normal path by default :)
+            else (
+                $target
+            )
+    )
+    return
+        if($first-child) then ($first-child, ident:first-descendants-path($first-child)) else ()
+};
+
+
+(:~
+ : Counterpart of ident:first-descendants-path() that follows the last child
+ : at every level; a tei:app on the path is handled specially (see the inline comments).
+ :
+ : @param $node the node to start from (not part of the result)
+ : @return the nodes on the last-child path, or () if $node has no children
+ :)
+declare function ident:last-descendants-path
+    ($node as node()?) as node()* {
+
+    let $last-child := (
+        let $target := $node/child::node()[last()]
+        return
+            (: PATH CONTROLL for the last-descendants-path :)
+
+            (: IN CASE there is an tei:app, be ready to change the path !
:)
+            if ( $target[self::app] ) then (
+
+                (: If tei:apps last child is a tei:rdg[ppl, ptl] change the path to this rdg and
+                   its last child() :)
+                if ( $target/child::node()[last()][ self::rdg[@type eq "ppl" or @type eq "ptl"] ] ) then (
+                    $target/child::node()[last()]/child::node()[last()]
+                )
+
+                (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is not empty
+                   change the path to tei:lems last child() :)
+                else if ( not(empty($target/child::lem/node())) ) then (
+                    $target/lem/child::node()[last()]
+                )
+
+                (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is empty
+                   follow the normal path from tei:app :)
+                else (
+                    $target
+                )
+            )
+
+            (: If there is no tei:app proceed on normal path by default :)
+            else (
+                $target
+            )
+    )
+    return
+        if($last-child) then ($last-child, ident:last-descendants-path($last-child)) else ()
+};
+
+
+(:~
+ : Returns the first node on the first-descendants path of $node that is not a
+ : block-level element itself and whose own first-descendants path contains no
+ : block-level element.
+ :
+ : @param $node the reading to inspect
+ : @return that node, or () if no node on the path qualifies
+ :)
+declare function ident:first-save-node
+    ($node as node()) as node()* {
+
+    let $first := ident:first-descendants-path($node)
+        [not ( ident:is-or-are-ble(self::node()/name()) )]
+        [not ( ident:is-or-are-ble( ident:first-descendants-path(self::node())/name() ) )]
+
+    return $first[1]
+};
+
+
+(:~
+ : Returns the first node on the last-descendants path of $node that is not a
+ : block-level element itself and whose own last-descendants path contains no
+ : block-level element.
+ :
+ : @param $node the reading to inspect
+ : @return that node, or () if no node on the path qualifies
+ :)
+declare function ident:last-save-node
+    ($node as node()) as node()* {
+
+    let $last := ident:last-descendants-path($node)
+        [not ( ident:is-or-are-ble(self::node()/name()) )]
+        [not ( ident:is-or-are-ble( ident:last-descendants-path(self::node())/name() ) )]
+
+    return $last[1]
+};
+
+
+(:~
+ : Builds the lookup entry for one reading: a copy of the reading element
+ : (name and attributes) holding an "open" target (first save node), a "close"
+ : target (last save node) and the matching open/close rdgMarker sets.
+ : The @gid of each target is the generate-id() of the original node, so
+ : ident:mark-node() can recognise that node while walking the tree.
+ :
+ : @param $node a tei:lem or tei:rdg
+ : @return the constructed entry element
+ :)
+declare function ident:identify-targets
+    ($node as node()) as node()* {
+
+    let $first := ident:first-save-node($node)
+    let $last := ident:last-save-node($node)
+    let $marker-set := markerset:collect-markers($node)
+
+    return
+        element {$node/name()}{
+            $node/@*,
+            element {"target"}{
+                attribute {"type"}{ "open" },
+                attribute {"gid"}{ generate-id($first) },
+                $first
+            },
+            element {"target"}{
+                attribute {"type"}{ "close" },
+                attribute {"gid"}{ generate-id($last) },
+                $last
+            },
+            element {"marker"}{
+                attribute {"type"}{ "open" },
+                markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
+            },
+            element {"marker"}{
+                attribute {"type"}{ "close" },
+                (: closing markers are emitted in reverse order of the opening ones :)
+                reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
+            }
+        }
+};
+
+
+(:~
+ : Internal helper of ident:walk() for tei:lem and tei:rdg.
+ : Readings of an app[@type = "structural-variance"] are passed on unchanged;
+ : for all other readings the targets are identified once and appended to the
+ : reading sequence before the node is marked.
+ :
+ : @param $node the tei:lem or tei:rdg currently visited
+ : @param $reading-sequence the entries collected so far
+ : @return the result of ident:mark-node() for this reading
+ :)
+declare function ident:walk-reading
+    ($node as element(), $reading-sequence as item()*) as node()* {
+
+    if ( $node/parent::app[ @type eq "structural-variance" ] ) then (
+        ident:mark-node($node, $reading-sequence)
+    ) else (
+        ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
+    )
+};
+
+
+(:~
+ : Recursive tree walk. Processing instructions and whitespace-only text nodes
+ : are dropped, tei:lem / tei:rdg extend the reading sequence (see
+ : ident:walk-reading()), every other node is handed to ident:mark-node().
+ :
+ : @param $nodes the nodes to process
+ : @param $reading-sequence entries produced by ident:identify-targets() for the enclosing readings
+ : @return the converted nodes including the inserted markers
+ :)
+declare function ident:walk
+    ($nodes as node()*, $reading-sequence as item()*) as item()* {
+
+    for $node in $nodes
+    return
+        typeswitch($node)
+            case processing-instruction() return ()
+            case text() return (
+                if (normalize-space($node) eq "") then () else (
+                    ident:mark-node($node, $reading-sequence)
+                )
+            )
+
+            case element(rdg) return ident:walk-reading($node, $reading-sequence)
+
+            case element(lem) return ident:walk-reading($node, $reading-sequence)
+
+            default return (
+                ident:mark-node($node, $reading-sequence)
+            )
+};
+
+(:~
+ : Copies $node (elements are rebuilt and their children walked, text nodes are
+ : returned as they are). If the node is registered as a target in
+ : $reading-sequence, the matching open markers are put in front of it and the
+ : close markers behind it.
+ :)
+declare function ident:mark-node
+    ($node as node(), $reading-sequence as item()* ) as node()* {
+
+    let $node-id := generate-id( $node )
+    let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
+    return
+        if ($in-reading-sequence) then (
+            let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
+            let $open := $marker[@type = "open"]/node()
+            let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
+            return(
+                $open,
+                if ( $node[ not(self::text()) ] ) then (
+                    element{$node/name()}{
+                        $node/@*,
+                        ident:walk($node/node(), $reading-sequence)
+                    }
+                ) else (
+                    $node
+                ),
+
$close
+            )
+        ) else (
+            if ( $node[ not(self::text()) ] ) then (
+                element{$node/name()}{
+                    $node/@*,
+                    ident:walk($node/node(), $reading-sequence)
+                }
+            ) else (
+                $node
+            )
+        )
+};
+
+(:declare function ident:mark-text
+    ($node as node(), $reading-sequence as item()* ) as node()* {
+
+    let $node-id := generate-id( $node )
+    let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
+    return
+        if ($in-reading-sequence) then (
+            let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
+            let $open := $marker[@type = "open"]/node()
+            let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
+            return(
+                $open, $node, $close
+            )
+        ) else ( $node )
+};:)
+
+
+(:~
+ : Looks up the marker elements that belong to a target node.
+ : For every entry of $reading-sequence that has a target child with the given
+ : @gid, the marker children whose @type equals the @type of that target are returned.
+ :
+ : @param $node-id the generate-id() value of the node being marked
+ : @param $reading-sequence the entries built by ident:identify-targets()
+ : @return the matching marker elements (possibly empty)
+ :)
+declare function ident:fetch-marker-from-sequence
+    ($node-id as xs:string, $reading-sequence as item()* ) as node()* {
+
+    for $seq-item in $reading-sequence
+    let $found := $seq-item/target[@gid = $node-id]
+    let $found-type := $found/string(@type)
+    let $markers := $seq-item/marker[@type = $found-type]
+    where $found
+    return
+        $markers
+};
+
+
+(:~
+ : Debug helper: wraps every tei:lem / tei:rdg below $nodes in a numbered UTEST
+ : element containing the reading itself (SELF) and the result of
+ : ident:identify-targets() for it.
+ :)
+declare function ident:identify-unit-test
+    ($nodes as node()*) as node()* {
+
+    for $node at $nr in $nodes//node()[self::lem or self::rdg]
+    let $identified-targets := ident:identify-targets($node)
+    return
+        element{"UTEST"}{
+            attribute {"n"}{$nr},
+            element {"SELF"} {$node},
+            $identified-targets
+        }
+};
\ No newline at end of file
diff --git a/modules/intermediate_format/markerset.xqm b/modules/intermediate_format/markerset.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..44532426187008f90291c8e0ea8381ae12987de3
--- /dev/null
+++ b/modules/intermediate_format/markerset.xqm
@@ -0,0 +1,270 @@
+xquery version "3.0";
+
+module namespace markerset="http://bdn.edition.de/intermediate_format/markerset";
+declare default element namespace "http://www.tei-c.org/ns/1.0";
+
+
+(:~
+ : Builds the markerset element for one reading.
+ : For a tei:lem the set is derived from its following-sibling tei:rdg elements
+ : (attributes only, context="lem"); for a tei:rdg it is derived from the reading
+ : itself (attributes only, context="rdg"). The collected items are merged by type
+ : via markerset:merge-markers().
+ : NOTE(review): the "count" attribute created in the lem branch is not selected
+ : by merge-markers(), so it does not reach the returned markerset.
+ :
+ : @param $reading the tei:lem or tei:rdg
+ : @return a markerset element
+ :)
+declare function markerset:collect-markers
+    ( $reading as node()* ) as item() {
+
+    let $markers := (
+        if ($reading[self::lem]) then (
+            attribute {"count"}{count($reading/following-sibling::rdg)},
+            for $sibling in $reading/following-sibling::rdg
+            return(
+                element {name($sibling)} {
+                    $sibling/@*,
+                    attribute {"context"}{"lem"}
+                }
+            )
+        )
+        else if ($reading[self::rdg]) then (
+            element {name($reading)} {
+                $reading/@*,
+                attribute {"context"}{"rdg"}
+            }
+        )
+        else ()
+    )
+    return
+        element {"markerset"}{
+            markerset:merge-markers($markers)
+            (:$markers:)
+        }
+};
+
+
+(:~
+ : Merges readings that share the same @type into one rdg element each.
+ : The result starts with an "order" attribute listing the fixed type order
+ : followed by any additional types found, then one rdg per type that actually
+ : occurs, carrying the combined @wit, @id and @context values.
+ : NOTE(review): readings without a @type attribute are not matched by
+ : [@type = $type] and therefore produce no rdg - confirm this is intended.
+ :
+ : @param $markerset the collected reading copies
+ : @return the order attribute followed by the merged rdg elements
+ :)
+declare function markerset:merge-markers
+    ( $markerset as node()* ) as item()* {
+
+    let $order := ("om","ppl", "ptl", "pp", "pt" , "v")
+    let $reading-types := distinct-values( $markerset[self::rdg or self::lem]/string(@type) )
+
+    return (
+        attribute {"order"}{distinct-values( ($order, $reading-types) ) },
+        for $type in distinct-values( ($order, $reading-types) )
+        let $rdgs := $markerset[@type = $type]
+        return
+            if ($rdgs) then (
+                element {"rdg"}{
+                    attribute wit {$rdgs/@wit},
+                    attribute id {$rdgs/@id},
+                    attribute context {distinct-values($rdgs/@context)},
+                    attribute type {$type}
+                }
+            ) else ()
+
+    )
+};
+
+
+(:~
+ : markerset:marker() - Marker Constructor
+ : Constructor function which creates the marker element with name, mark type and references.
+ : The '#' characters are stripped from the reading's @wit; @id is copied to @ref.
+ :
+ : @param $name The name of the marker element
+ : @param $type The mark type e.g. open or close (written to @mark)
+ : @param $reading The (merged) reading the marker is built from
+ : @return element() the marker element
+ :
+ : @version 1.1 (2017-09-13)
+ : @author Uwe Sikora
+ :)
+declare function markerset:marker
+    ($name as xs:string, $type as xs:string, $reading as node()) as element(){
+
+    element {$name} {
+        (:attribute bdnp_parent {$node/parent::node()/name()}, :)
+        attribute wit { replace(data($reading/@wit), '#', '') },
+        attribute type { data($reading/@type) },
+        attribute ref { data($reading/@id) },
+        attribute mark { $type },
+        attribute context { $reading/@context }
+    }
+};
+
+
+(:~
+ : Creates one marker (see markerset:marker()) for every child node of the given markerset(s).
+ :
+ : @param $name element name of the markers
+ : @param $marker-type the mark type passed on to markerset:marker()
+ : @param $marker-set markerset element(s) as returned by markerset:collect-markers()
+ : @return the constructed markers in child order
+ :)
+declare function markerset:construct-marker-from-markerset
+    ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* {
+
+    for $marker in $marker-set/node()
+    return (
+        markerset:marker($name, $marker-type, $marker)
+    )
+};
+
+(:~
+ : interform:are-nodes-in-sequence()
+ : This function checks if a node() from a given nodeset is or contains named Elements in a sequence.
+ : In this case it returns 'true' else 'false'
+ :
+ : @param $nodes the nodes() to check for BLEs
+ : @param $bleElements a list of defined BLEs
+ : @return xs:boolean ('true' else 'false')
+ :
+ : @version 1.1 (2017-09-22)
+ : @status working
+ : @author Uwe Sikora
+
+declare function interform:are-nodes-in-sequence
+    ($nodes as node()*, $sequence as item()*) as xs:boolean{
+
+    some $node in $nodes
+    satisfies
+        if(functx:is-value-in-sequence($node/name(), $sequence)) then(
+            fn:true()
+        )
+
+        else (
+            fn:false()
+        )
+};
+ :)
+
+
+(: **************************************************************************************************************
+ : Target Mapping Conversion
+ : ************************************************************************************************************** :)
+
+(:declare function interform:marker-targets
+    ($app-index) {
+
+    let $targets := $app-index//node()[self::first or self::last]
+    let $ids := distinct-values( $targets/string(@target) )
+    let $map := map:merge(
+ for $id in $ids + let $targets-by-id := $targets[@target eq $id] + return + map:entry($id , + element {"target"} { + attribute {"id"}{$id}, + (\:element {"COMPARE"}{ + $targets-by-id/ancestor::node()[self::lem or self::rdg]/parent::node()/parent::node() + },:\) + element {"targetNode"}{ + $targets-by-id[1]/node() + }, + element {"markers"}{ +(\: element {"open"}{interform:first-marker-set($id, $app-index)},:\) +(\: element {"close"}{interform:last-marker-set($id, $app-index)}:\) + element {"open"}{interform:create-marker-sets($targets-by-id[self::first], "open")}, + element {"close"}{ reverse(interform:create-marker-sets($targets-by-id[self::last], "close")) } + } + } + ) + ) + + return + ($map) + +};:) + + +(:~ + : interform:create-marker-sets + : This function creates marker sets for each given target. The input needs to be the last- or first-nodes(). + : Afterwards the single readings are merged for each set and rdgMarkers are build + : + : @param $marker-set the nodes() representing a set of Markers + : @param $marker-type the type of the marker ("open" or "close") + : @return set of element("rdgMarker")* + : + : @version 1.1 (2017-09-22) + : @status working + : @author Uwe Sikora + :) +(:declare function interform:create-marker-sets + ( $marker-set as node()* , $marker-type as xs:string) as item()* { + + let $targets := ( + for $item in $marker-set + let $entry-index := $item/ancestor::entry/string(@n) + let $markers := $item/parent::position/following-sibling::markers/node() + let $merged := interform:merge-readings($markers[not(@type eq "v")]) + order by $entry-index ascending + return + interform:build-markers($marker-type, $merged) + ) + + return $targets +};:) + + +(:~ + : interform:build-markers() + : constructs rdgMarker elements from set of tei:rdg nodes + : + : @param $type The type of the marker element + : @param nodes A set of tei:rdg elements + : @return rdgMarker element()s for each rdg in the set + : + : @version 1.1 (2017-09-13) + : @author 
Uwe Sikora
+ :)
+(:declare function interform:build-markers
+    ($type as xs:string, $nodes as node()*) as item()* {
+
+    for $node in $nodes
+    return
+        interform:marker('rdgMarker', $type, $node)
+};:)
+
+
+
+
+
+(:~
+ : interform:merge-readings()
+ : This function merges all readings in the given set sharing the same tei:rdg[@type]
+ : If no type was provided 'none' is set as type
+ :
+ : @param $readings the readings as a sequence
+ : @return $node the merged readings
+ :
+ : @version 1.0 (2017-09-14)
+ : @author Uwe Sikora
+ :)
+(:declare function interform:merge-readings
+    ($readings as node()*) as item()* {
+
+    let $targets := (
+        for $reading in $readings
+        return
+            if ($reading[@type]) then (
+                $reading
+            )
+            else (
+                element { name($reading) } {
+                    $reading/@*,
+                    attribute type {'none'}
+                }
+            )
+    )
+
+    return (
+        for $type in distinct-values($targets/@type)
+        let $rdgs := $targets[@type = $type]
+        return
+            element {"rdg"}{
+                attribute wit {$rdgs/@wit},
+                attribute id {$rdgs/@id},
+                attribute context {distinct-values($rdgs/@context)},
+                attribute type {$type}
+            }
+    )
+};:)
+
+(:declare function interform:get-marks
+    ($node as node(), $map) as item()* {
+
+    if (data($node/@id) and map:contains( $map, data($node/@id)) ) then (
+        let $map-item := $map(data($node/@id))
+        let $open-marks := $map-item/*:markers/*:open
+        let $close-marks := $map-item/*:markers/*:close
+
+        return (
+            $open-marks,
+            $close-marks
+        )
+    ) else ()
+};:)
\ No newline at end of file
diff --git a/modules/intermediate_format/preprocessing.xqm b/modules/intermediate_format/preprocessing.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..654491ac1e04b971f9ee11bcd173e155d08b7160
--- /dev/null
+++ b/modules/intermediate_format/preprocessing.xqm
@@ -0,0 +1,197 @@
+xquery version "3.0";
+
+module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing";
+import module namespace whitespace = "http://bdn.edition.de/intermediate_format/whitespace_handling" at "whitespace-handling.xqm";
+
+declare default element namespace "http://www.tei-c.org/ns/1.0";
+
+
+(:~
+ : Variant of the preprocessing that wraps every non-whitespace text node in a
+ : textNode element. Processing instructions, whitespace-only text and editorial
+ : notes are dropped in place; the editorial notes of a TEI element are appended
+ : to it inside an editorialNotes element. tei:lem and tei:rdg get a generated @id.
+ :
+ : @param $nodes the nodes to convert
+ : @return the converted nodes
+ :)
+declare function pre:preprocessing-textNode
+    ($nodes as node()*) as item()* {
+
+    for $node in $nodes
+    return
+        typeswitch($node)
+            case processing-instruction() return ()
+            case text() return (
+                if (normalize-space($node) eq "") then () else (
+                    element {"textNode"} {
+                        (:attribute {"interformId"}{ generate-id($node) },:)
+                        $node
+                    }
+                )
+            )
+
+            case element(TEI) return (
+                element{$node/name()}{
+                    $node/@*,
+                    pre:preprocessing-textNode($node/node()),
+                    element{"editorialNotes"}{
+                        $node//note[@type eq "editorial"]
+                    }
+                }
+            )
+
+            case element(lem) return (
+                element{$node/name()}{
+                    (: an existing @id would clash with the generated one (duplicate attribute error) :)
+                    $node/@* except $node/@id,
+                    attribute {"id"}{ generate-id($node)},
+                    pre:preprocessing-textNode($node/node())
+                }
+            )
+
+            case element(rdg) return (
+                element{$node/name()}{
+                    $node/@* except $node/@id,
+                    attribute {"id"}{ generate-id($node)},
+                    pre:preprocessing-textNode($node/node())
+                }
+            )
+
+            case element(note) return (
+                if ($node[@type eq "editorial"]) then (
+                ) else (
+                    element{$node/name()}{
+                        $node/@*,
+                        pre:preprocessing-textNode($node/node())
+                    }
+                )
+            )
+
+            default return (
+                element{$node/name()}{
+                    $node/@*,
+                    pre:preprocessing-textNode($node/node())
+                }
+            )
+};
+
+
+(:~
+ : Rebuilds an element with its name and attributes around already converted content.
+ : Would be great if $recursive-function would be a real function and not a node-sequence
+ :
+ : @param $node the element to copy name and attributes from
+ : @param $recursive-function the already converted child content
+ : @return the rebuilt element
+ :)
+declare function pre:default-element
+    ( $node as node(), $recursive-function as node()* ) as item()* {
+
+    element{$node/name()}{
+        $node/@*,
+        $recursive-function
+    }
+};
+
+(:~
+ : Main preprocessing routine. Walks the tree recursively and
+ :  - drops processing instructions and comments,
+ :  - normalises text nodes via whitespace:text(),
+ :  - copies teiHeader unchanged,
+ :  - unwraps div[@type = 'section-group'],
+ :  - gives tei:lem / tei:rdg a generated @id,
+ :  - removes editorial notes in place and re-adds them (converted) in an
+ :    editorialNotes element at the end of TEI,
+ :  - flags tei:pb with break="no" when it is not surrounded by a blank,
+ :  - renames aligned tei:hi to aligned and seg[@type = 'item' | 'row'] to item / row.
+ :
+ : @param $nodes the nodes to convert
+ : @return the converted nodes
+ :)
+declare function pre:preprocessing
+    ($nodes as node()*) as item()* {
+
+    for $node in $nodes
+    return
+        typeswitch($node)
+            case processing-instruction() return ()
+            case text() return (
+                whitespace:text($node, " ")
+            )
+
+            case comment() return ()
+
+            case element(TEI) return (
+                element{$node/name()}{
+                    $node/@*,
+                    pre:preprocessing($node/node()),
+                    element{"editorialNotes"}{
+                        for $editorial-note in $node//note[@type eq "editorial"]
+                        return
+                            pre:default-element( $editorial-note, pre:preprocessing($editorial-note/node()) )
+                    }
+                }
+            )
+
+            case element(teiHeader) return (
+                element {name($node)} {
+                    $node/@*,
+                    $node/node()
+                }
+            )
+
+            case element(div) return (
+                if ($node[@type = 'section-group']) then (
+                    pre:preprocessing($node/node())
+                )
+                else (
+                    pre:default-element( $node, pre:preprocessing($node/node()) )
+                )
+
+            )
+
+            case element(lem) return (
+                element{$node/name()}{
+                    (: an existing @id would clash with the generated one (duplicate attribute error) :)
+                    $node/@* except $node/@id,
+                    attribute {"id"}{ generate-id($node)},
+                    pre:preprocessing($node/node())
+                }
+            )
+
+            case element(rdg) return (
+                element{$node/name()}{
+                    $node/@* except $node/@id,
+                    attribute {"id"}{ generate-id($node)},
+                    pre:preprocessing($node/node())
+                }
+            )
+
+            case element(note) return (
+                (: Only editorial notes are removed here. The former test
+                   @type != "editorial" was also false for notes WITHOUT @type and
+                   silently dropped them, unlike pre:preprocessing-textNode(). :)
+                if ( $node[@type eq "editorial"] ) then ( ) else (
+                    pre:default-element( $node, pre:preprocessing($node/node()) )
+                )
+            )
+
+            case element(pb) return (
+                let $preceding-sibling := $node/preceding-sibling::node()[1]
+                let $following-sibling := $node/following-sibling::node()[1]
+                return
+                    element {$node/name()}{
+                        $node/@*,
+                        (: NOTE(review): only a literal blank counts as whitespace here;
+                           a line break or tab next to the pb is treated as "no whitespace" - confirm :)
+                        if ( ends-with($preceding-sibling, " ") eq false() and starts-with($following-sibling, " ") eq false() ) then (
+                            attribute {"break"}{"no"}
+                        ) else ( )(:,
+                        attribute {"whitespace"}{
+                            if (ends-with($preceeding-sibling, " ")) then (
+                                "before"
+                            ) else (),
+                            if (starts-with($following-sibling, " ")) then (
+                                "after"
+                            ) else ()
+                        }:)
+                    }
+            )
+
+            case element(hi) return (
+                if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then(
+                    element {'aligned'} {
+                        $node/@*,
+                        pre:preprocessing($node/node())
+                    }
+                )
+                else (
+                    pre:default-element( $node, pre:preprocessing($node/node()) )
+                )
+            )
+
+            case element(seg) return (
+                if($node[@type = 'item']) then(
+                    element {'item'} {
+                        $node/@*[name() != 'type'],
+                        pre:preprocessing($node/node())
+                    }
+                )
+                else if($node[@type = 'row']) then(
+                    element {'row'} {
+                        $node/@*[name() != 'type'],
+                        pre:preprocessing($node/node())
+                    }
+                )
+                else (
+                    pre:default-element( $node, pre:preprocessing($node/node()) )
+                )
+            )
+
+            default return (
+                pre:default-element( $node, pre:preprocessing($node/node()) )
+            )
+};
\ No newline at end of file
diff --git a/modules/intermediate_format/whitespace-handling.xqm b/modules/intermediate_format/whitespace-handling.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..0fc375feedfcbcc12bbf220c55389047afd0f408
--- /dev/null
+++ b/modules/intermediate_format/whitespace-handling.xqm
@@ -0,0 +1,37 @@
+xquery version "3.0";
+
+module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling";
+declare default element namespace "http://www.tei-c.org/ns/1.0";
+
+(:~
+ : whitespace:text
+ : Normalises a text node. Text that contains non-whitespace characters, or that
+ : is a single blank standing between two non-text sibling nodes, is returned
+ : with every whitespace run replaced by the escape character; any other
+ : whitespace-only text is dropped.
+ :
+ : @param $text the text node to process (callers pass a single node)
+ : @param $escape-char replacement for whitespace runs; a blank is used if empty
+ : @return the normalised text node or ()
+ :)
+declare function whitespace:text
+    ( $text as text()*, $escape-char as xs:string? ) as text()* {
+
+    let $normalized := normalize-space($text)
+    (: The siblings must be non-text nodes. The former test not(self::node() = text())
+       compared a sibling's string value with its own text children and was false for
+       every element with simple text content, so the blank in <hi>a</hi> <hi>b</hi> was lost. :)
+    let $single-whitespace-between-nodes := $text
+        [ . = ' ' ]
+        [ preceding-sibling::node()[not(self::text())] ]
+        [ following-sibling::node()[not(self::text())] ]
+    return
+        if ( $normalized != "" or $single-whitespace-between-nodes) then (
+
+            if ($escape-char) then (
+                whitespace:escape-text($text, $escape-char)
+            ) else ( whitespace:escape-text($text, " ") )
+
+        )
+        else ()
+};
+
+(:~
+ : whitespace:escape-text
+ : This function replaces every run of whitespace in a text()
+ : with one defined preservation character
+ :
+ : @param $text the text to process
+ : @param $escape the replacement string
+ : @return a new text node
+ :
+ : @version 1.0 (2017-09-14)
+ : @author Uwe Sikora
+ :)
+declare function whitespace:escape-text
+    ($text, $escape as xs:string) as text()* {
+
+    text {replace($text, '[\s]+', $escape)}
+};
diff --git a/oxygen/functx.xqm b/oxygen/functx.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..749a38f90c8450b3986e35a0cfa3b74ea29fda5a
--- /dev/null
+++ b/oxygen/functx.xqm
@@ -0,0 +1,65 @@
+xquery version "1.0";
+module namespace functx = "http://www.functx.com";
+
+
+declare function functx:is-value-in-sequence
+  ( $value as xs:anyAtomicType?
,
+    $seq as xs:anyAtomicType* ) as xs:boolean {
+
+   $value = $seq
+} ;
+
+
+(: Returns the first node of $nodes in document order (the path step "/." sorts and de-duplicates). :)
+declare function functx:first-node
+  ( $nodes as node()* ) as node()? {
+
+   ($nodes/.)[1]
+} ;
+
+
+(: Returns the last node of $nodes in document order. :)
+declare function functx:last-node
+  ( $nodes as node()* ) as node()? {
+
+   ($nodes/.)[last()]
+} ;
+
+
+(: Returns every position in $nodes at which the node identical (is) to $nodeToFind occurs. :)
+declare function functx:index-of-node
+  ( $nodes as node()* ,
+    $nodeToFind as node() ) as xs:integer* {
+
+  for $seq in (1 to count($nodes))
+  return $seq[$nodes[$seq] is $nodeToFind]
+} ;
+
+
+(: Copies each element and adds attribute $attrNames[i] with value $attrValues[i]
+   unless the element already has an attribute of that name.
+   NOTE(review): the declared return type is element()? although one element is
+   returned per input element - passing more than one element looks like a type error. :)
+declare function functx:add-attributes
+  ( $elements as element()* ,
+    $attrNames as xs:QName* ,
+    $attrValues as xs:anyAtomicType* ) as element()? {
+
+   for $element in $elements
+   return element { node-name($element)}
+                  { for $attrName at $seq in $attrNames
+                    return if ($element/@*[node-name(.) = $attrName])
+                           then ()
+                           else attribute {$attrName}
+                                          {$attrValues[$seq]},
+                    $element/@*,
+                    $element/node() }
+} ;
+
+(: True if $node is identical (is) to one of the nodes in $seq. :)
+declare function functx:is-node-in-sequence
+  ( $node as node()? ,
+    $seq as node()* ) as xs:boolean {
+
+   some $nodeInSeq in $seq satisfies $nodeInSeq is $node
+ } ;
+
+
+ (: True if $node is deep-equal() to one of the nodes in $seq. :)
+ declare function functx:is-node-in-sequence-deep-equal
+  ( $node as node()? ,
+    $seq as node()* ) as xs:boolean {
+
+   some $nodeInSeq in $seq satisfies deep-equal($nodeInSeq,$node)
+ } ;
\ No newline at end of file
diff --git a/oxygen/intermediate_format.oxygen.devel.xqm b/oxygen/intermediate_format.oxygen.devel.xqm
new file mode 100644
index 0000000000000000000000000000000000000000..3a79d9808dc03f4ac00a74c74ba38b9b28e92f72
--- /dev/null
+++ b/oxygen/intermediate_format.oxygen.devel.xqm
@@ -0,0 +1,187 @@
+xquery version "3.0";
+
+
+declare default element namespace "http://www.tei-c.org/ns/1.0";
+
+import module namespace functx = "http://www.functx.com" at "functx.xqm";
+import module namespace markerset = "http://bdn.edition.de/intermediate_format/markerset" at "../modules/intermediate_format/markerset.xqm";
+import module namespace pre = "http://bdn.edition.de/intermediate_format/preprocessing" at "../modules/intermediate_format/preprocessing.xqm";
+import module namespace ident = "http://bdn.edition.de/intermediate_format/identification" at "../modules/intermediate_format/identification.xqm";
+
+(:declare namespace target = "http://www.interform.com/target_index";
+import module "http://www.interform.com/target_index" at "targetindex.xqm";
+:)
+
+
+declare namespace tei = "http://www.tei-c.org/ns/1.0";
+(:declare option saxon:output "indent=no";:)
+
+
+(: Element names used by this development script; $blocklevel-elements holds the
+   same names as $ident:blocklevel-elements in identification.xqm. :)
+declare variable $apparatus := ('app');
+declare variable $apparatus-childs := ('lem', 'rdg');
+declare variable $blocklevel-elements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note');
+
+
+(: General comparison: true if any item of $values equals any item of $sequence. :)
+declare function local:in-sequence
+    ( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType*) as xs:boolean {
+
+    $values = $sequence
+};
+
+
+(: Builds an appIndex element with one numbered entry per given tei:app; each entry
+   contains the result of local:check-readings() for the app's lem/rdg children. :)
+declare function local:app-index
+    ( $apps as node()* ) as item()* {
+
+    let $entries := (
+        for $app at $nr in $apps
+        let $readings := $app/child::node()[self::lem or self::rdg]
+        return
+            element {$app/name()}{
+                attribute {"n"}{$nr},
+                local:check-readings($readings)
+            }
+    )
+    return element
{"appIndex"}{ attribute {"count"}{ count($entries) }, $entries }
+};
+
+
+(:declare function local:target-in-index
+    ( $target-id as xs:string?, $app-index as node() ) as item()* {
+
+    if ( $target-id ) then (
+        let $readings := $app-index//node()[self::first or self::last][@target = $target-id]
+        return
+            $readings
+    ) else ( )
+
+};:)
+
+
+(: Copies a tei:app (name and attributes) and fills it with the checked readings
+   (see local:check-readings()) of its lem/rdg children. :)
+declare function local:controll-app
+    ($app as node()) as item()* {
+
+    let $self := $app
+    let $readings := $app/child::node()[self::lem or self::rdg]
+    return
+        element {$app/name()}{
+            $app/@*,
+            local:check-readings($readings)
+        }
+};
+
+(: WORKS :)
+(: Chain of first children below $node; the chain stops as soon as a first child is a text node. :)
+declare function local:first-descendants
+    ($node as node()?) as node()* {
+
+    let $first-child := $node/child::node()[1][not(self::text())]
+    return
+        if($first-child) then ($first-child, local:first-descendants($first-child)) else ()
+};
+
+
+(: WORKS :)
+(: Chain of last children below $node; stops at text nodes and continues with
+   tei:lem whenever the last child is a tei:app. :)
+declare function local:last-descendants
+    ($node as node()?) as node()* {
+
+    let $last-child := (
+        let $target := $node/child::node()[last()][not(self::text())]
+        return
+            if ($target[self::app]) then (
+                (: Possibility to jump into rdg[type="ppl, ptl, pp, pt"]:)
+                $target/lem
+            )
+            else (
+                $target
+            )
+    )
+    return
+        if($last-child) then ($last-child, local:last-descendants($last-child)) else ()
+};
+
+
+(: Applies local:check-reading() to every given reading. :)
+declare function local:check-readings
+    ( $readings as node()* ) as item()* {
+
+    for $reading in $readings
+    return local:check-reading($reading)
+};
+
+
+
+(:~
+ : Builds the index entry for one reading: a copy of the reading element (name
+ : and attributes) which - unless the reading is of type "om" or empty - holds
+ : its target(s) and its markerset. When the first and the last save node are
+ : the SAME node, a single target of type "open close" is written, otherwise
+ : one "open" and one "close" target.
+ :
+ : @param $reading a tei:lem or tei:rdg
+ : @return the index entry element
+ :)
+declare function local:check-reading
+    ( $reading as node() ) as item()* {
+
+    let $first-save-node := local:first-descendants($reading)[ local:is-save-first-node(self::node()) ][1]
+    let $last-save-node := local:last-descendants($reading)[ local:is-save-last-node(self::node()) ][1]
+    return
+        element {$reading/name()}{
+            $reading/@*,
+            (:element {"SELF"}{
+                $reading
+            },:)
+            if ( $reading[ not(@type eq "om" or empty($reading/node())) ] ) then (
+                (: Node identity, not value equality: "eq" atomizes both nodes and would
+                   also merge two DIFFERENT nodes that merely have the same string value. :)
+                if ($first-save-node is $last-save-node) then (
+                    element{"target"}{
+                        (:attribute {"id"}{ $first-save-node/string(@interformId) },:)
+                        attribute {"gid"}{ generate-id( $first-save-node ) },
+                        attribute {"type"} {"open close"},
+                        $first-save-node
+                    }
+                ) else (
+                    element {"target"}{
+                        (:attribute {"id"}{ $first-save-node/string(@interformId) },:)
+                        attribute {"gid"}{ generate-id( $first-save-node ) },
+                        attribute {"type"} {"open"},
+                        $first-save-node
+                    },
+                    element {"target"}{
+                        (:attribute {"id"}{ $last-save-node/string(@interformId) },:)
+                        attribute {"gid"}{ generate-id( $last-save-node ) },
+                        attribute {"type"} {"close"},
+                        $last-save-node
+                    }
+                ),
+                markerset:collect-markers($reading)
+            ) else ()
+        }
+};
+
+
+
+
+
+(: True if $node is not a block-level element and none of its first descendants
+   (local:first-descendants()) is one. :)
+declare function local:is-save-first-node
+    ($node as node()) as xs:boolean {
+    let $first-descendants := local:first-descendants($node)
+    let $has-save-first-descendants := not ( local:in-sequence($first-descendants/name(), $blocklevel-elements) )
+    let $self-ble := functx:is-value-in-sequence( $node/name(), $blocklevel-elements )
+    return
+        if ($has-save-first-descendants and not ($self-ble)) then (true()) else (false())
+};
+
+(: True if $node is not a block-level element and none of its last descendants
+   (local:last-descendants()) is one. :)
+declare function local:is-save-last-node
+    ($node as node()) as xs:boolean {
+    let $last-descendants := local:last-descendants($node)
+    let $has-save-last-descendants := not ( local:in-sequence($last-descendants/name(), $blocklevel-elements) )
+    let $self-ble := functx:is-value-in-sequence( $node/name(), $blocklevel-elements )
+    return
+        if ($has-save-last-descendants and not ($self-ble)) then (true()) else (false())
+};
+
+
+
+
+let $doc := .
+let $pre := pre:preprocessing($doc/node())
+(:let $pre := pre:preprocessing($doc/node())
+let $app-index := local:app-index( $pre//app[not(@type)] )
+let $target-index := target:index($app-index):)
+
+return (
+    (:ident:identify-unit-test($pre):)
+(:    $pre:)
+    ident:walk($pre, ())
+(:    $target-index:)
+    (:local:target-in-index("d0t36", $app-index),:)
+    (:target:conversion-by-target-index($pre, $target-index):)
+)
\ No newline at end of file
diff --git a/rest/intermediate_format.xql b/rest/intermediate_format.xql
new file mode 100644
index 0000000000000000000000000000000000000000..8029efb53037e95b599b5a2480858a012af9a993
--- /dev/null
+++ b/rest/intermediate_format.xql
@@ -0,0 +1,18 @@
+xquery version "3.1";
+
+declare namespace tei = "http://www.tei-c.org/ns/1.0";
+import module namespace functx="http://www.functx.com";
+import module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing" at "xmldb:exist:///db/apps/interform/modules/intermediate_format/preprocessing.xqm";
+import module namespace ident = "http://bdn.edition.de/intermediate_format/identification" at "xmldb:exist:///db/apps/interform/modules/intermediate_format/identification.xqm";
+import module namespace request = "http://exist-db.org/xquery/request";
+
+declare option exist:serialize "method=xml media-type=text/xml omit-xml-declaration=no indent=no";
+
+(: http://localhost:8080/exist/rest/apps/interform/rest/intermediate_format.xql?path=/db/apps/... :)
+(: The document is taken from the GET parameter "path" (see README); the bundled sample is the fallback. :)
+declare variable $doc-path := request:get-parameter("path", "/apps/interform/data/sample1.xml");
+let $doc := doc($doc-path)
+let $preprocessed-data := pre:preprocessing($doc/tei:TEI)
+return (
+    ident:walk($preprocessed-data, ())
+)
\ No newline at end of file
diff --git a/stable/modules/intermediate_format/inter_form.xqm b/stable_old/modules/intermediate_format/inter_form.xqm
similarity index 100%
rename from stable/modules/intermediate_format/inter_form.xqm
rename to stable_old/modules/intermediate_format/inter_form.xqm
diff --git a/stable/modules/string.xqm
b/stable_old/modules/string.xqm similarity index 100% rename from stable/modules/string.xqm rename to stable_old/modules/string.xqm diff --git a/stable/rest/intermediate_format.xql b/stable_old/rest/intermediate_format.xql similarity index 100% rename from stable/rest/intermediate_format.xql rename to stable_old/rest/intermediate_format.xql