Skip to content
Snippets Groups Projects
preprocessing.xqm 13 KiB
Newer Older
  • Learn to ignore specific revisions
  • Michelle Weidling's avatar
    Michelle Weidling committed
    xquery version "3.1";
    
    uwe's avatar
    uwe committed
     : PREPROCESSING Module ("pre", "http://bdn-edition.de/intermediate_format/preprocessing")
     : *******************************************************************************************
     : This module contains the preprocessing routines for the intermediate format
     :
     : It imports the whitespace handling helper module, which performs the whitespace handling during the preprocessing
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-29)
     : @status working
     : @author Uwe Sikora
    
    uwe's avatar
    uwe committed
     :)
    
    module namespace pre="http://bdn-edition.de/intermediate_format/preprocessing";
    import module namespace whitespace = "http://bdn-edition.de/intermediate_format/whitespace_handling" at "whitespace-handling.xqm";
    
    MRodz's avatar
    MRodz committed
    import module namespace console="http://exist-db.org/xquery/console";
    
    
    declare default element namespace "http://www.tei-c.org/ns/1.0";
    
    
    
    uwe's avatar
    uwe committed
    (:############################# Modules Functions #############################:)
    
    
    uwe's avatar
    uwe committed
     : pre:preprocessing-textNode
     : preprocessing function which converts each text() into an XML node "textNode". This function is an experimental fallback solution and not the main preprocessing routine!
     :
     : @param $nodes the nodes to be converted
     : @return item()* representing the converted node
    
    uwe's avatar
    uwe committed
     : @version 1.2 (2017-10-15)
     : @status working
     : @author Uwe Sikora
     :)
    
    (: Fallback conversion: walks $nodes recursively and rebuilds the tree.
     : Visible behaviour per node kind:
     :   - processing instructions are dropped
     :   - whitespace-only text nodes are dropped, every other text node is
     :     wrapped in a textNode element
     :   - lem and rdg are copied with an additional id attribute taken from generate-id
     :   - note elements whose type equals editorial-commentary are dropped,
     :     other notes are copied
     :   - every other element is copied with its attributes and converted children
     : NOTE(review): the element(TEI) case references $replace-whitespace, which is
     : not a parameter of this function, and its constructor is not closed in this
     : view - lines appear to be missing; confirm against the repository version. :)
    declare function pre:preprocessing-textNode
        ($nodes as node()*) as item()* {
    
        for $node in $nodes
        return
            typeswitch($node)
                case processing-instruction() return ()
                case text() return (
                    (: normalize-space yields the empty string for whitespace-only text :)
                    if (normalize-space($node) eq "") then () else (
                        element {"textNode"} {
                            (:attribute {"interformId"}{ generate-id($node) },:)
                            $node
                        }
                    )
                )
    
                case element(TEI) return (
                    element{$node/name()}{
                        $node/@*,
                        pre:preprocessing-textNode($node/node()),
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                        pre:get-editorial-notes($node, $replace-whitespace)
    
                case element(lem) return (
                    element{$node/name()}{
                        $node/@*,
                        attribute {"id"}{ generate-id($node)},
                        pre:preprocessing-textNode($node/node())
                    }
                )
    
                case element(rdg) return (
                    element{$node/name()}{
                        $node/@*,
                        attribute {"id"}{ generate-id($node)},
                        pre:preprocessing-textNode($node/node())
                    }
                )
    
                case element(note) return (
    
                    if ($node[@type eq "editorial-commentary"]) then (
    
                    ) else (
                        element{$node/name()}{
                            $node/@*,
                            pre:preprocessing-textNode($node/node())
                        }
                    )
                )
    
    
                default return (
    
                    element{$node/name()}{
                        $node/@*,
                        pre:preprocessing-textNode($node/node())
                    }
                )
    };
    
    
    
    uwe's avatar
    uwe committed
     : pre:default-element
     : function that serves as the default element constructor for the preprocessing conversion.
     : It is more or less a copy function: it copies the element's name and its nodes and recursively passes the conversion on to its child nodes
     :
     : @param $node the node to be copied
     : @param $recursive-function the recursive function as some kind of call back to the main conversion
     : @return item()* representing the converted node
    
    uwe's avatar
    uwe committed
     : @version 1.0 (2018-01-31)
     : @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO)
     : @status working
     : @author Uwe Sikora
     :)
    
    (: Default conversion for a single node.
     : Visible behaviour:
     :   - emits an id attribute taken from generate-id of $node
     :   - emits break-after=yes when one of the following holds:
     :       a) the next sibling node is whitespace-only, the next sibling element is
     :          one of ref, app, hi, bibl, foreign, choice, milestone, persName,
     :          index, seg or ptr, and $node itself is not an index element
     :       b) $node is a milestone and the next sibling node is a text node
     :       c) $node is a ptr inside an rdg whose type is v, pp or pt
     :   - appends $recursive-function, the already converted child nodes
     : NOTE(review): the constructor that wraps these items and the closing of the
     : function are not visible in this view - lines appear to be missing; confirm
     : against the repository version. :)
    declare function pre:default-element
    
        ( $node as node(), $recursive-function as node()* ) as element()* {
    
        (: first following sibling of any node kind, used for the whitespace test :)
        let $following-node := $node/following-sibling::node()[1]
        (: first following sibling element, used for the element-name test :)
        let $following-sibling := $node/following-sibling::*[1]
        return
    
    Michelle Rodzis's avatar
    Michelle Rodzis committed
                attribute id {generate-id($node)},
    
                (if($following-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
                and $following-sibling[self::ref or self::app or self::hi or self::bibl
                or self::foreign or self::choice or self::milestone or self::persName
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                or self::choice or self::index or self::seg or self::ptr]
    
    MRodz's avatar
    MRodz committed
                and not($node[self::index])
    
                or ($node[self::milestone]) and $following-node[self::text()]
                (: ptr in the critical apparatus produce the siglum '[E]' which should
                be followed by a whitespace :)
                or ($node[self::ptr] and $node/ancestor::rdg[@type = ("v", "pp", "pt")]))
    
                then
                    attribute {"break-after"}{"yes"}
                else ()),
                $recursive-function
    
    uwe's avatar
    uwe committed
     : pre:preprocessing
     : main preprocessing function.
     :
     : @param $nodes the nodes to be converted
     : @return item()* representing the converted node
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-02-01)
     : @status working
     : @author Uwe Sikora
     :)
    
    (: Main conversion: iterates over $nodes and dispatches on the node kind.
     : Visible behaviour per case:
     :   - processing instructions and comments are dropped
     :   - text: when $replace-whitespace is true the node is handed to
     :     whitespace:text together with a single blank; the alternative branch
     :     is not visible in this view
     :   - TEI: copied with attributes and converted children, followed by the
     :     result of pre:get-editorial-notes
     :   - teiHeader: returned unchanged
     :   - div: a div of type section-group is replaced by its converted children;
     :     any other typed div is copied with a generated id; an untyped div
     :     without descendant div gets type single-div and a generated id
     :   - app whose parent is a section-group div: rebuilt with type standalone
     :     and a generated id; other app elements go through pre:default-element
     :   - lem, rdg: copied with attributes plus a generated id
     :   - note: converted through pre:default-element unless its type is
     :     editorial-commentary, in which case nothing is returned here
     :   - pb: copied with break, break-before and/or break-after attributes,
     :     depending on the text or whitespace directly before and after it
     :   - hi: right-aligned or center-aligned becomes an aligned element;
     :     spaced-out keeps the name hi and joins the characters of its text
     :     with an at sign between them
     :   - seg of type item, head, row or cell: renamed to that type, the type
     :     attribute is not copied
     :   - everything else: pre:default-element
     : NOTE(review): the return type, the function close and several closing
     : brackets are not visible in this view - lines appear to be missing; confirm
     : against the repository version. :)
    declare function pre:preprocessing
    
        ($nodes as node()*, $replace-whitespace as xs:boolean)
    
        for $node in $nodes
        return
            typeswitch($node)
                case processing-instruction() return ()
    
                case text() return (
    
                  if($replace-whitespace) then (
    
                    whitespace:text($node, " ")
    
                case comment() return ()
    
                case element(TEI) return (
                    element{$node/name()}{
                        $node/@*,
    
                        pre:preprocessing($node/node(), $replace-whitespace),
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                        pre:get-editorial-notes($node, $replace-whitespace)
    
    usikora's avatar
    usikora committed
                case element(teiHeader) return ( $node )
    
                case element(div) return (
    
                    if ($node[@type = "section-group"]) then (
                        pre:preprocessing($node/node(), $replace-whitespace)
    
                    else if($node[@type]) then
                        element{$node/name()}{
                            $node/@*,
    
                            attribute id {generate-id($node)},
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
                    else if ($node[not(@type)][not(descendant::div)]) then
                        element{$node/name()}{
                            attribute type {"single-div"},
                            attribute id {generate-id($node)},
                            pre:preprocessing($node/node(), $replace-whitespace)
    
                        pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
    
                case element(app) return
                    if($node/parent::div[@type = "section-group"]) then
                        element{$node/name()}{
                            attribute type {"standalone"},
                            attribute id {generate-id($node)},
                            pre:preprocessing($node/node(), $replace-whitespace)
    
                    else
                        pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
    
                case element(lem) return (
                    element{$node/name()}{
                        $node/@*,
                        attribute {"id"}{ generate-id($node)},
    
                        pre:preprocessing($node/node(), $replace-whitespace)
    
                case element(rdg) return (
    
                    element{$node/name()}{
                        $node/@*,
                        attribute {"id"}{ generate-id($node)},
                        pre:preprocessing($node/node(), $replace-whitespace)
                    }
    
                case element(note) return (
    
                    if ( $node[@type != "editorial-commentary"] or $node[ not(@type) ] ) then (
                        pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
                    ) else ( )
    
    (:                pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) ):)
    
                case element(pb) return (
    
                    let $preceding-sibling := $node/preceding-sibling::node()[1]
    
                    let $following-sibling := $node/following-sibling::node()[1]
                    return
                        element {$node/name()}{
                            $node/@*,
    
                                ( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() )
                                and
                                ( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() )
    
                            ) then ( attribute {"break"}{"no"} )
                            else if (
    
                                ( $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
    
                                ( $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
    
                                attribute {"break-before"}{"yes"},
                                attribute {"break-after"}{"yes"}
                            )
                            else if (
                                $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
                            ) then (
                                attribute {"break-before"}{"yes"}
                            )
                            else if (
                                $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
                            ) then (
                                attribute {"break-after"}{"yes"}
    
                case element(hi) return (
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                if($node[@rend = ('right-aligned', 'center-aligned')]) then(
    
                        element {'aligned'} {
                            $node/@*,
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                else if($node[@rend = 'spaced-out']) then
                    element {'hi'} {
                        $node/@*,
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                        let $text := $node/text()
                        let $str-length := string-length($text)
                        let $spaced-out :=
                            for $iii in 1 to $str-length return
                                if(not($iii = $str-length)) then
                                    (substring($text, $iii, 1), "@")
                                else
                                    substring($text, $iii, 1)
                        return string-join($spaced-out, '')
    
    Michelle Weidling's avatar
    Michelle Weidling committed
                    }
                else (
    
                        pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
    
                case element(seg) return (
                    if($node[@type = 'item']) then(
                        element {'item'} {
                            $node/@*[name() != 'type'],
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
                    else if($node[@type = 'head']) then(
                        element {'head'} {
                            $node/@*[name() != 'type'],
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
                    else if($node[@type = 'row']) then(
                        element {'row'} {
                            $node/@*[name() != 'type'],
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
    MRodz's avatar
    MRodz committed
                    else if($node[@type = 'cell']) then(
    
                        element {'cell'} {
    
    MRodz's avatar
    MRodz committed
                            $node/@*[name() != 'type'],
    
                            pre:preprocessing($node/node(), $replace-whitespace)
    
    MRodz's avatar
    MRodz committed
                        }
                    )
    
                        pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
    
    
                default return (
    
                    pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
    
    Michelle Weidling's avatar
    Michelle Weidling committed
    
    (:~
     : Gathers every note of type "editorial-commentary" found below the given
     : node(s) and returns the converted notes inside one "editorial-notes" element.
     :
     : Each note is converted with pre:default-element; its children are converted
     : with pre:preprocessing first.
     :
     : @param $node the node(s) whose descendants are searched for editorial notes
     : @param $replace-whitespace handed on unchanged to pre:preprocessing
     : @return a single editorial-notes element; it has no children when no
     :         editorial note is found
     : @author Michelle Weidling
     :)
    declare function pre:get-editorial-notes($node as node()*, $replace-whitespace)
    as element(editorial-notes) {
        let $commentary-notes := $node//note[@type eq "editorial-commentary"]
        return
            element editorial-notes {
                $commentary-notes !
                    pre:default-element(., pre:preprocessing(./node(), $replace-whitespace))
            }
    };
    
    Michelle Weidling's avatar
    Michelle Weidling committed
    
    (:~
     : Splits the string value of a text node into its individual characters.
     :
     : @param $text the text node to split
     : @return one single-character string per character of $text, in order
     :)
    declare function pre:tokenize-by-character($text as text()) as xs:string* {
        string-to-codepoints($text) ! codepoints-to-string(.)
    };
    
    (:~
     : Concatenates the given strings into one string, placing the literal "@"
     : character between neighbouring items.
     : NOTE(review): despite the function name the separator is "@", not a blank;
     : presumably a placeholder that is replaced later - confirm against the consumer.
     :
     : @param $characters the strings to join
     : @return the joined string; the empty string when $characters is empty
     :)
    declare function pre:add-spaces-between-chars($characters as xs:string*) as xs:string {
        $characters => string-join('@')
    };