diff --git a/src/Controller/SimplexmlController.php b/src/Controller/SimplexmlController.php index 10eeb6ca6d2c9dc3aaef4d6ebeba5b7346788936..9f8dd2a16c1e8dc6f6fbb3930e07467278d4ada6 100755 --- a/src/Controller/SimplexmlController.php +++ b/src/Controller/SimplexmlController.php @@ -501,6 +501,7 @@ class SimplexmlController extends AbstractController */ public function tei2solr(): void { + $this->client->getEndpoint()->setOptions(['timeout' => 60]); $finder = new Finder(); $finder->files()->in($this->teiDir); @@ -782,6 +783,28 @@ class SimplexmlController extends AbstractController } } } + } else { + $filesystem = new Filesystem(); + $teiImportLogFile = './data/log/teiImportLogs.txt'; + + if (!$filesystem->exists($teiImportLogFile)) { + $filesystem->mkdir('./data/log'); + $filesystem->touch($teiImportLogFile); + } + + $errors = []; + foreach (libxml_get_errors() as $key => $error) { + if (0 === $key) { + $errors[] = explode('/', $error->file)[4].PHP_EOL; + $errors[] = '--------------------'.PHP_EOL; + } + + $errors[] = $error->message; + } + + $filesystem->appendToFile($teiImportLogFile, implode('', $errors)); + + libxml_clear_errors(); } } } @@ -840,288 +863,285 @@ class SimplexmlController extends AbstractController public function getTextVersions(string $filePath, array $graphics): SolrDocument { - libxml_use_internal_errors(TRUE); - $solrDocument = new SolrDocument(); $doc = new DOMDocument(); $doc->load($filePath); - if (!libxml_get_errors()) { - $xpath = new DOMXPath($doc); - $xpath->registerNamespace('tei', 'http://www.tei-c.org/ns/1.0'); - $pagesNodes = $xpath->query('//tei:body'); + $xpath = new DOMXPath($doc); + $xpath->registerNamespace('tei', 'http://www.tei-c.org/ns/1.0'); + $pagesNodes = $xpath->query('//tei:body'); - $elements = []; - foreach ($pagesNodes as $pagesNode) { - $elements = $this->getPagesNodes($pagesNode, $elements); - } + $elements = []; + foreach ($pagesNodes as $pagesNode) { + $elements = $this->getPagesNodes($pagesNode, $elements); + } - $editedTextArr = []; - $transcriptedTextArr = []; - $gndsUuids = []; - $pagesGndsUuids = []; - - foreach ($elements as $k => $myelement) { - $renditions = []; - $myeditedText = ''; - $mytranscriptedText = ''; - - foreach ($myelement as $element) { - if (isset($element->nodeName) && ( - $element->nodeName === 'p' || - $element->nodeName === 'dateline' || - $element->nodeName === 'address' || - $element->nodeName === 'closer' || - $element->nodeName === 'list' - )) { - - $ele = []; - $el = $this->getNodeChilds($element, $ele); - $tText = ''; - $eText = ''; - $n = 0; - $liNumber = 1; - - foreach ($el as $e) { - if ($e->nodeName === '#text' - && ($e->parentNode->nodeName !== 'abbr' && ($e->nodeName !== 'del' || $e->parentNode->nodeName !== 'add')) - && $e->parentNode->nodeName !== 'note' - - ) { - - if (isset($add) && !empty($add)) { - $tText .= ' 〈' . $e->data . ' ' . $add; - - $eText .= ' ' . $e->data; - - $add = ''; - } elseif (isset($del) && !empty($del)) { - $tText .= ' [' . $e->data . ' ' . $del; - - $del = ''; - } elseif (isset($li) && !empty($li) && !empty($e->data) && 'item' === $e->parentNode->nodeName) { - - if (isset($italic) && true === $italic) { - $tText .= $li . $e->data . ''; - $eText .= $li . $e->data . ''; - $italic = false; - } else { - $tText .= $li . $e->data; - $eText .= $li . $e->data; - } + $editedTextArr = []; + $transcriptedTextArr = []; + $gndsUuids = []; + $pagesGndsUuids = []; + + foreach ($elements as $k => $myelement) { + $renditions = []; + $myeditedText = ''; + $mytranscriptedText = ''; + + foreach ($myelement as $element) { + if (isset($element->nodeName) && ( + $element->nodeName === 'p' || + $element->nodeName === 'dateline' || + $element->nodeName === 'address' || + $element->nodeName === 'closer' || + $element->nodeName === 'list' + )) { + + $ele = []; + $el = $this->getNodeChilds($element, $ele); + $tText = ''; + $eText = ''; + $n = 0; + $liNumber = 1; + + foreach ($el as $e) { + if ($e->nodeName === '#text' + && ($e->parentNode->nodeName !== 'abbr' && ($e->nodeName !== 'del' || $e->parentNode->nodeName !== 'add')) + && $e->parentNode->nodeName !== 'note' + + ) { + + if (isset($add) && !empty($add)) { + $tText .= ' 〈' . $e->data . ' ' . $add; + + $eText .= ' ' . $e->data; + + $add = ''; + } elseif (isset($del) && !empty($del)) { + $tText .= ' [' . $e->data . ' ' . $del; + + $del = ''; + } elseif (isset($li) && !empty($li) && !empty($e->data) && 'item' === $e->parentNode->nodeName) { + + if (isset($italic) && true === $italic) { + $tText .= $li . $e->data . ''; + $eText .= $li . $e->data . ''; + $italic = false; + } else { + $tText .= $li . $e->data; + $eText .= $li . $e->data; + } - $li = ''; - } elseif ('hi' === $e->parentNode->nodeName && isset($e->parentNode->attributes[0])) { + $li = ''; + } elseif ('hi' === $e->parentNode->nodeName && isset($e->parentNode->attributes[0])) { - $hi = explode(':', $e->parentNode->attributes[0]->value); + $hi = explode(':', $e->parentNode->attributes[0]->value); - if (isset($hi[1]) && !empty($hi[1]) && 'superscript' === $hi[1]) { - $tText .= '' . $e->data . ''; - } elseif (isset($hi[1]) && !empty($hi[1]) && 'italic' === $hi[1]) { + if (isset($hi[1]) && !empty($hi[1]) && 'superscript' === $hi[1]) { + $tText .= '' . $e->data . ''; + } elseif (isset($hi[1]) && !empty($hi[1]) && 'italic' === $hi[1]) { - $tText .= '' . $e->data . ''; + $tText .= '' . $e->data . ''; - $italic = true; - } - } elseif (isset($gnd) && true === $gnd) { - $eText .= $e->data . ''; - $tText .= $e->data; - $gnd = false; - } else { - $eText .= $e->data; + $italic = true; + } + } elseif (isset($gnd) && true === $gnd) { + $eText .= $e->data . ''; + $tText .= $e->data; + $gnd = false; + } else { + $eText .= $e->data; - if (isset($renditions) && !empty($renditions)) { - $classOpeningTag = ''; - $classEndTag = ''; + if (isset($renditions) && !empty($renditions)) { + $classOpeningTag = ''; + $classEndTag = ''; - foreach ($renditions as $rendition) { - if ('italic' === $rendition) { - $class = 'i'; + foreach ($renditions as $rendition) { + if ('italic' === $rendition) { + $class = 'i'; - } elseif ('underline' === $rendition) { - $class = 'u'; - } - if (isset($class) && !empty($class)) { - $classOpeningTag .= '<' . $class . '>'; - $classEndTag .= '' . $class . '>'; - } - - $renditions = []; + } elseif ('underline' === $rendition) { + $class = 'u'; + } + if (isset($class) && !empty($class)) { + $classOpeningTag .= '<' . $class . '>'; + $classEndTag .= '' . $class . '>'; } - } - if (isset($classOpeningTag) && !empty($classOpeningTag)) { - $tText .= $classOpeningTag; - $classOpeningTag = ''; + $renditions = []; } + } - $tText .= $e->data; + if (isset($classOpeningTag) && !empty($classOpeningTag)) { + $tText .= $classOpeningTag; + $classOpeningTag = ''; + } - if (isset($classEndTag) && !empty($classEndTag)) { - $tText .= $classEndTag; - $classEndTag = ''; - } + $tText .= $e->data; - } - } elseif ('item' === $e->nodeName) { - if ($liNumber++ === 1) { - $li = '
' . $eText . '
'; - $mytranscriptedText .= '' . $tText . '
'; + $myeditedText .= '' . $eText . '
'; + $mytranscriptedText .= '' . $tText . '
'; - } elseif ('pb' === $element->nodeName) { - foreach ($element->attributes as $key => $attribute) { - if ('facs' === $attribute->name) { - if (isset($graphics[trim($attribute->value, '#')])) { - $graphic = $graphics[trim($attribute->value, '#')]; + } elseif ('pb' === $element->nodeName) { + foreach ($element->attributes as $key => $attribute) { + if ('facs' === $attribute->name) { + if (isset($graphics[trim($attribute->value, '#')])) { + $graphic = $graphics[trim($attribute->value, '#')]; - if (str_ends_with($graphic, '.jpg')) { - $graphic = substr($graphic, 0, strlen($graphic) - 4); - } + if (str_ends_with($graphic, '.jpg')) { + $graphic = substr($graphic, 0, strlen($graphic) - 4); } - } elseif ('n' === $attribute->name) { - $pageNumber = $attribute->value; - } - - if (!empty($pageNumber) && !empty($graphic)) { - $href = '' . $pageNumber . ''; - } elseif (!empty($pageNumber) && empty($graphic)) { - $href = $pageNumber; - } else { - $href = ''; } + } elseif ('n' === $attribute->name) { + $pageNumber = $attribute->value; } - $tText = $href; - $eText = $href; - $myeditedText .= '' . $eText . '
'; - $mytranscriptedText .= '' . $tText . '
'; + if (!empty($pageNumber) && !empty($graphic)) { + $href = '' . $pageNumber . ''; + } elseif (!empty($pageNumber) && empty($graphic)) { + $href = $pageNumber; + } else { + $href = ''; + } } - } - $editedTextArr[] = $myeditedText; - $transcriptedTextArr[] = $mytranscriptedText; + $tText = $href; + $eText = $href; + $myeditedText .= '' . $eText . '
'; + $mytranscriptedText .= '' . $tText . '
'; + } } - $transcriptedText = ''; - foreach ($transcriptedTextArr as $mytranscriptedText) { - $transcriptedText .= $mytranscriptedText; - } + $editedTextArr[] = $myeditedText; + $transcriptedTextArr[] = $mytranscriptedText; + } - $editedText = ''; - foreach ($editedTextArr as $myeditedText) { - $editedText .= $myeditedText; - } + $transcriptedText = ''; + foreach ($transcriptedTextArr as $mytranscriptedText) { + $transcriptedText .= $mytranscriptedText; + } - $solrDocument->setTranscriptedText($transcriptedText); - $solrDocument->setTranscriptedTextArr($transcriptedTextArr); - $solrDocument->setEditedText($editedText); - $solrDocument->setEditedTextArr($editedTextArr); - $solrDocument->setGndsUuids($gndsUuids); - $solrDocument->setPagesGndsUuids($pagesGndsUuids); + $editedText = ''; + foreach ($editedTextArr as $myeditedText) { + $editedText .= $myeditedText; } + $solrDocument = new SolrDocument(); + $solrDocument->setTranscriptedText($transcriptedText); + $solrDocument->setTranscriptedTextArr($transcriptedTextArr); + $solrDocument->setEditedText($editedText); + $solrDocument->setEditedTextArr($editedTextArr); + $solrDocument->setGndsUuids($gndsUuids); + $solrDocument->setPagesGndsUuids($pagesGndsUuids); + return $solrDocument; }