diff --git a/src/Import/HTMLDocument.php b/src/Import/HTMLDocument.php
index 849949a7938a0d703790c2492bfc812b54637d49..0fd732f3a5c19ce33b8a939a359b94abfee479f7 100644
--- a/src/Import/HTMLDocument.php
+++ b/src/Import/HTMLDocument.php
@@ -44,6 +44,30 @@ class HTMLDocument extends DOMDocument
return $element;
}
+    /**
+     * Returns the element produced by createCustomElement('ul', $classes).
+     *
+     * @param string $classes forwarded unchanged; presumably the CSS classes of the new element (TODO confirm)
+     */
+    public function ul(string $classes = ''): DOMElement
+    {
+        $unorderedList = $this->createCustomElement('ul', $classes);
+
+        return $unorderedList;
+    }
+
+    /**
+     * Returns the element produced by createCustomElement('li', $classes).
+     *
+     * @param string $classes forwarded unchanged; presumably the CSS classes of the new element (TODO confirm)
+     */
+    public function li(string $classes = ''): DOMElement
+    {
+        $listItem = $this->createCustomElement('li', $classes);
+
+        return $listItem;
+    }
+
public function div(string $classes = ''): DOMElement
{
return $this->createCustomElement('div', $classes);
diff --git a/src/Import/Importer.php b/src/Import/Importer.php
index e4e122034e0763a4034e3dfd2c526114774f5f25..91883413c26fb7f431d0728d248e4e7e0477b872 100644
--- a/src/Import/Importer.php
+++ b/src/Import/Importer.php
@@ -17,19 +17,23 @@ class Importer implements ImporterInterface
public function import(): void
{
+ $filesystem = new Filesystem();
+ if (!$filesystem->exists($this->teiDir)) {
+ $filesystem->mkdir($this->teiDir);
+ }
+ $invalidTeiList = $this->getInvalidTeiList();
+
for ($i = 1; $i <= 100; ++$i) {
try {
$files = file_get_contents($this->gitlabRepoTreeUrl.'&access_token='.$this->gitlabRepoToken.'&page='.$i);
$files = json_decode($files, true);
- foreach ($files as $file) {
- $invalidTeiList = $this->getInvalidTeiList();
+ foreach ($files as $file) {
if ([] !== $invalidTeiList && !in_array(trim($file['name']), $invalidTeiList)) {
$fileData = file_get_contents(
$this->gitlabProcessedTeiRepoUrl.$file['name'].'?access_token='.$this->gitlabRepoToken.'&ref=master'
);
$fileData = json_decode($fileData, true);
- $filesystem = new Filesystem();
try {
$filesystem->dumpFile($this->teiDir.$file['name'], base64_decode($fileData['content']));
@@ -56,8 +60,13 @@ class Importer implements ImporterInterface
private function getInvalidTeiList(): array
{
$invalidTeiList = [];
-
- if (file_exists($this->invalidTeiListFile)) {
- $invalidTeiList = json_decode(file_get_contents($this->invalidTeiListFile), true);
+        // Read the list directly in a single request. A failed read (missing
+        // file, HTTP error status, unreachable host) yields false, in which
+        // case the list simply stays empty.
+        $rawList = @file_get_contents($this->invalidTeiListFile);
+        $decodedList = false === $rawList ? null : json_decode($rawList, true);
+        // A payload that does not decode to an array would violate the
+        // declared array return type, so it is ignored as well.
+        if (is_array($decodedList)) {
+            $invalidTeiList = $decodedList;
}
diff --git a/src/Import/Indexer.php b/src/Import/Indexer.php
index a180ecf7d11170da9943520bb266aaeadfccc75e..9288f5f932e8433b2c2088fea11017bbd899b925 100644
--- a/src/Import/Indexer.php
+++ b/src/Import/Indexer.php
@@ -65,7 +65,6 @@ class Indexer implements IndexerInterface
$this->transcriptionService->setGraphics($graphics);
$this->editedTextService->setGraphics($graphics);
$pages = $this->preProcessingService->splitByPages($body);
-
$pageLevelEditedText = [];
$pageLevelTranscriptedText = [];
$pagesGndsUuids = [];
diff --git a/src/Service/CommonTransformService.php b/src/Service/CommonTransformService.php
index 1f282f463022f0f37fb11753c3e73dbea9fc456c..375f3db7f4ba293366fc5777c9b56e3b8e66aa0d 100644
--- a/src/Service/CommonTransformService.php
+++ b/src/Service/CommonTransformService.php
@@ -138,8 +138,14 @@ class CommonTransformService
return $doc->div();
}
- protected function handleLabel(DOMElement $teiEl, HTMLDocument $doc): DOMNode
+ protected function handleLabel(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
{
+ // Temporary handling until the styling requirements for labels
+ // inside list items are specified.
+ if ('item' === $teiEl->parentNode->nodeName) {
+ return $doc->text($teiEl->textContent);
+ }
+
$classes = '';
if (
'div' === $teiEl->parentNode->nodeName &&
diff --git a/src/Service/EditedTextService.php b/src/Service/EditedTextService.php
index 3933409da077d9e68ca439527ef06a9aa4496463..567639f608039d77ce5788adbdd9890f3938a374 100644
--- a/src/Service/EditedTextService.php
+++ b/src/Service/EditedTextService.php
@@ -32,6 +32,30 @@ class EditedTextService extends CommonTransformService
private array $notes = [];
private array $works = [];
+    /**
+     * Handler for `item` nodes (going by its name): returns what $doc->li() produces.
+     *
+     * $teiEl is accepted but not read here.
+     */
+    protected function handleItem(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
+    {
+        $listItem = $doc->li();
+
+        return $listItem;
+    }
+
+    /**
+     * Handler for `list` nodes (going by its name): returns what $doc->ul() produces.
+     *
+     * $teiEl is accepted but not read here.
+     */
+    protected function handleList(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
+    {
+        $unorderedList = $doc->ul();
+
+        return $unorderedList;
+    }
+
public function clear()
{
$this->gndsUuids = [];
diff --git a/src/Service/PreProcessingService.php b/src/Service/PreProcessingService.php
index 96dce41396ab4eebe3166688afc990f257c71650..e4b46984583dbbff9e2ca982aabdf901158c69be 100644
--- a/src/Service/PreProcessingService.php
+++ b/src/Service/PreProcessingService.php
@@ -118,10 +118,11 @@ class PreProcessingService
$lastNode = $node;
}
- // Lastly we append the element itself to the new page to maintain it for further processing
+ // Lastly we insert a copy of the element as the first child
+ // of the new page to keep it available for further processing
$pbClone = $newPage->createElement('pb');
$pbClone = $this->cloneAttributes($pbEl->attributes, $pbClone);
- $this->lastParent->appendChild($pbClone);
+ $newPage->insertBefore($pbClone, $newPage->firstChild);
return $newPage;
}
diff --git a/src/Service/TranscriptionService.php b/src/Service/TranscriptionService.php
index 3f6443a202f91ac64bab5c627e5708ea79115516..f3db7b04a4d4827369aa9a54f8ac7f924501a7cb 100644
--- a/src/Service/TranscriptionService.php
+++ b/src/Service/TranscriptionService.php
@@ -36,6 +36,30 @@ class TranscriptionService extends CommonTransformService
'wavyunderline' => 'underline',
];
+    /**
+     * Handler for `item` nodes (going by its name): hands back the node built by $doc->li().
+     *
+     * The $teiEl argument is part of the signature but is not used.
+     */
+    protected function handleItem(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
+    {
+        $itemNode = $doc->li();
+
+        return $itemNode;
+    }
+
+    /**
+     * Handler for `list` nodes (going by its name): hands back the node built by $doc->ul().
+     *
+     * The $teiEl argument is part of the signature but is not used.
+     */
+    protected function handleList(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
+    {
+        $listNode = $doc->ul();
+
+        return $listNode;
+    }
+
protected function handleAdd(DOMElement $teiEl, HTMLDocument $doc): ?DOMNode
{
$type = $teiEl->getAttribute('type');