diff --git a/.gitignore b/.gitignore index b8701540d1e4585a7427264259deaf90e6725645..dff3035202925c7904b40cfb171518b4a7f5974a 100755 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,5 @@ yarn-error.log ###> symfony/web-server-bundle ### /.web-server-pid ###< symfony/web-server-bundle ### + +/teis \ No newline at end of file diff --git a/config/services.yaml b/config/services.yaml index d4a8b45e59d0fa52a9186770a67c90da4fc4cdc4..9df44acb7ba74f7e2214f31dc14aaa0c213cf840 100755 --- a/config/services.yaml +++ b/config/services.yaml @@ -87,7 +87,7 @@ services: App\Service\FileService: arguments: ["@cache_filesystem", "@source_filesystem", "@pdf_filesystem", "@image_filesystem", "@tei_filesystem"] - App\Controller\SimplexmlController: + App\Controller\Tei2SolrController: calls: - method: 'setConfigs' arguments: diff --git a/src/Command/SolrIndexing.php b/src/Command/SolrIndexing.php index f6fe37baf66a8ccab561456e9272457bddea249b..ac071b432921a44abbd19f3d69429254a9386760 100644 --- a/src/Command/SolrIndexing.php +++ b/src/Command/SolrIndexing.php @@ -4,23 +4,21 @@ declare(strict_types=1); namespace App\Command; -use App\Controller\SimplexmlController; +use App\Controller\Tei2SolrController; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Output\OutputInterface; -//use Symfony\Component\Filesystem\Filesystem; use Symfony\Component\HttpKernel\KernelInterface; class SolrIndexing extends Command { - private SimplexmlController $simplexmlController; - + private Tei2SolrController $tei2SolrController; protected $projectDir; - public function __construct(SimplexmlController $simplexmlController, KernelInterface $kernel) + public function __construct(Tei2SolrController $tei2SolrController, KernelInterface $kernel) { parent::__construct(); - $this->simplexmlController = $simplexmlController; + $this->tei2SolrController = $tei2SolrController; $this->projectDir = $kernel->getProjectDir(); } @@ -41,9 +39,9 @@ class SolrIndexing extends Command { $output->writeln('Start solr indexing.'); - $this->simplexmlController->fetchTeis(); - $this->simplexmlController->deleteSolrIndex(); - $this->simplexmlController->tei2solr(); + $this->tei2SolrController->fetchTeis(); + $this->tei2SolrController->deleteSolrIndex(); + $this->tei2SolrController->tei2solr(); $time = microtime(true) - $_SERVER['REQUEST_TIME_FLOAT']; $time = $time/60; diff --git a/src/Controller/SimplexmlController.php b/src/Controller/Tei2SolrController.php similarity index 96% rename from src/Controller/SimplexmlController.php rename to src/Controller/Tei2SolrController.php index 9f8dd2a16c1e8dc6f6fbb3930e07467278d4ada6..4b7bc767b41f6d1059581ca791de2e5ef6769b92 100755 --- a/src/Controller/SimplexmlController.php +++ b/src/Controller/Tei2SolrController.php @@ -2,7 +2,6 @@ namespace App\Controller; - use App\Model\SolrDocument; use DOMDocument; use DOMXPath; @@ -14,7 +13,7 @@ use Symfony\Component\Routing\Annotation\Route; use Solarium\Client; use Symfony\Component\Finder\Finder; -class SimplexmlController extends AbstractController +class Tei2SolrController extends AbstractController { private Client $client; @@ -457,6 +456,17 @@ class SimplexmlController extends AbstractController $documentGndsNodes = $xpath->query('//tei:text[@xml:lang="ger"]//tei:name'); $documentEntities = []; foreach ($documentGndsNodes as $documentGndsNode) { + if (is_iterable($documentGndsNode->childNodes)) { + $entityName = ''; + foreach ($documentGndsNode->childNodes as $childNode) { + if (!empty($childNode->data)) { + + $entityName .= ' ' . trim(preg_replace('/\s+/', ' ', $childNode->data)); + } + } + $entityName = str_replace('- ', '', $entityName); + } + foreach ($documentGndsNode->attributes as $attribute) { if (strstr($attribute->nodeValue, 'gnd')) { $gnd = str_replace('gnd:', '', $attribute->nodeValue); @@ -466,10 +476,6 @@ class SimplexmlController extends AbstractController } } - if (!empty($documentGndsNode->nodeValue)) { - $entityName = trim(preg_replace('/\s+/', ' ', $documentGndsNode->nodeValue)); - } - if (isset($entityName) && isset($type) && isset($gnd)) { $entities[] = ['doctype' => 'entity', 'name' => $entityName, 'entity_type' => $type, 'gnd' => $gnd]; } @@ -907,7 +913,6 @@ class SimplexmlController extends AbstractController && $e->parentNode->nodeName !== 'note' ) { - if (isset($add) && !empty($add)) { $tText .= ' ⟨' . $e->data . ' ' . $add; @@ -943,7 +948,7 @@ class SimplexmlController extends AbstractController $italic = true; } } elseif (isset($gnd) && true === $gnd) { - $eText .= $e->data . ''; + $eText .= ''; $tText .= $e->data; $gnd = false; } else { @@ -1068,10 +1073,27 @@ class SimplexmlController extends AbstractController } } } elseif ('name' === $e->nodeName && (isset($e->attributes[1]->value) && !empty($e->attributes[1]->value)) && (isset($e->attributes[0]->value) && !empty($e->attributes[0]->value))) { + + + $entityName = ''; + foreach ($e->childNodes as $childNode) { + if (!empty($childNode->data)) { + + $entityName .= ' '.trim(preg_replace('/\s+/', ' ', $childNode->data)); + } + } + $entityName = str_replace('- ', '', $entityName); $uuid = $this->getUuid(); - $gndsUuids[$uuid] = str_replace('gnd:', '', $e->attributes[1]->value); - $pagesGndsUuids[$k][$uuid] = str_replace('gnd:', '', $e->attributes[1]->value); - $eText .= ''; + + if (str_contains($e->attributes[1]->value, 'gnd.')) { + $gndsUuids[$uuid] = str_replace('gnd.', '', $e->attributes[1]->value); + $pagesGndsUuids[$k][$uuid] = str_replace('gnd.', '', $e->attributes[1]->value); + } elseif (str_contains($e->attributes[1]->value, 'gnd:')) { + $gndsUuids[$uuid] = str_replace('gnd:', '', $e->attributes[1]->value); + $pagesGndsUuids[$k][$uuid] = str_replace('gnd:', '', $e->attributes[1]->value); + } + + $eText .= ''.$entityName; $gnd = true; } } diff --git a/teis/sampletei/Z_1822-02-20_k.xml b/teis/sampletei/Z_1822-02-20_k.xml index 299f5db890d1b3174a63d6f4c67a58997707a96d..332aef37186a06d9ade5339abe626acd4fe784db 100755 --- a/teis/sampletei/Z_1822-02-20_k.xml +++ b/teis/sampletei/Z_1822-02-20_k.xml @@ -280,7 +280,6 @@ Hier. -