diff --git a/config/services.yaml b/config/services.yaml
index e0844a3025596b736368f954d9a9d256418a2aa2..38faa4b6579bfb8d6b50f6e8b6d0a9e5b33be071 100755
--- a/config/services.yaml
+++ b/config/services.yaml
@@ -95,6 +95,7 @@ services:
                   - '%env(GITLAB_REPO_TOKEN)%'
                   - '%env(GITLAB_REPO_TREE_URL)%'
                   - '%env(GITLAB_PROCESSED_TEI_REPO_URL)%'
+                  - '%env(INVALIDE_TEI_LIST_FILE)%'
 
     App\Import\Indexer:
         calls:
diff --git a/src/Command/SolrIndexing.php b/src/Command/SolrIndexing.php
index 430ff8ffbc3da2be6407cd69a104bc8f23230ed4..680331c6107f0dc4fcd673bdf357e79f588d6e2b 100644
--- a/src/Command/SolrIndexing.php
+++ b/src/Command/SolrIndexing.php
@@ -38,7 +38,7 @@ class SolrIndexing extends Command
     {
         $output->writeln('Start solr indexing.');
 
-//        $this->importer->import();
+        $this->importer->import();
         $this->indexer->deleteSolrIndex();
         $this->indexer->tei2solr();
 
diff --git a/src/Import/Importer.php b/src/Import/Importer.php
index 0fc40b0b9a834fe4c6a718db8a1f130b5e7de5c0..ca76d3fa8c2af87970077bbb54207cff6a2cd1bd 100644
--- a/src/Import/Importer.php
+++ b/src/Import/Importer.php
@@ -13,9 +13,20 @@ class Importer implements ImporterInterface
     private ?string $gitlabRepoToken;
     private ?string $gitlabRepoTreeUrl;
     private ?string $teiDir = null;
+    private ?string $invalidTeiListFile;
 
-    public function __construct()
+    /**
+     * Reads the JSON file that lists the TEI files to be skipped during import.
+     *
+     * @return array decoded list; empty when the file does not contain a JSON array
+     */
+    private function getInvalidTeiList(): array
     {
+        $invalidTeiList = json_decode(file_get_contents($this->invalidTeiListFile), true);
+
+        // json_decode() returns null for malformed or empty input, which would
+        // violate the array return type and abort the import with a TypeError.
+        return is_array($invalidTeiList) ? $invalidTeiList : [];
     }
 
     public function import(): void
@@ -25,14 +36,22 @@ class Importer implements ImporterInterface
                 $files = file_get_contents($this->gitlabRepoTreeUrl.'&access_token='.$this->gitlabRepoToken.'&page='.$i);
                 $files = json_decode($files, true);
+                // Load the skip list once per listing request instead of re-reading the file for every entry.
+                $invalidTeiList = $this->getInvalidTeiList();
+
                 foreach ($files as $file) {
-                    $fileData = file_get_contents($this->gitlabProcessedTeiRepoUrl.$file['name'].'?access_token='.$this->gitlabRepoToken.'&ref=master');
-                    $fileData = json_decode($fileData, true);
-                    $filesystem = new Filesystem();
-
-                    try {
-                        $filesystem->dumpFile($this->teiDir.$file['name'], base64_decode($fileData['content']));
-                    } catch (FileException $exception) {
-                        echo $file['name'].' could not be imported.';
+                    // An empty skip list excludes nothing, so every file is imported in that case.
+                    if (!in_array(trim($file['name']), $invalidTeiList, true)) {
+                        $fileData = file_get_contents(
+                            $this->gitlabProcessedTeiRepoUrl.$file['name'].'?access_token='.$this->gitlabRepoToken.'&ref=master'
+                        );
+                        $fileData = json_decode($fileData, true);
+                        $filesystem = new Filesystem();
+
+                        try {
+                            $filesystem->dumpFile($this->teiDir.$file['name'], base64_decode($fileData['content']));
+                        } catch (FileException $exception) {
+                            echo $file['name'].' could not be imported.';
+                        }
                     }
                 }
             } catch (FileException $exception) {
@@ -41,11 +60,21 @@ class Importer implements ImporterInterface
         }
     }
 
-    public function setConfigs(string $teiDir, string $gitlabRepoToken, string $gitlabRepoTreeUrl, string $gitlabProcessedTeiRepoUrl): void
+    /**
+     * Stores the import settings on the instance.
+     *
+     * @param string $teiDir                    directory prefix the imported TEI files are written to
+     * @param string $gitlabRepoToken           access token appended to the GitLab request URLs
+     * @param string $gitlabRepoTreeUrl         URL used to list the repository files page by page
+     * @param string $gitlabProcessedTeiRepoUrl URL prefix used to fetch a single file
+     * @param string $invalidTeiListFile        location of the JSON list of files to skip
+     */
+    public function setConfigs(string $teiDir, string $gitlabRepoToken, string $gitlabRepoTreeUrl, string $gitlabProcessedTeiRepoUrl, string $invalidTeiListFile): void
     {
         $this->teiDir = $teiDir;
         $this->gitlabRepoToken = $gitlabRepoToken;
         $this->gitlabRepoTreeUrl = $gitlabRepoTreeUrl;
         $this->gitlabProcessedTeiRepoUrl = $gitlabProcessedTeiRepoUrl;
+        $this->invalidTeiListFile = $invalidTeiListFile;
     }
 }
diff --git a/src/Import/Indexer.php b/src/Import/Indexer.php
index 04cf24049c00beec3ed7c72a856e22f6bb9278b7..227d422033070caf811bb42ebd9443e52e449ea0 100644
--- a/src/Import/Indexer.php
+++ b/src/Import/Indexer.php
@@ -19,11 +19,10 @@ use Symfony\Component\Finder\Finder;
 
 class Indexer implements IndexerInterface
 {
-    private const ARTICLE_DOC_TYPE = 'article;';
-    private const PAGE_DOC_TYPE = 'page;';
-    private const NOTE_DOC_TYPE = 'note;';
-    private const ENTITY_DOC_TYPE = 'entity;';
-
+    private const ARTICLE_DOC_TYPE = 'article';
+    private const PAGE_DOC_TYPE = 'page';
+    private const NOTE_DOC_TYPE = 'note';
+    private const ENTITY_DOC_TYPE = 'entity';
     private Client $client;
     private EditedTextService $editedTextService;
     private PreProcessingService $preProcessingService;