Commit 7f579419 authored by asajedi's avatar asajedi
Browse files

Merge branch 'imagesToS3Storage' into 'master'

Implement import of images from Owncloud to S3 storage and synchronize them

See merge request !96
parents 0bf8ace6 909f50e4
Pipeline #299279 passed with stages
in 14 minutes and 41 seconds
......@@ -2,12 +2,8 @@ nelmio_solarium:
endpoints:
default:
host: "%env(SOLR_HOST)%"
#port: "%env(SOLR_PORT)%"
core: "%env(SOLR_CORE)%"
# The following are the default parameters for Solarium client:
# scheme: http
# path: /solr
# timeout: 5
clients:
default:
endpoints: [default]
adapter_timeout: 30
\ No newline at end of file
......@@ -26,6 +26,10 @@ parameters:
tei_dir: '%kernel.project_dir%/data/gitlab/'
tei_sample_dir: '%kernel.project_dir%/data/sampletei/'
lit_dir: '%kernel.project_dir%/data/lit/'
owncloud_images_dir: '%kernel.project_dir%/data/images/'
owncloud_base_url: 'https://owncloud.gwdg.de/remote.php/nonshib-webdav/'
owncloud_root_dir: 'Goethes%20Farbenlehre'
owncloud_image_archive: 'Graph_Archiv'
document_languages:
eng: Englisch
fre: Französisch
......@@ -54,6 +58,8 @@ parameters:
title_m_sub: monographic_sub_title
title_s_sub: series_sub_title
title_u_sub: unpublished_sub_title
owncloud_username: '%env(OWNCLOUD_USERNAME)%'
owncloud_password: '%env(OWNCLOUD_PASSWORD)%'
services:
# default configuration for services in *this* file
......@@ -124,6 +130,12 @@ services:
- '%tei_dir%'
- '%tei_sample_dir%'
- '%lit_dir%'
- '%owncloud_images_dir%'
- '%owncloud_base_url%'
- '%owncloud_root_dir%'
- '%owncloud_image_archive%'
- '%owncloud_username%'
- '%owncloud_password%'
- '%env(GITLAB_REPO_TOKEN)%'
- '%env(GITLAB_REPO_TREE_URL)%'
- '%env(GITLAB_PROCESSED_TEI_REPO_URL)%'
......
<?php
namespace App\Command;
use App\Import\ImporterInterface;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command `app:images_to_s3`.
 *
 * Delegates to the importer to copy images from Owncloud into the S3 storage
 * and reports the outcome on the console.
 */
class ImagesToS3Storage extends Command
{
    private ImporterInterface $importer;

    public function __construct(ImporterInterface $importer)
    {
        parent::__construct();

        $this->importer = $importer;
    }

    /**
     * Registers the command name and its description.
     */
    protected function configure(): void
    {
        $this
            ->setName('app:images_to_s3')
            ->setDescription('Synchronize images in S3 with Owncloud.');
    }

    /**
     * Runs the image import.
     *
     * @return int Command::FAILURE when the importer returns a non-empty error
     *             message, Command::SUCCESS otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $output->writeln('Start importing images into S3 storage.');

        $message = $this->importer->importImagesToS3Storage();

        // The importer signals failure through a non-empty message. Write it via
        // the $output argument: the class never stored an output object, so the
        // former `$this->output` access failed on an uninitialized typed property.
        if ('' !== $message) {
            $output->writeln('IMPORT ERROR: '.$message);

            return Command::FAILURE;
        }

        // Elapsed time since the PHP process started, converted to minutes.
        $time = (microtime(true) - $_SERVER['REQUEST_TIME_FLOAT']) / 60;
        $output->writeln('Import process completed in '.$time.' minutes.');

        return Command::SUCCESS;
    }
}
......@@ -42,7 +42,7 @@ class SolrIndexing extends Command
{
$server = $input->getArgument('server');
$output->writeln('Start solr indexing.');
$this->importer->import($server);
$this->importer->importTeisForIndexing($server);
$this->indexer->deleteSolrIndex();
$this->indexer->tei2Solr($server);
$this->indexer->lit2Solr();
......
......@@ -41,6 +41,6 @@ class TeiToS3Storage extends Command
$time /= 60;
$output->writeln('Import process completed in '.$time.' minutes.');
return 1;
return Command::SUCCESS;
}
}
......@@ -4,7 +4,13 @@ declare(strict_types=1);
namespace App\Import;
use Exception;
use App\Service\FileService;
use Imagine\Exception\RuntimeException;
use Imagine\Image\ImageInterface;
use Imagine\Imagick\Imagine;
use Sabre\DAV\Client;
use Sabre\DAV\Xml\Property\ResourceType;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Component\HttpFoundation\File\Exception\FileException;
......@@ -18,6 +24,12 @@ class Importer implements ImporterInterface
private ?string $gitlabRepoTreeUrl;
private ?string $invalidTeiListFile;
private ?string $litDir;
private ?string $owncloudImagesDir = null;
private ?string $owncloudBaseUrl = null;
private ?string $owncloudRootDir = null;
private ?string $owncloudImageArchive = null;
private ?string $owncloudUsername;
private ?string $owncloudPassword;
private ?string $sampleTeiDocumentUrl;
private ?string $teiDir = null;
private ?string $teiSampleDir = null;
......@@ -27,7 +39,27 @@ class Importer implements ImporterInterface
$this->fileService = $fileService;
}
public function import(string $server): void
/**
 * Injects all configuration values the importer works with.
 *
 * Every argument is stored unchanged in the property of the same name.
 */
public function setConfigs(
    string $teiDir,
    string $teiSampleDir,
    string $litDir,
    string $owncloudImagesDir,
    string $owncloudBaseUrl,
    string $owncloudRootDir,
    string $owncloudImageArchive,
    string $owncloudUsername,
    string $owncloudPassword,
    string $gitlabRepoToken,
    string $gitlabRepoTreeUrl,
    string $gitlabProcessedTeiRepoUrl,
    string $invalidTeiListFile,
    string $sampleTeiDocumentUrl,
    string $gitlabLitRepoUrl,
    string $gitlabProcessedLitRepoUrl
): void {
    // Local working directories.
    $this->litDir = $litDir;
    $this->teiSampleDir = $teiSampleDir;
    $this->teiDir = $teiDir;
    $this->owncloudImagesDir = $owncloudImagesDir;

    // Owncloud location and credentials.
    $this->owncloudPassword = $owncloudPassword;
    $this->owncloudUsername = $owncloudUsername;
    $this->owncloudImageArchive = $owncloudImageArchive;
    $this->owncloudRootDir = $owncloudRootDir;
    $this->owncloudBaseUrl = $owncloudBaseUrl;

    // GitLab access and TEI / literature sources.
    $this->gitlabProcessedLitRepoUrl = $gitlabProcessedLitRepoUrl;
    $this->gitlabLitRepoUrl = $gitlabLitRepoUrl;
    $this->sampleTeiDocumentUrl = $sampleTeiDocumentUrl;
    $this->invalidTeiListFile = $invalidTeiListFile;
    $this->gitlabProcessedTeiRepoUrl = $gitlabProcessedTeiRepoUrl;
    $this->gitlabRepoTreeUrl = $gitlabRepoTreeUrl;
    $this->gitlabRepoToken = $gitlabRepoToken;
}
public function importTeisForIndexing(string $server): void
{
if ('dev' === $server) {
$this->importSampleTeiDocument();
......@@ -154,20 +186,6 @@ class Importer implements ImporterInterface
}
}
/**
 * Stores the TEI, literature and GitLab configuration values on the importer.
 *
 * Each argument is assigned unchanged to the property of the same name.
 */
public function setConfigs(string $teiDir, string $teiSampleDir, string $litDir, string $gitlabRepoToken, string $gitlabRepoTreeUrl, string $gitlabProcessedTeiRepoUrl, string $invalidTeiListFile, string $sampleTeiDocumentUrl, string $gitlabLitRepoUrl, string $gitlabProcessedLitRepoUrl): void
{
$this->teiDir = $teiDir;
$this->teiSampleDir = $teiSampleDir;
$this->litDir = $litDir;
$this->gitlabRepoToken = $gitlabRepoToken;
$this->gitlabRepoTreeUrl = $gitlabRepoTreeUrl;
$this->gitlabProcessedTeiRepoUrl = $gitlabProcessedTeiRepoUrl;
$this->invalidTeiListFile = $invalidTeiListFile;
$this->sampleTeiDocumentUrl = $sampleTeiDocumentUrl;
$this->gitlabLitRepoUrl = $gitlabLitRepoUrl;
$this->gitlabProcessedLitRepoUrl = $gitlabProcessedLitRepoUrl;
}
private function downloadTeiFile($teiFilesystem, array $file, string $teiFileUrl): bool
{
try {
......@@ -229,4 +247,149 @@ class Importer implements ImporterInterface
$filesystem->dumpFile($this->teiSampleDir.'sample.xml', $sampleTeiDocument);
}
}
/**
 * Creates the WebDAV client from the configured Owncloud settings and runs
 * the image import.
 *
 * @return string an empty string when the import finished without an
 *                exception, otherwise the exception message; messages longer
 *                than 2000 bytes are shortened to their first 400 and last
 *                1500 bytes joined by " [...] "
 */
public function importImagesToS3Storage(): string
{
    $this->client = new Client([
        'baseUri' => $this->owncloudBaseUrl,
        'userName' => $this->owncloudUsername,
        'password' => $this->owncloudPassword,
    ]);

    try {
        $this->importImagesFromOwncloudToS3Storage();

        return '';
    } catch (Exception $exception) {
        $errorText = $exception->getMessage();
    }

    if (strlen($errorText) <= 2000) {
        return $errorText;
    }

    // Keep the head and the tail of an overlong message.
    return substr($errorText, 0, 400).' [...] '.substr($errorText, -1500);
}
/**
 * Copies every TIFF listed by getImagePaths() that is not yet present in the
 * image storage: the file is downloaded from Owncloud into the local images
 * directory, handed to saveJpegToS3() and finally listed on standard output.
 *
 * Entries without a name, entries that are not TIFF files, already stored
 * images and downloads with an empty body are skipped individually, so a
 * single bad entry no longer ends the run before the summary is printed.
 */
private function importImagesFromOwncloudToS3Storage(): void
{
    $imageFilesystem = $this->fileService->getImageFilesystem();
    $localFilesystem = new Filesystem();
    $importedImages = [];

    foreach ($this->getImagePaths() as $imagePath) {
        // Each entry is an array with a 'name' key; compare the name itself
        // (comparing the array against '' was always true).
        $remotePath = $imagePath['name'] ?? '';
        if ('' === $remotePath) {
            continue;
        }

        $pathSegments = explode('/', $remotePath);
        $imageName = urldecode(end($pathSegments));

        // Case-insensitive, in line with the 'TIF'/'tif' handling below; doing
        // this first also avoids a storage lookup for non-TIFF files.
        if ('tif' !== strtolower(pathinfo($imageName, PATHINFO_EXTENSION))) {
            continue;
        }

        // NOTE(review): the replacement hits every occurrence of these
        // substrings in the path, not only the extension — kept as is so
        // existing storage keys stay stable.
        $convertedImagePath = trim(
            urldecode(
                str_replace(
                    ['Graph_Archiv', 'TIF', 'tif'],
                    ['', 'jpg', 'jpg'],
                    $remotePath
                )
            ),
            '/'
        );

        if ($imageFilesystem->has($convertedImagePath)) {
            continue;
        }

        $response = $this->client->request('GET', $this->owncloudRootDir.'/'.$remotePath);
        if (empty($response['body'])) {
            // Nothing was downloaded for this image; carry on with the next one.
            continue;
        }

        if (!$localFilesystem->exists($this->owncloudImagesDir)) {
            $localFilesystem->mkdir($this->owncloudImagesDir);
        }
        $localFilesystem->dumpFile($this->owncloudImagesDir.$imageName, $response['body']);

        $this->saveJpegToS3($convertedImagePath, $imageName);
        $importedImages[] = $convertedImagePath;
    }

    if ([] !== $importedImages) {
        echo "The following images were imported into S3 storage:" . PHP_EOL;
        foreach ($importedImages as $importedImage) {
            echo $importedImage . PHP_EOL;
        }
    }
}
/**
 * Converts a downloaded image from the local images directory to JPEG and
 * stores the result in the image storage.
 *
 * Does nothing when the downloaded file does not exist. Imagine runtime
 * errors are printed and not rethrown. Both temporary files (the download and
 * the generated JPEG) are removed afterwards, also when conversion or upload
 * fails.
 *
 * @param string $convertedImagePath target path inside the image storage
 * @param string $downloadedImage    file name of the download inside the local images directory
 */
private function saveJpegToS3(string $convertedImagePath, string $downloadedImage): void
{
    $sourceFile = $this->owncloudImagesDir.$downloadedImage;
    if (!file_exists($sourceFile)) {
        return;
    }

    // Strip only the last extension so that names containing further dots
    // ("plate.1.tif") keep their full base name.
    $lastDot = strrpos($downloadedImage, '.');
    $baseName = false === $lastDot ? $downloadedImage : substr($downloadedImage, 0, $lastDot);
    $jpegFile = $this->owncloudImagesDir.$baseName.'.'.self::s3ImageExtension;

    $options = [
        'resolution-units' => ImageInterface::RESOLUTION_PIXELSPERINCH,
        'resolution-x' => 150,
        'resolution-y' => 150,
        'jpeg_quality' => 50,
    ];

    try {
        (new Imagine())
            ->open($sourceFile)
            ->save($jpegFile, $options);

        $this->fileService->getImageFilesystem()->put($convertedImagePath, file_get_contents($jpegFile));
    } catch (RuntimeException $e) {
        echo 'Msg:'.$e->getMessage();
    } finally {
        // Never leave temporary files behind, whatever happened above.
        (new Filesystem())->remove([$sourceFile, $jpegFile]);
    }
}
/**
 * Queries the Owncloud image archive via PROPFIND (depth 10) and collects the
 * paths of all returned resources that are not collections.
 *
 * @return array<int, array{name: string}> one entry per resource; 'name' is
 *                                         the resource URL without its first
 *                                         three non-empty path segments
 */
private function getImagePaths(): array
{
    $archiveUrl = $this->owncloudRootDir.'/'.$this->owncloudImageArchive;
    $listing = $this->client->propFind(
        $archiveUrl,
        ['{DAV:}getlastmodified', '{DAV:}resourcetype'],
        10
    );

    $resourceUrls = array_keys($listing);
    // The first result is dropped — presumably the requested archive folder itself (TODO confirm).
    array_shift($resourceUrls);

    $imagePaths = [];
    foreach ($resourceUrls as $resourceUrl) {
        /* @var ResourceType|null $resourceType */
        $resourceType = $listing[$resourceUrl]['{DAV:}resourcetype'];
        if (null !== $resourceType && $resourceType->is('{DAV:}collection')) {
            // Collections are not image files.
            continue;
        }

        $parts = array_values(array_filter(explode('/', $resourceUrl)));
        // Cut off the three leading segments — presumably the WebDAV endpoint
        // and the root directory (verify against the configured base URL).
        $imagePaths[] = ['name' => implode('/', array_slice($parts, 3))];
    }

    return $imagePaths;
}
}
......@@ -4,5 +4,9 @@ namespace App\Import;
/**
 * Contract for the service that imports TEI documents and images.
 */
interface ImporterInterface
{
// NOTE(review): declared next to importTeisForIndexing() with the same
// signature; looks like the pre-rename declaration — confirm it is still needed.
public function import(string $server): void;
// Presumably imports the TEI files into the S3 storage; no implementation is
// visible here — verify.
public function importTeiToS3Storage(): void;
// Imports the TEI documents used for indexing for the given server name.
public function importTeisForIndexing(string $server): void;
// Imports images into the S3 storage. The returned string is an error
// message; callers treat an empty string as success.
public function importImagesToS3Storage(): string;
}
......@@ -22,7 +22,6 @@ class Indexer implements IndexerInterface
{
private const ARTICLE_DOC_TYPE = 'article';
private const ENTITY_DOC_TYPE = 'entity';
private const LITERATURE_DOC_TYPE = 'literature';
private const NOTE_DOC_TYPE = 'note';
private const PAGE_DOC_TYPE = 'page';
private Client $client;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment