Unverified commit 3a2c7272, authored by mweidling, committed by GitHub
Add reichsanzeiger gt (#5)

* feat: add Reichsanzeiger-GT to prepare.sh

* add Reichsanzeiger subsets

* add download for Reichsanzeiger subsets

* add first draft for reichsanzeiger gt

* extract sample bundles

* update .gitignore

* clean up

* tidy up project root

* tidy up volumes

* update README

* update data

* build: roll back to version where cis still runs

* build: remove default model mounting
parent cbe93933
Merge request !1: Merge GitHub's state
Showing 1542 additions and 7655 deletions
.gitignore
@@ -6,7 +6,8 @@ workflows/workspaces
 work/
 workflows/nf-results/*
 workflows/results
-workflows/ocrd-workflows/*.nf
+workflows/ocrd_workflows/*.nf
 models
 .idea
 gt/*
+build/
Dockerfile
-FROM ocrd/all:maximum
+FROM ocrd/all:2023-02-07
 WORKDIR /app
@@ -12,6 +12,8 @@ RUN apt-get install -y --fix-missing openjdk-11-jre
 COPY src src
 COPY setup.py setup.py
 COPY README.md README.md
+COPY scripts scripts
+COPY data_srcs data_srcs
 RUN git init
 RUN git submodule add https://github.com/MehmedGIT/OtoN_Converter submodules/oton
@@ -22,9 +24,6 @@ RUN cd submodules/oton && \
     sed -i "s \$projectDir/ocrd-workspace/ $WORKSPACE_DIR/CURRENT/ g" oton/config.toml && \
     pip install .
-COPY prepare.sh prepare.sh
-COPY default_data_sources.txt default_data_sources.txt
 RUN pip3 install -r requirements.txt
 RUN pip3 install .
 RUN nextflow
…
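Note on the sed call in the oton setup above: sed accepts any character as the delimiter of its s command, and a space is used here so the slashes inside the paths need no escaping. A minimal sketch of the substitution, assuming $WORKSPACE_DIR resolves to /app/workflows/workspaces (illustrative values, not necessarily the container's actual ones):

echo 'workDir = $projectDir/ocrd-workspace/' \
  | sed "s \$projectDir/ocrd-workspace/ /app/workflows/workspaces/CURRENT/ g"
# prints: workDir = /app/workflows/workspaces/CURRENT/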
README.md
@@ -26,7 +26,7 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well
 - (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs
 - build the image with `docker compose build`
 - spin up a container with `docker compose run -d app`
-- run `docker compose exec app bash prepare.sh`
+- run `docker compose exec app bash scripts/prepare.sh`
 - run `docker compose exec app bash workflows/execute_workflows.sh`
 - the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system
…
Source diff could not be displayed: it is too large.
File moved
https://github.com/UB-Mannheim/reichsanzeiger-gt
\ No newline at end of file
053-7972/0044.jp2&CVT=jpeg 1883_55_0044.jpg
041-7960/0662.jp2&CVT=jpeg 1881_1_0662.jpg
102-9983/0045.jp2&CVT=jpeg 1871_65_0045.jpg
101-9982/0017.jp2&CVT=jpeg 1873_1_0017.jpg
003-7922/0279.jp2&CVT=jpeg 1871_155_0279.jpg
104-8023/0452.jp2&CVT=jpeg 1891_33_0452.jpg
090-8009/0018.jp2&CVT=jpeg 1889_1_0018.jpg
065-7984/0055.jp2&CVT=jpeg 1885_5_0055.jpg
093-9053/0488.jp2&CVT=jpeg 1932_300_0488.jpg
014-8455/0292.jp2&CVT=jpeg 1936_123_0292.jpg
035-7954/0344.jp2&CVT=jpeg 1879_45_0344.jpg
093-9053/0488.jp2&CVT=jpeg 1932_300_0488.jpg
014-8455/0292.jp2&CVT=jpeg 1936_123_0292.jpg
076-9036/0019.jp2&CVT=jpeg 1929_250_0019.jpg
027-7946/0059.jp2&CVT=jpeg 1877_7_0059.jpg
053-7972/0044.jp2&CVT=jpeg 1883_55_0044.jpg
044-7963/0163.jp2&CVT=jpeg 1881_115_0163.jpg
079-7998/0444.jp2&CVT=jpeg 1887_134_0444.jpg
153-9561/0087.jp2&CVT=jpeg 1916_169_0087.jpg
167-9449/0129.jp2&CVT=jpeg 1918_267_0129.jpg
065-7984/0054.jp2&CVT=jpeg 1885_5_0054.jpg
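Each entry in these lists (the data_srcs/reichsanzeiger_*.list subsets referenced later in this diff) pairs a source path on the digitization server with a target filename. The base64-encoded urlbase in prepare_reichsanzeiger_sets.sh below decodes to https://digi.bib.uni-mannheim.de/reichsanzeiger.fcgi?FIF=/reichsanzeiger/film/, so the first entry above is fetched roughly like this:

wget "https://digi.bib.uni-mannheim.de/reichsanzeiger.fcgi?FIF=/reichsanzeiger/film/053-7972/0044.jp2&CVT=jpeg" \
  -O OCR-D-IMG/1883_55_0044.jpg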
docker-compose.yml
@@ -10,7 +10,6 @@ services:
       - ./data:/app/data # this will write the results to your host system
       - ./gt:/app/gt
       # mount your modules, custom workflows and data here
-      - ./models:/usr/local/share/ocrd-resources/
-      #- ./workflows:/app/workflows
+      #- ./models/ocrd-tesserocr-recognize:/usr/local/share/tessdata/
+      #- ./models/ocrd-calamari-recognize:/usr/local/share/ocrd-resources/
+      # - ./workflows/ocrd_workflows:/app/workflows/ocrd_workflows
       # - TODO/custom/data
\ No newline at end of file
prepare.sh (previous version at the project root; moved to scripts/prepare.sh in this commit):

#!/bin/bash
mkdir -p gt

# download the latest release of every GT repository listed in default_data_sources.txt
while IFS= read -r URL; do
    OWNER=$(echo "$URL" | cut -d'/' -f4)
    REPO=$(echo "$URL" | cut -d'/' -f5)
    if [[ ! -f gt/"$REPO".zip ]]; then
        echo "Downloading $REPO …"
        RESULT=$(curl -L \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            https://api.github.com/repos/"$OWNER"/"$REPO"/releases/latest)
        ZIP_URL=$(echo "$RESULT" | jq -r '.assets | .[].browser_download_url')
        curl -L -o gt/"$REPO".zip "$ZIP_URL"
    fi
done < default_data_sources.txt

cd gt || exit

# the default data is structured like this:
# repository_name.zip
# |___ subordinate_work_1.zip
# |___ subordinate_work_2.zip
# |___ ...
# $ZIP refers to the release itself, i.e. the level "repository_name.zip".
# The subordinate works are also OCR-D BagIts / zips; these are referred to by $INNER_ZIP.
for ZIP in *.zip; do
    NAME=$(echo "$ZIP" | cut -d"." -f1)
    echo "Processing $NAME"
    unzip -qq -d "$NAME" "$ZIP"
    mv "$NAME"/ocrdzip_out/* "$NAME" && rm -r "$NAME"/ocrdzip_out
    for INNER_ZIP in "$NAME"/*.zip; do
        echo "Dealing with inner zip files …"
        INNER_ZIP_NAME=$(basename "$INNER_ZIP" .ocrd.zip)
        unzip -qq -d "$NAME"/"$INNER_ZIP_NAME" "$INNER_ZIP" && rm "$INNER_ZIP"
        echo "Done."
    done
done

echo "… and ready to go!"
scripts/convert-yml-to-json.py (new file):

#!/usr/bin/env python3
# This script has been copied from https://github.com/OCR-D/spec/blob/master/scripts/yaml-to-json.py
from yaml import safe_load
from json import dumps
from click import command, argument, option


@command()
@option('--indent', default=2, type=int)
@argument('src')
@argument('dst')
def cli(src, dst, indent):
    kwargs = {}
    if indent > 0:
        kwargs['indent'] = indent
    with open(src, 'r', encoding='utf-8') as f_in, open(dst, 'w', encoding='utf-8') as f_out:
        ret = safe_load(f_in)
        f_out.write(dumps(ret, **kwargs))


if __name__ == '__main__':
    cli()
\ No newline at end of file
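The converter is invoked later in this diff to turn a GT repository's METADATA.yml into the metadata.json the benchmarks expect, along the lines of:

python3 scripts/convert-yml-to-json.py --indent 2 METADATA.yml metadata.json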
scripts/prepare.sh (new version):

#!/bin/bash
mkdir -p gt

echo "Prepare OCR-D Ground Truth …"
# download the latest release of every GT repository listed in data_srcs/default_data_sources.txt
while IFS= read -r URL; do
    OWNER=$(echo "$URL" | cut -d'/' -f4)
    REPO=$(echo "$URL" | cut -d'/' -f5)
    if [[ ! -f gt/"$REPO".zip ]]; then
        echo "Downloading $REPO …"
        RESULT=$(curl -L \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            https://api.github.com/repos/"$OWNER"/"$REPO"/releases/latest)
        ZIP_URL=$(echo "$RESULT" | jq -r '.assets | .[].browser_download_url')
        curl -L -o gt/"$REPO".zip "$ZIP_URL"
    fi
done < data_srcs/default_data_sources.txt

cd gt || exit

# the default data is structured like this:
# repository_name.zip
# |___ subordinate_work_1.zip
# |___ subordinate_work_2.zip
# |___ ...
# $ZIP refers to the release itself, i.e. the level "repository_name.zip".
# The subordinate works are also OCR-D BagIts / zips; these are referred to by $INNER_ZIP.
for ZIP in *.zip; do
    NAME=$(echo "$ZIP" | cut -d"." -f1)
    echo "Processing $NAME"
    if [[ ! -d $NAME && $NAME != "reichsanzeiger-gt" ]]; then
        unzip -qq -d "$NAME" "$ZIP"
        mv "$NAME"/ocrdzip_out/* "$NAME" && rm -r "$NAME"/ocrdzip_out
        for INNER_ZIP in "$NAME"/*.zip; do
            echo "Dealing with inner zip files …"
            INNER_ZIP_NAME=$(basename "$INNER_ZIP" .ocrd.zip)
            unzip -qq -d "$NAME"/"$INNER_ZIP_NAME" "$INNER_ZIP" && rm "$INNER_ZIP"
            echo "Recreate required directory structure for $INNER_ZIP_NAME."
            mkdir "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
            mv "$NAME"/"$INNER_ZIP_NAME"/data/OCR-* "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
            mv "$NAME"/"$INNER_ZIP_NAME"/data/mets.xml "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
            cp "$NAME"/metadata.json "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"/metadata.json
            echo "Moving $INNER_ZIP_NAME higher in dir structure."
            mv "$NAME"/"$INNER_ZIP_NAME" .
            echo "Done."
        done
        rm -rf "$NAME"
    fi
done

echo "Prepare Reichsanzeiger GT …"
if [[ $1 == "ra-full" ]]; then
    echo "Preparing the full Reichsanzeiger GT."
    if [ ! -d reichsanzeiger-gt ]; then
        git clone https://github.com/UB-Mannheim/reichsanzeiger-gt
    fi
    RA_GT=/app/gt/reichsanzeiger-gt
    DATA_DIR=$RA_GT/data
    cd "$DATA_DIR" || exit
    if [[ -d reichsanzeiger-1820-1939/OCR-D-IMG ]]; then
        echo "Skip downloading Reichsanzeiger images."
    else
        bash download_images.sh
    fi
    cd reichsanzeiger-1820-1939 || exit
    ocrd workspace init
    mkdir OCR-D-IMG
    cp ../images/* OCR-D-IMG
    rm -rf ../images
    rm -rf ../reichsanzeiger-1820-1939_with-TableRegion
    cp -r GT-PAGE OCR-D-GT-SEG-LINE

    echo "Adding images to mets …"
    FILEGRP="OCR-D-IMG"
    EXT=".jpg"              # the actual extension of the image files
    MEDIATYPE='image/jpeg'  # the actual media type of the image files
    for i in "$FILEGRP"/*"$EXT"; do
        BASE=$(basename "${i}" $EXT)
        ocrd workspace add -G $FILEGRP -i ${FILEGRP}_"${BASE}" -g P_"${BASE}" -m $MEDIATYPE "${i}"
    done
    python3 /app/scripts/convert-yml-to-json.py --indent 2 "$RA_GT"/METADATA.yml "$RA_GT"/metadata.json
    echo " … and ready to go!"
else
    echo "Prepare smaller sets of Reichsanzeiger GT."
    cd /app || exit
    bash /app/scripts/prepare_reichsanzeiger_sets.sh
fi
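As referenced in the README change above, the script is run inside the container; the optional ra-full argument switches from the small Reichsanzeiger subsets to the full GT:

# default: download GT releases and prepare the small Reichsanzeiger subsets
docker compose exec app bash scripts/prepare.sh

# alternatively: clone and prepare the full Reichsanzeiger GT
docker compose exec app bash scripts/prepare.sh ra-full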
scripts/prepare_reichsanzeiger_sets.sh (new file):

#!/bin/bash
# bash is required: the script uses arrays
echo "Download Reichsanzeiger GT repository."
cd gt || exit
if [ ! -d reichsanzeiger-gt ]; then
    git clone https://github.com/UB-Mannheim/reichsanzeiger-gt
fi
cd .. || exit

PREFIX="data_srcs"
files=(
    "$PREFIX"/reichsanzeiger_many_ads.list
    "$PREFIX"/reichsanzeiger_random.list
    "$PREFIX"/reichsanzeiger_tables.list
    "$PREFIX"/reichsanzeiger_title_pages.list
)

for FILE in "${files[@]}"; do
    NAME=$(basename "$FILE" .list)
    if [ -d gt/"$NAME" ]; then
        echo "Directory gt/$NAME already exists. Skipping download."
    else
        echo "Processing $FILE."
        mkdir -p gt/"$NAME"/data/"$NAME"/OCR-D-IMG
        mkdir -p gt/"$NAME"/data/"$NAME"/OCR-D-GT-SEG-LINE
        # decodes to the image server's base URL:
        # https://digi.bib.uni-mannheim.de/reichsanzeiger.fcgi?FIF=/reichsanzeiger/film/
        urlbase=$(echo "aHR0cHM6Ly9kaWdpLmJpYi51bmktbWFubmhlaW0uZGUvcmVpY2hzYW56ZWlnZXIuZmNnaT9GSUY9
L3JlaWNoc2FuemVpZ2VyL2ZpbG0vCg==" | base64 -d)
        while read -r line; do
            # ${line% *} is the source path (first field), ${line#* } the target filename (second field)
            wget --limit-rate=500k "${urlbase}${line% *}" -O ./gt/"$NAME"/data/"$NAME"/OCR-D-IMG/"${line#* }"
            IMG_NAME=$(basename "${line#* }" .jpg)
            cp gt/reichsanzeiger-gt/data/reichsanzeiger-1820-1939/GT-PAGE/"$IMG_NAME".xml gt/"$NAME"/data/"$NAME"/OCR-D-GT-SEG-LINE/"$IMG_NAME".xml
        done < "$FILE"
    fi

    if [ ! -f gt/"$NAME"/data/"$NAME"/mets.xml ]; then
        echo "Preparing OCR-D workspace for $NAME."
        cd gt/"$NAME"/data/"$NAME" || exit
        ocrd workspace init
        ocrd workspace set-id "$NAME"
        FILEGRP="OCR-D-IMG"
        FILEGRP_2="OCR-D-GT-SEG-LINE"
        # add images to mets
        EXT=".jpg"              # the actual extension of the image files
        MEDIATYPE='image/jpeg'  # the actual media type of the image files
        for i in OCR-D-IMG/*"$EXT"; do
            BASE=$(basename "${i}" $EXT)
            ocrd workspace add -G $FILEGRP -i "${FILEGRP}"_"${BASE}" -g P_"${BASE}" -m $MEDIATYPE "${i}"
        done
        # add GT to mets
        for i in "$FILEGRP_2"/*.xml; do
            BASE=$(basename "${i}" ".xml")
            ocrd workspace add -G $FILEGRP_2 -i "${FILEGRP_2}"_"${BASE}" -g P_"${BASE}" -m text/xml "${i}"
        done
        cd /app || exit
    fi

    if [ ! -f gt/"$NAME"/data/"$NAME"/metadata.json ]; then
        cp /app/gt/reichsanzeiger-gt/METADATA.yml /app/gt/"$NAME"/data/"$NAME"/METADATA.yml
        python3 /app/scripts/convert-yml-to-json.py --indent 2 /app/gt/"$NAME"/data/"$NAME"/METADATA.yml /app/gt/"$NAME"/data/"$NAME"/metadata.json
    fi
done
echo "Preparation of Reichsanzeiger GT subsets done."
Benchmark extraction (Python):
@@ -106,17 +106,8 @@ def get_eval_tool(mets_path: str) -> str:
 def get_gt_workspace(workspace_path: str) -> Dict[str, str]:
     current_workspace = get_workspace_name(workspace_path)
-    split_workspace_name = current_workspace.split('_')
-    workspace_name_wo_workflow = split_workspace_name[0] + '_' + split_workspace_name[1] + '_' + split_workspace_name[2]
-    font = ''
-    if split_workspace_name[1] == 'ant':
-        font = 'Antiqua'
-    elif split_workspace_name[1] == 'frak':
-        font = 'Fraktur'
-    else:
-        font = 'Font Mix'
-    url = 'https://github.com/OCR-D/quiver-data/blob/main/' + workspace_name_wo_workflow + '.ocrd.zip'
-    label = f'GT workspace {split_workspace_name[0]}th century {font} {split_workspace_name[2]} layout'
+    url = 'https://github.com/OCR-D/quiver-data/blob/main/' + current_workspace + '.ocrd.zip'
+    label = f'GT workspace {current_workspace}'
     return {
         '@id': url,
         'label': label
@@ -196,7 +187,8 @@ def get_nextflow_completed_process_file(workspace_path: str):
     return file

 def get_nextflow_time(workspace_path: str, time_type: str) -> float:
-    files = listdir(workspace_path)
+    highest_workspace_dir = '/'.join(workspace_path.split('/')[:-2])
+    files = listdir(highest_workspace_dir)
     logs = []
     for file in files:
         if '.command.log' in file:
@@ -204,7 +196,7 @@ def get_nextflow_time(workspace_path: str, time_type: str) -> float:
     time_per_workflow_step = []
     for log in logs:
-        with open(workspace_path + '/' + log, 'r', encoding='utf-8') as l:
+        with open(highest_workspace_dir + '/' + log, 'r', encoding='utf-8') as l:
             log_file = l.read()
             no_sec_s = re.search(rf'([0-9]+?\.[0-9]+?)s \({time_type}\)', log_file).group(1)
             time_per_workflow_step.append(float(no_sec_s))
…
workflows/execute_workflows.sh
@@ -39,9 +39,9 @@ convert_ocrd_wfs_to_NextFlow() {
 download_models() {
     echo "Download the necessary models if not available"
-    if [[ ! -f /usr/local/share/ocrd-resources/ocrd-tesserocr-recognize/ ]]
+    if [[ ! -d /usr/local/share/tessdata ]]
     then
-        mkdir -p /usr/local/share/ocrd-resources/
+        #mkdir -p /usr/local/share/ocrd-resources/
         ocrd resmgr download ocrd-tesserocr-recognize '*'
     fi
     if [[ ! -d /usr/local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0 ]]
@@ -58,32 +58,15 @@ create_wf_specific_workspaces() {
     # create workspace for all OCR workflows.
     # each workflow has a separate workspace to work with.
+    echo "Create workflow specific workspaces for each dir in ./gt …"
     for DIR in "$ROOT"/gt/*/; do
         DIR_NAME=$(basename "$DIR")
-        if grep -q "multivolume work" <<< "$(cat $DIR/mets.xml)"; then
-            echo "$DIR_NAME is a multivolume work"
-            for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf
-            do
-                WF_NAME=$(basename -s .txt.nf "$WORKFLOW")
-                for SUB_WORK in $DIR/*/; do
-                    SUB_WORK_DIR_NAME=$(basename "$SUB_WORK")
-                    TARGET="$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$SUB_WORK_DIR_NAME"_"$WF_NAME"
-                    cp -r "$ROOT"/gt/"$DIR_NAME"/"$SUB_WORK_DIR_NAME" "$TARGET"
-                    if [[ -f "$ROOT"/gt/"$DIR_NAME"/metadata.json ]]; then
-                        cp -r "$ROOT"/gt/"$DIR_NAME"/metadata.json "$TARGET"/metadata.json
-                    fi
-                    cp "$WORKFLOW" "$TARGET"/data/
-                done
-            done
-        else
+        if [[ ! $DIR_NAME == "reichsanzeiger-gt" ]]; then
+            echo "Create workflow specific workspace for $DIR_NAME."
             for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf
             do
                 WF_NAME=$(basename -s .txt.nf "$WORKFLOW")
                 cp -r "$ROOT"/gt/"$DIR_NAME" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"
-                cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"/
+                cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"/data/*/
             done
         fi
     done
@@ -93,11 +76,10 @@ clean_up_tmp_dirs() {
     echo "Clean up intermediate dirs …"
     for DIR in "$WORKSPACE_DIR"/tmp/*
     do
+        echo "Cleaning up $DIR."
         DIR_NAME=$(basename "$DIR")
         mv "$DIR" "$WORKSPACE_DIR"/"$DIR_NAME"
-        cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME"
-        ls "$WORKSPACE_DIR"/"$DIR_NAME"
-        cp -r "$WORKSPACE_DIR"/"$DIR_NAME"/data/* "$WORKSPACE_DIR"/"$DIR_NAME"/
+        cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME"/data/*/
     done
     rm -rf "$WORKSPACE_DIR"/tmp
@@ -114,16 +96,16 @@ execute_wfs_and_extract_benchmarks() {
         DIR_NAME=$(basename $WS_DIR)
-        run "$WS_DIR"/*ocr.txt.nf "$DIR_NAME" "$WS_DIR"
-        run "$WS_DIR"/*eval.txt.nf "$DIR_NAME" "$WS_DIR"
+        run "$WS_DIR"/data/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR"
+        run "$WS_DIR"/data/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR"

         # create a result JSON according to the specs
         echo "Get Benchmark JSON …"
-        quiver benchmarks-extraction "$WS_DIR" "$WORKFLOW"
+        quiver benchmarks-extraction "$WS_DIR"/data/* "$WORKFLOW"
         echo "Done."

         # move data to results dir
-        mv "$WS_DIR"/*.json "$WORKFLOW_DIR"/results
+        mv "$WS_DIR"/data/*/*.json "$WORKFLOW_DIR"/results
     fi
 done
 cd "$ROOT" || exit
@@ -132,7 +114,7 @@ execute_wfs_and_extract_benchmarks() {
 adjust_workflow_settings() {
     # $1: $WORKFLOW
     # $2: $DIR_NAME
-    sed -i "s CURRENT app/workflows/workspaces/$2 g" "$1"
+    sed -i "s CURRENT app/workflows/workspaces/$2/data/*/ g" "$1"
 }

 rename_and_move_nextflow_result() {
@@ -142,13 +124,13 @@ rename_and_move_nextflow_result() {
     WORKFLOW_NAME=$(basename -s .txt.nf "$1")
     rm "$WORKFLOW_DIR"/nf-results/*process_completed.json
     mv "$WORKFLOW_DIR"/nf-results/*_completed.json "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME"_completed.json
-    if [ $WORKFLOW_NAME != "dinglehopper_eval" ]; then
+    if [ "$WORKFLOW_NAME" != "dinglehopper_eval" ]; then
         for DIR in "$WORKSPACE_DIR"/work/*
         do
-            WORK_DIR_NAME=$(basename $DIR)
-            for SUB_WORK_DIR in $DIR/*
+            WORK_DIR_NAME=$(basename "$DIR")
+            for SUB_WORK_DIR in "$DIR"/*
             do
-                SUB_WORK_DIR_NAME=$(basename $SUB_WORK_DIR)
+                SUB_WORK_DIR_NAME=$(basename "$SUB_WORK_DIR")
                 mv "$WORKSPACE_DIR"/work/"$WORK_DIR_NAME"/"$SUB_WORK_DIR_NAME"/.command.log "$WORKSPACE_DIR"/"$2"/"$WORK_DIR_NAME"_"$SUB_WORK_DIR_NAME".command.log
             done
@@ -165,7 +147,7 @@ run() {
     adjust_workflow_settings "$1" "$2"
     nextflow run "$1" -with-weblog http://127.0.0.1:8000/nextflow/
     rename_and_move_nextflow_result "$1" "$2"
-    save_workspaces "$3" "$2" "$1"
+    save_workspaces "$3"/data "$2" "$1"
 }

 save_workspaces() {
@@ -173,7 +155,7 @@ save_workspaces() {
     # $2: $DIR_NAME
     # $3: $WORKFLOW
     echo "Zipping workspace $1"
-    ocrd zip bag -d $1 -i $1 $1
+    ocrd zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
     WORKFLOW_NAME=$(basename -s .txt.nf "$3")
     mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip
 }
…