Commit 7d7f1210 authored by donatus.herre
Browse files

extraction updated

parent f40b6df8
no GND identifier present for bertram-august-wilhelm
no GND identifier present for henrici-heinrich
no GND identifier present for metz-johann-albrecht-friedrich
no GND identifier present for ostrow-ostrowsky-martin
no GND identifier present for supprian-friedrich-leberecht
no GND identifier present for tribechow-johannes
......@@ -176,5 +176,8 @@
"138519285",
"141780061",
"141878886",
"143695991"
"143695991",
"104171421",
"1020385375",
"1042343306"
]
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Creator: D. Herre
GitLab: dikon/afh-gnd
Created: 2020-04-30
Last Modified: 2020-06-19
"""
from librair.parsers import Beacon
from librair.schemas import json
# Remote BEACON file and local input/output paths used by this script.
BEACON = "https://dikon.gitlab.io/cph-beacon/data/gnd.txt"
HREF = "edits/index1694-href.txt"
NAMES = "edits/index1694-names.txt"
OUT = "data/index1694.json"

# One identifier per line; surrounding whitespace is stripped.
with open(HREF, 'r', encoding="utf-8") as f:
    href = [line.strip() for line in f]

# Loaded the same way as HREF; not used further in this script.
with open(NAMES, 'r', encoding="utf-8") as f:
    names = [line.strip() for line in f]

cph = Beacon(url=BEACON)

# Second element of every BEACON target entry; these are the values that
# get compared against the identifiers read from HREF.
# NOTE(review): presumably plain strings (they are matched against text
# lines) -- confirm against librair's Beacon parser.
targets = [t[1] for t in cph.targets]

# Manually maintained identifier -> GND number pairs. They are exempt from
# the "missing" report below and are always appended to the output.
additions = {
    "morgenstern-nikolaus": "104171421",
    "reichhelm-karl": "1020385375",
    "sperlette-bartholomaeus-johann": "1042343306"
}

# Build the lookup sets once so every membership test below is O(1)
# instead of a linear scan of a list per iteration.
known_targets = set(targets)
wanted = set(href)

# Report every identifier that has neither a BEACON target nor a manual
# addition (reported in HREF file order).
for h in href:
    if h not in known_targets and h not in additions:
        print("no GND identifier present for", h)

# cph.targets and cph.links are read in parallel by index: keep the link of
# every target whose identifier is listed in HREF, in BEACON order.
gnds = [
    cph.links[i]
    for i, target in enumerate(cph.targets)
    if target[1] in wanted
]

# Manual additions go last, in the dict's insertion order.
gnds.extend(additions.values())

json.writer(gnds, OUT)
#!/usr/bin/env bash
# Run the prep module inside the local virtualenv (./env) and capture both
# stdout and stderr in build.log.

# Abort on the first failing command: without this, a failed activation
# would let the system Python run instead, and the script would always exit
# with the status of `deactivate`, hiding a failed build.
set -e

source env/bin/activate
python -m prep > build.log 2>&1
deactivate
#!/usr/bin/env bash
# Create a Python 3 virtualenv in ./env and install the packages listed in
# requirements.txt into it.

# Abort on the first failing command: if creating or activating the
# virtualenv fails, pip must not go on to install into whatever Python
# happens to be on PATH.
set -e

virtualenv -p python3 env
source env/bin/activate
pip install -r requirements.txt
deactivate
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment