Commit 84bb64ce authored by dirk.wintergruen

Also return the corpus name in the iterator

parent b0537399
......@@ -128,7 +128,7 @@ class Analyser(object):
for doc in getDocs(self.corpus,self.lang):
for k,doc in getDocs(self.corpus,self.lang):
md = textacy.spacier.doc_extensions.get_meta(doc)
if "creator" in md:
del md["creator"]
......@@ -152,8 +152,9 @@ class Analyser(object):
if not date: # date not set set month = 1
md["_month"] = 1
if not md["_year"]: # kein Jahr weder in Date noch Feld Year, assume korpus name ist year
if not md["_year"]: # kein Jahr weder in Date noch Feld Year, assume corpus name ist year
try:
y,ext = os.path.splitext(k)
year = int(y)
md["_year"] = year
if not date: # date not set set month = 1
......@@ -659,6 +660,7 @@ if __name__ == '__main__':
def getDocs(corpus,lang="en"):
for k,corps in corpus.items():
logging.info(f"Analysing {k}")
if not isinstance(corps,Corpus):
try:
corps = textacy.Corpus.load(lang,corps)
......@@ -667,7 +669,7 @@ def getDocs(corpus,lang="en"):
continue
for doc in corps:
yield doc
yield k,doc
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment