From 428df41cc199b0073f6ae72adf56e725fde53e82 Mon Sep 17 00:00:00 2001 From: "j.vondoemming" <j.vondoemming@stud.uni-goettingen.de> Date: Thu, 11 Aug 2022 17:45:57 +0200 Subject: [PATCH] generalized readers --- config.json | 7 +++++-- fgs/__main__.py | 42 +++++++++++++++++++++++++++++++----------- fgs/reader.py | 48 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 70 insertions(+), 27 deletions(-) diff --git a/config.json b/config.json index 167e18e..70f62e4 100644 --- a/config.json +++ b/config.json @@ -13,8 +13,11 @@ }, "default_status": "published", "pandoc": { - "base": "markdown", - "args": [], + "mimetypes": { + "text/markdown": { + "base": "markdown" + } + }, "extensions": [ "+abbreviations", "+all_symbols_escapable", diff --git a/fgs/__main__.py b/fgs/__main__.py index 395c1ad..babc606 100644 --- a/fgs/__main__.py +++ b/fgs/__main__.py @@ -3,6 +3,7 @@ import json import os import sys +import mimetypes import common @@ -37,12 +38,19 @@ def main(): factories['config'] = datatypes.LocalizedConfigFactory(config, factories) factories['file'] = datatypes.FileFactory(config, factories) - readers = {} - readers['generic'] = reader.FileReader(config, factories) - readers['md'] = reader.MarkdownReader(config, factories) - parse_dir(os.path.join(CONTENT_DIR, "."), readers) - parse_dir(THEME_DIR + '/static', readers, [config['theme']['static_dir']]) + mimetypes.init() + + readers = [] + readers.append(reader.RawFileReader(config, factories)) + for mimetype, mimeconfig in config['pandoc']['mimetypes'].items(): + extensions = config['pandoc']['extensions'] + if 'extensions' in mimeconfig: + extensions = mimeconfig['extensions'] + readers.append(reader.PandocReader(config, factories, mimetype, None, base=mimeconfig['base'], extensions=extensions)) + + read_dir(os.path.join(CONTENT_DIR, "."), readers) + read_dir(THEME_DIR + '/static', readers, [config['theme']['static_dir']]) context = {} @@ -55,18 +63,30 @@ def main(): -def parse_dir(directory, readers, subpath = []): - print("parse_dir: " + directory); +def read_dir(directory, readers, subpath = []): + print("read_dir: " + directory); for filename in os.listdir(directory): if filename.startswith("."): continue f = os.path.join(directory, filename) if os.path.isfile(f): - readers['generic'].read_file(f, subpath) - if filename.endswith(".md"): - readers['md'].read_and_parse_file(f, subpath) + mimetype, mimeencoding = mimetypes.guess_type(f) + #print("mimetype:",f,mimetype,mimeencoding) + read_file = False + for reader in readers: + if reader.can_read_file(mimetype, mimeencoding): + read_file = True + reader.read_file(f, subpath, mimetype, mimeencoding) + if read_file: + print("read file: ", f, subpath, mimetype, mimeencoding) + else: + print("WARN: no reader for file: ", f, subpath, mimetype, mimeencoding) + + #readers['generic'].read_file(f, subpath) + #if filename.endswith(".md"): + # readers['md'].read_file(f, subpath) elif os.path.isdir(f): - parse_dir(f, readers, subpath + [filename]) + read_dir(f, readers, subpath + [filename]) diff --git a/fgs/reader.py b/fgs/reader.py index 0dc76b6..32ad9cc 100644 --- a/fgs/reader.py +++ b/fgs/reader.py @@ -9,13 +9,24 @@ import subprocess import os -class FileReader: - +class Reader: def __init__(self, config, factories): self.config = config self.factories = factories - + def can_read_file(self, mimetype, mimeencoding): + raise Exception("Function not implemented.") def read_file(self, path, subpath): + raise Exception("Function not implemented.") + + +class RawFileReader(Reader): + + def can_read_file(self, mimetype, mimeencoding): + #if mimeencoding != None: + # return False + return True + + def read_file(self, path, subpath, mimetype, mimeencoding): f = open(path, 'rb') rawcontents = f.read() f.close() @@ -25,21 +36,29 @@ class FileReader: self.factories['file'].get(filename).init(rawcontents, subpathlist) -class MarkdownReader: +class PandocReader(Reader): - def __init__(self, config, factories): - self.config = config - self.factories = factories + def __init__(self, config, factories, mimetype, mimeencoding, base, extensions): + super().__init__(config, factories) + self.mimetype = mimetype + self.mimeencoding = mimeencoding + self.base = base + self.extensions = extensions + + def can_read_file(self, mimetype, mimeencoding): + if self.mimetype != mimetype: + return False + if self.mimeencoding != mimeencoding: + return False + return True - def read_and_parse_file(self, path, subpath): - if not path.endswith(".md"): - raise Exception("can only parse markdown files: ", path) - elif len(subpath) > 1: - raise Exception("markdown file is too deep in directory structure: ", path, subpath) + def read_file(self, path, subpath, mimetype, mimeencoding): + if len(subpath) > 1: + raise Exception("file is too deep in directory structure: ", path, subpath) - print("parsing file: ", path, subpath) + #print("parsing file: ", path, subpath, mimetype, mimeencoding) f = open(path) rawfile = f.read() @@ -79,7 +98,8 @@ class MarkdownReader: #print("lang: ", lang) # content - content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.config['pandoc']['base'], extensions=self.config['pandoc']['extensions']) + #content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.config['pandoc']['base'], extensions=self.config['pandoc']['extensions']) + content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.base, extensions=self.extensions) metadata.update(contentmetadata) # merge content specific metadata into metadata #print(content) -- GitLab