From 428df41cc199b0073f6ae72adf56e725fde53e82 Mon Sep 17 00:00:00 2001
From: "j.vondoemming" <j.vondoemming@stud.uni-goettingen.de>
Date: Thu, 11 Aug 2022 17:45:57 +0200
Subject: [PATCH] generalized readers

---
 config.json     |  7 +++++--
 fgs/__main__.py | 42 +++++++++++++++++++++++++++++++-----------
 fgs/reader.py   | 48 ++++++++++++++++++++++++++++++++++--------------
 3 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/config.json b/config.json
index 167e18e..70f62e4 100644
--- a/config.json
+++ b/config.json
@@ -13,8 +13,11 @@
 	},
 	"default_status": "published",
 	"pandoc": {
-		"base": "markdown",
-		"args": [],
+		"mimetypes": {
+			"text/markdown": {
+				"base": "markdown"
+			}
+		},
 		"extensions": [
 			"+abbreviations",
 			"+all_symbols_escapable",
diff --git a/fgs/__main__.py b/fgs/__main__.py
index 395c1ad..babc606 100644
--- a/fgs/__main__.py
+++ b/fgs/__main__.py
@@ -3,6 +3,7 @@
 import json
 import os
 import sys
+import mimetypes
 
 
 import common
@@ -37,12 +38,19 @@ def main():
     factories['config'] = datatypes.LocalizedConfigFactory(config, factories)
     factories['file'] = datatypes.FileFactory(config, factories)
 
-    readers = {}
-    readers['generic'] = reader.FileReader(config, factories)
-    readers['md'] = reader.MarkdownReader(config, factories)
 
-    parse_dir(os.path.join(CONTENT_DIR, "."), readers)
-    parse_dir(THEME_DIR + '/static', readers, [config['theme']['static_dir']])
+    mimetypes.init()
+
+    readers = []
+    readers.append(reader.RawFileReader(config, factories))
+    for mimetype, mimeconfig in config['pandoc']['mimetypes'].items():
+        extensions = config['pandoc']['extensions']
+        if 'extensions' in mimeconfig:
+            extensions = mimeconfig['extensions']
+        readers.append(reader.PandocReader(config, factories, mimetype, None, base=mimeconfig['base'], extensions=extensions))
+
+    read_dir(os.path.join(CONTENT_DIR, "."), readers)
+    read_dir(THEME_DIR + '/static', readers, [config['theme']['static_dir']])
 
 
     context = {}
@@ -55,18 +63,30 @@ def main():
 
 
 
-def parse_dir(directory, readers, subpath = []):
-    print("parse_dir: " + directory);
+def read_dir(directory, readers, subpath=None):
+    """Recursively hand each non-hidden file under directory to every reader
+    whose can_read_file accepts its guessed mimetype and encoding."""
+    # A None default avoids sharing one mutable list between calls.
+    subpath = [] if subpath is None else subpath
+    print("read_dir: " + directory)
     for filename in os.listdir(directory):
         if filename.startswith("."):
             continue
         f = os.path.join(directory, filename)
         if os.path.isfile(f):
-            readers['generic'].read_file(f, subpath)
-            if filename.endswith(".md"):
-                readers['md'].read_and_parse_file(f, subpath)
+            mimetype, mimeencoding = mimetypes.guess_type(f)
+            read_file = False
+            # Not named "reader": that would shadow the reader module.
+            for file_reader in readers:
+                if file_reader.can_read_file(mimetype, mimeencoding):
+                    read_file = True
+                    file_reader.read_file(f, subpath, mimetype, mimeencoding)
+            if read_file:
+                print("read file: ", f, subpath, mimetype, mimeencoding)
+            else:
+                print("WARN: no reader for file: ", f, subpath, mimetype, mimeencoding)
         elif os.path.isdir(f):
-            parse_dir(f, readers, subpath + [filename])
+            read_dir(f, readers, subpath + [filename])
 
 
 
diff --git a/fgs/reader.py b/fgs/reader.py
index 0dc76b6..32ad9cc 100644
--- a/fgs/reader.py
+++ b/fgs/reader.py
@@ -9,13 +9,24 @@ import subprocess
 import os
 
 
-class FileReader:
-
+class Reader:
+    """Base class for readers; subclasses override can_read_file and read_file."""
     def __init__(self, config, factories):
         self.config = config
         self.factories = factories
-
+    def can_read_file(self, mimetype, mimeencoding):
+        raise NotImplementedError("Function not implemented.")
-    def read_file(self, path, subpath):
+    def read_file(self, path, subpath, mimetype=None, mimeencoding=None):
+        raise NotImplementedError("Function not implemented.")
+
+class RawFileReader(Reader):
+
+    def can_read_file(self, mimetype, mimeencoding):
+        # Accepts every file; a check that would reject files whose
+        # mimeencoding is not None is currently disabled.
+        return True
+
+    def read_file(self, path, subpath, mimetype, mimeencoding):
         f = open(path, 'rb')
         rawcontents = f.read()
         f.close()
@@ -25,21 +36,29 @@ class FileReader:
 
         self.factories['file'].get(filename).init(rawcontents, subpathlist)
 
-class MarkdownReader:
+class PandocReader(Reader):
 
-    def __init__(self, config, factories):
-        self.config = config
-        self.factories = factories
+    def __init__(self, config, factories, mimetype, mimeencoding, base, extensions):
+        super().__init__(config, factories)
+        self.mimetype = mimetype  # compared against the file's type in can_read_file
+        self.mimeencoding = mimeencoding  # compared against the file's encoding in can_read_file
+        self.base = base  # forwarded to pandoc.run_pandoc as base=
+        self.extensions = extensions  # forwarded to pandoc.run_pandoc as extensions=
+
+    def can_read_file(self, mimetype, mimeencoding):
+        """Return True only if both mimetype and mimeencoding equal the
+        values this reader was constructed with."""
+        wanted = (self.mimetype, self.mimeencoding)
+        offered = (mimetype, mimeencoding)
+        return wanted == offered
 
 
-    def read_and_parse_file(self, path, subpath):
-        if not path.endswith(".md"):
-            raise Exception("can only parse markdown files: ", path)
-        elif len(subpath) > 1:
-            raise Exception("markdown file is too deep in directory structure: ", path, subpath)
+    def read_file(self, path, subpath, mimetype, mimeencoding):
+        if len(subpath) > 1:
+            raise Exception("file is too deep in directory structure: ", path, subpath)
 
 
-        print("parsing file: ", path, subpath)
+        #print("parsing file: ", path, subpath, mimetype, mimeencoding)
 
         f = open(path)
         rawfile = f.read()
@@ -79,7 +98,8 @@ class MarkdownReader:
         #print("lang: ", lang)
 
         # content
-        content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.config['pandoc']['base'], extensions=self.config['pandoc']['extensions'])
+        #content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.config['pandoc']['base'], extensions=self.config['pandoc']['extensions'])
+        content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.base, extensions=self.extensions)
         metadata.update(contentmetadata) # merge content specific metadata into metadata
         #print(content)
 
-- 
GitLab