From 5a36148a72266ac70a2c1c1fba8eaecafe3364a2 Mon Sep 17 00:00:00 2001
From: Jake <j.vondoemming@stud.uni-goettingen.de>
Date: Tue, 2 Aug 2022 10:16:08 +0200
Subject: [PATCH] added fgs

---
 fgs/Makefile        |   4 +
 fgs/__main__.py     |  86 +++++++++++++++++
 fgs/generator.py    |  88 ++++++++++++++++++
 fgs/page.py         |  89 ++++++++++++++++++
 fgs/pandoc_toc.html |   1 +
 fgs/reader.py       | 218 ++++++++++++++++++++++++++++++++++++++++++++
 fgs/writer.py       |  61 +++++++++++++
 7 files changed, 547 insertions(+)
 create mode 100644 fgs/Makefile
 create mode 100644 fgs/__main__.py
 create mode 100644 fgs/generator.py
 create mode 100644 fgs/page.py
 create mode 100644 fgs/pandoc_toc.html
 create mode 100644 fgs/reader.py
 create mode 100644 fgs/writer.py

diff --git a/fgs/Makefile b/fgs/Makefile
new file mode 100644
index 0000000..6cfa260
--- /dev/null
+++ b/fgs/Makefile
@@ -0,0 +1,4 @@
+
+.PHONY: run
+run:
+	python3 __main__.py
diff --git a/fgs/__main__.py b/fgs/__main__.py
new file mode 100644
index 0000000..6e226fb
--- /dev/null
+++ b/fgs/__main__.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+
+
+import reader
+import generator
+import writer
+
+CONTENT_DIR = '../content'  # pages, the content config.json and static directories
+OUTPUT_DIR = '../public'  # root directory the generated site is written to
+THEME_DIR = '../theme'  # provides the templates/ and static/ directories
+
+
+def main():
+    """Build the site: load config, parse content, collect static files, render."""
+    print("Hello World")
+    # Later files override earlier keys; lang.json is stored under 'lang'.
+    config = {}
+    with open('../config.json') as cfg_file:
+        config.update(json.load(cfg_file))
+    with open('../lang.json') as lang_file:
+        config['lang'] = json.load(lang_file)
+    with open(CONTENT_DIR + '/config.json') as content_cfg_file:
+        config.update(json.load(content_cfg_file))
+    print(config)
+
+    # Markdown pages.
+    pages = []
+    md_reader = reader.MarkdownReader(config)
+    parse_dir(os.path.join(CONTENT_DIR, "."), pages, md_reader)
+
+    # Validate every configured static directory before reading any of them.
+    static_dirs = []
+    for name in config['static_directories']:
+        if "." in name:
+            raise Exception("Illegal static directory name: ", name)
+        static_dirs.append(CONTENT_DIR + "/" + name)
+
+    static_files = {}
+    for static_dir in static_dirs:
+        read_static_dir(static_dir, static_files)
+    theme_prefix = [config['theme']['static_dir']]
+    read_static_dir(THEME_DIR + '/static', static_files, theme_prefix)
+
+    # Build the shared template context, then render everything.
+    context = {}
+    gen = generator.Generator(config, context)
+    gen.generate_context(pages, static_files)
+    gen.generate_output(writer.Writer(config, context, OUTPUT_DIR, THEME_DIR))
+
+
+def read_static_dir(directory, static_files, subpath=None):
+    """Recursively read non-hidden files into static_files (rel. path -> bytes)."""
+    print("static_dir: " + directory)
+    for name in (n for n in os.listdir(directory) if not n.startswith(".")):
+        fp, rel = os.path.join(directory, name), list(subpath or []) + [name]
+        if os.path.isfile(fp):
+            with open(fp, "rb") as f:
+                static_files['/'.join(rel)] = f.read()
+            print("read: ", fp, " as: ", '/'.join(rel))
+        elif os.path.isdir(fp):
+            read_static_dir(fp, static_files, rel)
+
+
+def parse_dir(directory, pages, mdreader, subpath=None):
+    """Recursively parse every *.md file below directory and append it to pages.
+
+    subpath (directory names below the content root) is passed to the reader;
+    directories whose name starts with "." are not descended into.
+    """
+    prefix = list(subpath or [])  # never share a mutable default list
+    print("parse_dir: " + directory)
+    for name in os.listdir(directory):
+        full_path = os.path.join(directory, name)
+        if os.path.isfile(full_path) and name.endswith(".md"):
+            pages.append(mdreader.read_and_parse_file(full_path, prefix))
+        elif os.path.isdir(full_path) and not name.startswith("."):
+            parse_dir(full_path, pages, mdreader, prefix + [name])
+    
+
+if __name__ == '__main__':  # run the generator only when executed as a script
+    main()
+
diff --git a/fgs/generator.py b/fgs/generator.py
new file mode 100644
index 0000000..540f32b
--- /dev/null
+++ b/fgs/generator.py
@@ -0,0 +1,88 @@
+
+class Generator:
+    """Builds the shared template context and drives the writer."""
+
+    def __init__(self, config, context):
+        """Keep the site config and the context dict that gets filled in place."""
+        self.config = config
+        self.context = context
+
+    def generate_context(self, pages, static_files):
+        """Fill self.context from the parsed pages and the static files.
+
+        Only pages whose status is "published" are indexed. Keys written:
+        'static_files', 'pages' (lang -> slug -> page), 'pages_modified'
+        (lang -> pages sorted with their own "<"), 'categories' and 'tags'
+        (name -> lang -> list of pages).
+
+        Raises:
+            Exception: two published pages share the same language and slug.
+        """
+        self.context['static_files'] = static_files
+
+        published_pages = [p for p in pages if p.status == "published"]
+
+        # pages
+        all_pages = {}
+        for page in published_pages:
+            by_slug = all_pages.setdefault(page.lang, {})
+            if page.slug in by_slug:
+                raise Exception("duplicate language ({}) for slug '{}'".format(
+                    page.lang, page.slug))
+            by_slug[page.slug] = page
+        self.context['pages'] = all_pages
+
+        # pages_modified
+        self.context['pages_modified'] = {
+            lang: sorted(p for p in published_pages if p.lang == lang)
+            for lang in self.config['lang']['supported']
+        }
+
+        # TODO hidden pages
+        # TODO draft pages
+        # TODO authors
+
+        # categories
+        categories = {}
+        for page in published_pages:
+            by_lang = categories.setdefault(page.category, {})
+            by_lang.setdefault(page.lang, []).append(page)
+        self.context['categories'] = categories
+
+        # tags
+        tags = {}
+        for page in published_pages:
+            for tag in page.tags:
+                tags.setdefault(tag, {}).setdefault(page.lang, []).append(page)
+        self.context['tags'] = tags
+
+    def generate_homepage(self, writer, lang, path):
+        """Render the page with the theme's homepage slug for lang to path.
+
+        Raises:
+            KeyError: lang has no published page with the homepage slug.
+        """
+        slug = self.config['theme']['homepage_slug']
+        lang_pages = self.context['pages'].get(lang, {})
+        if slug not in lang_pages:
+            raise KeyError("no published homepage '{}' for language '{}'".format(
+                slug, lang))
+        page = lang_pages[slug]
+        writer.write_template(page.template, path, lang, {'page': page})
+
+    def generate_output(self, writer):
+        """Write static files, all published pages and the homepages."""
+        for sf, raw in self.context['static_files'].items():
+            print("writing binary file: ", sf)
+            writer.write_file(sf, raw, mode="wb")
+
+        for lang in self.config['lang']['supported']:
+            # all pages; a supported language may have none published
+            for page in self.context['pages'].get(lang, {}).values():
+                writer.write_template(page.template, page.url, lang, {'page': page})
+
+            # homepages for languages
+            self.generate_homepage(writer, lang, lang + "/index.html")
+
+        # homepage
+        self.generate_homepage(writer, self.config['lang']['default'], "index.html")
diff --git a/fgs/page.py b/fgs/page.py
new file mode 100644
index 0000000..a2d9336
--- /dev/null
+++ b/fgs/page.py
@@ -0,0 +1,89 @@
+class Page:
+    """One parsed content page together with the values derived for it.
+
+    Attributes set by __init__: filename, subpath, raw, metadata, content,
+    toc, title, category, slug, lang, status, date_created and
+    date_modified (both wrapped in Date), authors (list of Author),
+    last_modification_author (Author or None), tags, template and url.
+    """
+
+    def __init__(self, filename, subpath, raw, metadata, content, toc, title, category, slug, lang, date_created, date_modified, authors, last_modification_author, status, config):
+        """Store the parsed values and derive authors, tags, template and url.
+
+        authors is an iterable of (local_part, domain, name) triples and
+        last_modification_author is one such triple or a falsy value.
+        metadata may carry optional 'tags' and 'template' entries; without
+        'template' the theme's default_template from config is used.
+        """
+        # values taken over unchanged
+        self.filename = filename
+        self.subpath = subpath
+        self.raw = raw
+        self.metadata = metadata
+        self.content = content
+        self.toc = toc
+        self.title = title
+        self.category = category
+        self.slug = slug
+        self.lang = lang
+        self.status = status
+
+        # dates
+        self.date_created = Date(date_created, config)
+        self.date_modified = Date(date_modified, config)
+
+        # authors
+        self.authors = [
+            Author(local_part, domain, name, config)
+            for local_part, domain, name in authors
+        ]
+        if last_modification_author:
+            self.last_modification_author = Author(
+                last_modification_author[0],
+                last_modification_author[1],
+                last_modification_author[2],
+                config,
+            )
+        else:
+            self.last_modification_author = None
+
+        # tags: metadata tags without the category, then the category itself
+        # (the category is also a default tag)
+        own_tags = []
+        if 'tags' in metadata:
+            own_tags = [t for t in metadata['tags'] if t != category]
+        self.tags = own_tags + [self.category]
+
+        # template
+        self.template = (
+            metadata['template'] if 'template' in metadata
+            else config['theme']['default_template']
+        )
+
+        # url
+        self.url = '/'.join((self.lang, self.category, self.slug + ".html"))
+
+    def __lt__(self, other):
+        """Order pages by their modification date."""
+        return self.date_modified < other.date_modified
+
+
+class Date:
+    """Thin wrapper around a datetime that orders by POSIX timestamp."""
+    def __init__(self, dt, config):
+        self.dt = dt  # config is unused so far (TODO various formats)
+
+    def __lt__(self, other):
+        return self.dt.timestamp() < other.dt.timestamp()
+
+    def isoformat(self):
+        return self.dt.isoformat()
+
+class Author:
+    """A page author: display name plus an e-mail address kept in two parts."""
+    def __init__(self, email_local_part, email_domain, name, config):
+        # config is unused so far (TODO md5 hash etc.)
+        self.name = name
+        self.email_local_part = email_local_part
+        self.email_domain = email_domain
+        self.email = '@'.join((email_local_part, email_domain))
diff --git a/fgs/pandoc_toc.html b/fgs/pandoc_toc.html
new file mode 100644
index 0000000..2fd5a7b
--- /dev/null
+++ b/fgs/pandoc_toc.html
@@ -0,0 +1 @@
+$table-of-contents$
diff --git a/fgs/reader.py b/fgs/reader.py
new file mode 100644
index 0000000..96ddee4
--- /dev/null
+++ b/fgs/reader.py
@@ -0,0 +1,218 @@
+import page
+
+import frontmatter
+
+from datetime import datetime
+from dateutil import parser as dtparser
+
+import subprocess
+import os
+
+class MarkdownReader:
+    """Turns markdown files into page.Page objects.
+
+    HTML and table of contents come from the `pandoc` executable, dates and
+    authors from `git log`; both are run as subprocesses.
+    """
+
+    def __init__(self, config):
+        """Keep the site configuration dict used by the other methods."""
+        self.config = config
+
+    def read_and_parse_file(self, path, subpath):
+        """Read one markdown file and build the page.Page describing it.
+
+        Args:
+            path: path of the ``.md`` file.
+            subpath: list of directory names leading to the file; at most
+                one entry is allowed and it is the fallback category.
+
+        Returns:
+            The populated page.Page.
+
+        Raises:
+            Exception: not a ``.md`` path, file nested too deep, missing
+                title, unsupported language, unusable slug or invalid status.
+        """
+        if not path.endswith(".md"):
+            raise Exception("can only parse markdown files: ", path)
+        elif len(subpath) > 1:
+            raise Exception("markdown file is too deep in directory structure: ", path, subpath)
+
+        print("parsing file: ", path, subpath)
+
+        # pandoc is fed UTF-8 below, so the source is read as UTF-8 too; the
+        # context manager closes the handle even if reading fails.
+        with open(path, encoding='utf-8') as f:
+            rawcontent = f.read()
+        metadata, _ = frontmatter.parse(rawcontent)
+
+        category_name = self.get_category_name(metadata, subpath)
+
+        # content
+        pandoc_cfg = self.config['pandoc']
+        content = self.run_pandoc(rawcontent, pandoc_cfg['base'], pandoc_cfg['extensions'], "html5")
+
+        # TOC: same conversion through a template that only emits the toc
+        toc_args = ["--template", "./pandoc_toc.html", "--toc", "--toc-depth", str(self.config['toc_depth'])]
+        toc = self.run_pandoc(rawcontent, pandoc_cfg['base'], pandoc_cfg['extensions'], "html5", toc_args)
+
+        # title
+        if 'title' not in metadata:
+            raise Exception("File is missing title in metadata: ", path, subpath)
+        title = metadata['title']
+
+        filename, slug, lang = self._filename_slug_lang(path, subpath, metadata)
+        date_created, date_modified = self._dates(path, metadata)
+        authors, last_modification_author = self._authors(path)
+
+        # status
+        status = metadata.get('status', self.config['default_status'])
+        valid_status = ["published", "draft", "hidden"]
+        if status not in valid_status:
+            raise Exception("invalid status '", status, "' must be one of ", valid_status)
+
+        # TODO summary
+
+        return page.Page(
+            filename, subpath, rawcontent, metadata, content, toc, title,
+            category_name, slug, lang, date_created, date_modified,
+            authors, last_modification_author, status, self.config)
+
+    def _filename_slug_lang(self, path, subpath, metadata):
+        """Derive (filename, slug, lang) from ``<slug>[.<lang>].md`` and metadata.
+
+        'slug' and 'lang' metadata entries win over the file name; without
+        any language the configured default language is used.
+        """
+        filename = os.path.basename(path)
+        name_parts = filename.split('.')[:-1]  # drop the "md" suffix
+        slug = None
+        lang = None
+        if len(name_parts) < 1:
+            raise Exception("filename is empty?", path, subpath)
+        elif len(name_parts) == 1:
+            slug = name_parts[0]
+        elif len(name_parts) == 2:
+            slug, lang = name_parts
+        if 'slug' in metadata:
+            slug = metadata['slug']
+        if 'lang' in metadata:
+            lang = metadata['lang']
+        if lang is None:
+            lang = self.config['lang']['default']
+        if not self.is_supported_lang(lang):
+            raise Exception("language is not supported: ", lang)
+        return filename, self.secure_slug(slug), lang
+
+    def _dates(self, path, metadata):
+        """Return (date_created, date_modified) for the file.
+
+        git log supplies the defaults (first output line = modified, last =
+        created; "now" if there is no output); 'date'/'modified' metadata win.
+        """
+        date_created = date_modified = datetime.now()
+        date_changes = self.run_git(path, "log", ["--follow", "--format=%ad", "--date", "iso-strict"]).splitlines()
+        if date_changes:
+            date_modified = datetime.fromisoformat(date_changes[0])
+            date_created = datetime.fromisoformat(date_changes[-1])
+        if 'date' in metadata:
+            date_created = self._parse_date(metadata['date'])
+        if 'modified' in metadata:
+            date_modified = self._parse_date(metadata['modified'])
+        return date_created, date_modified
+
+    def _parse_date(self, value):
+        """Parse a metadata date that is either text or already a date object."""
+        # The YAML front matter loader may hand over date/datetime objects,
+        # which dateutil rejects; send those through their ISO text form.
+        return dtparser.parse(value if isinstance(value, str) else value.isoformat())
+
+    def _authors(self, path):
+        """Return (unique authors, author of the first log entry or None)."""
+        authors_raw = self.run_git(path, "log", ["--follow", "--format=%aE@%aN", "--use-mailmap"]).splitlines()
+        # dict.fromkeys removes duplicates while keeping the log order
+        authors = [self.extract_author(raw) for raw in dict.fromkeys(authors_raw)]
+        last_modification_author = self.extract_author(authors_raw[0]) if authors_raw else None
+        return authors, last_modification_author
+
+    def extract_author(self, raw):
+        """Split ``local@domain@name``; the name may itself contain "@"."""
+        author_split = raw.split('@')
+        author_local_part = author_split[0]
+        author_domain = author_split[1]
+        author_name = '@'.join(author_split[2:])
+        return (author_local_part, author_domain, author_name)
+
+    def is_supported_lang(self, lang):
+        """Return True if lang is a string listed in config['lang']['supported']."""
+        return isinstance(lang, str) and lang in self.config['lang']['supported']
+
+    def secure_slug(self, slug):
+        """Lower-case slug and drop everything except ASCII letters and digits.
+
+        Raises:
+            Exception: slug is not a string or nothing is left of it.
+        """
+        if not isinstance(slug, str):
+            raise Exception("slug is not a string: '", slug, "'")
+        whitelist = "abcdefghijklmnopqrstuvwxyz0123456789"
+        res = "".join(c for c in slug.lower() if c in whitelist)
+        if not res:
+            raise Exception("slug is empty")
+        return res
+
+    def get_category_name(self, metadata, subpath):
+        """Category from metadata, else the single subpath entry, else 'misc'."""
+        if 'category' in metadata:
+            return metadata['category']
+        if len(subpath) == 1:
+            return subpath[0]
+        return 'misc'
+
+    def run_git(self, path, subcmd, extra_args):
+        """Run ``git <subcmd> <extra_args> -- <file>`` in the file's directory.
+
+        Returns git's standard output decoded as UTF-8. The exit status is
+        not checked, so a failing call yields whatever stdout it produced.
+        """
+        real_path = os.path.realpath(path)
+        args = ["git", subcmd] + extra_args + ["--", os.path.basename(real_path)]
+        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=os.path.dirname(real_path))
+        out, _ = p.communicate(b"")
+        return out.decode('utf-8')
+
+    def run_pandoc(self, source, base="markdown", extensions=None, to="json", extra_args=None):
+        """Convert source with pandoc and return its standard output as text.
+
+        Args:
+            source: input text, sent to pandoc's stdin as UTF-8.
+            base: pandoc input format the extensions are appended to.
+            extensions: list of extension names (a leading "+"/"-" is kept,
+                bare names get "+", entries starting with "#" are skipped)
+                or dict name -> {"enabled": bool, "ignore": bool}.
+            to: pandoc output format.
+            extra_args: further command line arguments for pandoc.
+        """
+        ext_parts = []
+        if isinstance(extensions, list):
+            for ext in extensions:
+                if ext.startswith('#'):
+                    continue
+                if ext.startswith(('+', '-')):
+                    ext_parts.append(ext)
+                elif ext:
+                    ext_parts.append('+' + ext)
+        elif isinstance(extensions, dict):
+            # TODO catch 'illegal' ext_keys (containing spaces for example)
+            for ext_key, ext in extensions.items():
+                if ext.get("ignore"):
+                    continue
+                ext_parts.append(('+' if ext.get("enabled", True) else '-') + ext_key)
+
+        args = ["pandoc", "-f", base + "".join(ext_parts), "-t", to] + list(extra_args or [])
+        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        out, _ = p.communicate(source.encode('utf-8', errors='strict'))
+        return out.decode('utf-8')
+
+
diff --git a/fgs/writer.py b/fgs/writer.py
new file mode 100644
index 0000000..a5eb1e6
--- /dev/null
+++ b/fgs/writer.py
@@ -0,0 +1,61 @@
+from jinja2 import Environment, FileSystemLoader
+
+import os
+
+class Writer:
+    """Renders Jinja2 templates and writes files below the output directory."""
+
+    def __init__(self, config, context, output_dir, theme_dir):
+        """Set up the Jinja2 environment for <theme_dir>/templates.
+
+        Autoescaping is disabled, so template values are inserted verbatim.
+        """
+        self.config = config
+        self.context = context
+        self.output_dir = output_dir
+        self.theme_dir = theme_dir
+        self.env = Environment(
+            loader=FileSystemLoader(theme_dir + "/templates"),
+            autoescape=False
+        )
+        print("templates: ", self.env.list_templates())
+
+    def write_template(self, template, path, lang, extra_context):
+        """Render template and write the result to path below output_dir.
+
+        The template sees the shared context plus config, theme, template, t
+        (the 'lang' config), l (current language), path and siteurl; entries
+        of extra_context override all of them.
+        """
+        tmpl = self.env.get_template(template)
+
+        if self.config['relative_urls']:
+            # one "/.." per directory level of path: a/b/c.html -> ./../..
+            siteurl = "." + "/.." * (len(path.split('/')) - 1)
+        else:
+            siteurl = self.config['siteurl']
+
+        # render template
+        context = {}
+        context.update(self.context)
+        context["config"] = self.config
+        context["theme"] = self.config['theme']
+        context["template"] = template
+        context["t"] = self.config["lang"]  # translate
+        context["l"] = lang  # current language
+        context["path"] = path
+        context["siteurl"] = siteurl
+        context.update(extra_context)
+
+        self.write_file(path, tmpl.render(context))
+
+    def write_file(self, path, out, mode="w"):
+        """Write out to <output_dir>/<path>, creating missing directories."""
+        fullpath = self.output_dir + '/' + path
+        directory = os.path.dirname(fullpath)
+        print("fullpath: ", fullpath)
+        print("dir: ", directory)
+        os.makedirs(directory, exist_ok=True)
+        # text is always written as UTF-8; binary modes must not get an encoding
+        with open(fullpath, mode, encoding=None if 'b' in mode else 'utf-8') as f:
+            f.write(out)
-- 
GitLab