From 5a36148a72266ac70a2c1c1fba8eaecafe3364a2 Mon Sep 17 00:00:00 2001
From: Jake <j.vondoemming@stud.uni-goettingen.de>
Date: Tue, 2 Aug 2022 10:16:08 +0200
Subject: [PATCH] added fgs

---
 fgs/Makefile        |   4 +
 fgs/__main__.py     |  96 +++++++++++++++++++
 fgs/generator.py    |  70 ++++++++++++++
 fgs/page.py         |  76 +++++++++++++++
 fgs/pandoc_toc.html |   1 +
 fgs/reader.py       | 223 ++++++++++++++++++++++++++++++++++++++++++++
 fgs/writer.py       |  69 ++++++++++++++
 7 files changed, 539 insertions(+)
 create mode 100644 fgs/Makefile
 create mode 100644 fgs/__main__.py
 create mode 100644 fgs/generator.py
 create mode 100644 fgs/page.py
 create mode 100644 fgs/pandoc_toc.html
 create mode 100644 fgs/reader.py
 create mode 100644 fgs/writer.py

diff --git a/fgs/Makefile b/fgs/Makefile
new file mode 100644
index 0000000..6cfa260
--- /dev/null
+++ b/fgs/Makefile
@@ -0,0 +1,4 @@
+
+.PHONY: run
+run:
+	python3 __main__.py
diff --git a/fgs/__main__.py b/fgs/__main__.py
new file mode 100644
index 0000000..6e226fb
--- /dev/null
+++ b/fgs/__main__.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""Entry point of fgs: read the content tree and write the static site."""
+
+import json
+import os
+import sys
+
+
+import reader
+import generator
+import writer
+
+CONTENT_DIR = '../content'
+OUTPUT_DIR = '../public'
+THEME_DIR = '../theme'
+
+
+def main():
+    """Load the configuration, read all content and write the output."""
+    print("Hello World")
+    config = {}
+    # Later updates override earlier keys; lang.json is stored under 'lang'.
+    with open('../config.json') as f:
+        config.update(json.load(f))
+    with open('../lang.json') as f:
+        config['lang'] = json.load(f)
+    with open(CONTENT_DIR + '/config.json') as f:
+        config.update(json.load(f))
+    print(config)
+
+    mdreader = reader.MarkdownReader(config)
+    pages = []
+    directory = os.path.join(CONTENT_DIR, ".")
+    parse_dir(directory, pages, mdreader)
+
+    static_dirs = []
+    for sdir in config['static_directories']:
+        # Names containing a dot are rejected, which also rules out '..'.
+        if "." in sdir:
+            raise Exception("Illegal static directory name: ", sdir)
+        static_dirs.append(CONTENT_DIR + "/" + sdir)
+    static_files = {}
+    for sdir in static_dirs:
+        read_static_dir(sdir, static_files)
+
+    # Theme files are keyed below the theme's configured static_dir.
+    read_static_dir(THEME_DIR + '/static', static_files, [config['theme']['static_dir']])
+
+    context = {}
+    gen = generator.Generator(config, context)
+    gen.generate_context(pages, static_files)
+
+    wrt = writer.Writer(config, context, OUTPUT_DIR, THEME_DIR)
+
+    gen.generate_output(wrt)
+
+
+def read_static_dir(directory, static_files, subpath=None):
+    """Recursively read every non-hidden file below *directory*.
+
+    The raw bytes are stored in *static_files*, keyed by the '/'-joined
+    *subpath* components (a list) followed by the file name.
+    """
+    subpath = [] if subpath is None else subpath
+    print("static_dir: " + directory)
+    for filename in os.listdir(directory):
+        if filename.startswith("."):
+            continue
+        fp = os.path.join(directory, filename)
+        if os.path.isfile(fp):
+            lpath = '/'.join(subpath + [filename])
+            with open(fp, "rb") as f:
+                static_files[lpath] = f.read()
+            print("read: ", fp, " as: ", lpath)
+        elif os.path.isdir(fp):
+            read_static_dir(fp, static_files, subpath + [filename])
+
+
+def parse_dir(directory, pages, mdreader, subpath=None):
+    """Recursively parse every ``.md`` file below *directory* into *pages*.
+
+    *subpath* lists the directory names descended into so far; hidden
+    directories are skipped.
+    """
+    subpath = [] if subpath is None else subpath
+    print("parse_dir: " + directory)
+    for filename in os.listdir(directory):
+        f = os.path.join(directory, filename)
+        if os.path.isfile(f) and filename.endswith(".md"):
+            pages.append(mdreader.read_and_parse_file(f, subpath))
+        elif os.path.isdir(f) and not filename.startswith("."):
+            parse_dir(f, pages, mdreader, subpath + [filename])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fgs/generator.py b/fgs/generator.py
new file mode 100644
index 0000000..540f32b
--- /dev/null
+++ b/fgs/generator.py
@@ -0,0 +1,70 @@
+class Generator:
+    """Build the shared template context and drive the output writer."""
+
+    def __init__(self, config, context):
+        self.config = config
+        self.context = context
+
+    def generate_context(self, pages, static_files):
+        """Fill the context with the static files and indexes of published pages.
+
+        Raises an Exception if two published pages share language and slug.
+        """
+        # static_files
+        self.context['static_files'] = static_files
+
+        published_pages = [page for page in pages if page.status == "published"]
+
+        # pages: lang -> slug -> page
+        all_pages = {}
+        for page in published_pages:
+            lang_pages = all_pages.setdefault(page.lang, {})
+            if page.slug in lang_pages:
+                raise Exception("duplicate language (", page.lang, ") for slug '", page.slug, "'")
+            lang_pages[page.slug] = page
+        self.context['pages'] = all_pages
+
+        # pages_modified: lang -> pages sorted ascending by modification date
+        pages_modified = {}
+        for lang in self.config['lang']['supported']:
+            pages_modified[lang] = sorted(page for page in published_pages if page.lang == lang)
+        self.context['pages_modified'] = pages_modified
+
+        # TODO hidden pages
+        # TODO draft pages
+        # TODO authors
+
+        # categories: category -> lang -> list of pages
+        categories = {}
+        for page in published_pages:
+            categories.setdefault(page.category, {}).setdefault(page.lang, []).append(page)
+        self.context['categories'] = categories
+
+        # tags: tag -> lang -> list of pages
+        tags = {}
+        for page in published_pages:
+            for tag in page.tags:
+                tags.setdefault(tag, {}).setdefault(page.lang, []).append(page)
+        self.context['tags'] = tags
+
+    def generate_homepage(self, writer, lang, path):
+        """Render the page with the theme's homepage slug for *lang* to *path*."""
+        page = self.context['pages'][lang][self.config['theme']['homepage_slug']]
+        writer.write_template(page.template, path, lang, {'page': page})
+
+    def generate_output(self, writer):
+        """Write all static files, every published page and the homepages."""
+        for sf, raw in self.context['static_files'].items():
+            print("writing binary file: ", sf)
+            writer.write_file(sf, raw, mode="wb")
+
+        for lang in self.config['lang']['supported']:
+            # all pages
+            for page in self.context['pages'][lang].values():
+                writer.write_template(page.template, page.url, lang, {'page': page})
+
+            # homepages for languages
+            self.generate_homepage(writer, lang, lang + "/index.html")
+
+        # homepage
+        self.generate_homepage(writer, self.config['lang']['default'], "index.html")
diff --git a/fgs/page.py b/fgs/page.py
new file mode 100644
index 0000000..a2d9336
--- /dev/null
+++ b/fgs/page.py
@@ -0,0 +1,76 @@
+class Page:
+    """A parsed content page together with its derived metadata."""
+
+    def __init__(self, filename, subpath, raw, metadata, content, toc, title, category, slug, lang, date_created, date_modified, authors, last_modification_author, status, config):
+        """Store the parsed values and derive authors, tags, template and url.
+
+        *authors* is an iterable of ``(local_part, domain, name)`` tuples;
+        *last_modification_author* is one such tuple or a falsy value.
+        """
+        self.filename = filename
+        self.subpath = subpath
+        self.raw = raw
+        self.metadata = metadata
+        self.content = content
+        self.toc = toc
+        self.title = title
+        self.category = category
+        self.slug = slug
+        self.lang = lang
+        self.date_created = Date(date_created, config)
+        self.date_modified = Date(date_modified, config)
+        self.status = status
+
+        # authors
+        self.authors = [Author(local_part, domain, name, config) for local_part, domain, name in authors]
+        if last_modification_author:
+            self.last_modification_author = Author(last_modification_author[0], last_modification_author[1], last_modification_author[2], config)
+        else:
+            self.last_modification_author = None
+
+        # tags: metadata tags without the category, then the category itself,
+        # because the category is also a default tag
+        self.tags = []
+        if 'tags' in metadata:
+            self.tags = [t for t in metadata['tags'] if t != category]
+        self.tags.append(self.category)
+
+        # template
+        if 'template' in metadata:
+            self.template = metadata['template']
+        else:
+            self.template = config['theme']['default_template']
+
+        # url
+        self.url = self.lang + '/' + self.category + '/' + self.slug + ".html"
+
+    def __lt__(self, other):
+        """Order pages by their modification date."""
+        return self.date_modified < other.date_modified
+
+
+class Date:
+    """Wrapper around a ``datetime`` object used for page dates."""
+
+    def __init__(self, dt, config):
+        self.dt = dt
+        # TODO various formats
+
+    def isoformat(self):
+        """Return ``isoformat()`` of the wrapped datetime."""
+        return self.dt.isoformat()
+
+    def __lt__(self, other):
+        """Compare two dates by their POSIX timestamps."""
+        return self.dt.timestamp() < other.dt.timestamp()
+
+
+class Author:
+    """An author with e-mail address (split and joined) and display name."""
+
+    def __init__(self, email_local_part, email_domain, name, config):
+        self.email_local_part = email_local_part
+        self.email_domain = email_domain
+        self.email = email_local_part + '@' + email_domain
+        self.name = name
+        # TODO md5 hash etc.
diff --git a/fgs/pandoc_toc.html b/fgs/pandoc_toc.html
new file mode 100644
index 0000000..2fd5a7b
--- /dev/null
+++ b/fgs/pandoc_toc.html
@@ -0,0 +1 @@
+$table-of-contents$
diff --git a/fgs/reader.py b/fgs/reader.py
new file mode 100644
index 0000000..96ddee4
--- /dev/null
+++ b/fgs/reader.py
@@ -0,0 +1,223 @@
+import page
+
+import frontmatter
+
+from datetime import datetime
+from dateutil import parser as dtparser
+
+import subprocess
+import os
+
+
+class MarkdownReader:
+    """Turn Markdown files into ``page.Page`` objects using pandoc and git."""
+
+    def __init__(self, config):
+        self.config = config
+
+    def read_and_parse_file(self, path, subpath):
+        """Parse the Markdown file at *path* and return a ``page.Page``.
+
+        *subpath* lists the directories between the content root and the
+        file; at most one level is allowed and names the default category.
+        Slug and language come from a ``<slug>.<lang>.md`` file name unless
+        the front matter overrides them.  Raises an Exception for invalid
+        paths, a missing title, an unsupported language or an invalid status.
+        """
+        if not path.endswith(".md"):
+            raise Exception("can only parse markdown files: ", path)
+        elif len(subpath) > 1:
+            raise Exception("markdown file is too deep in directory structure: ", path, subpath)
+
+        print("parsing file: ", path, subpath)
+
+        # pandoc is fed UTF-8 below, so read the source as UTF-8 as well
+        with open(path, encoding='utf-8') as f:
+            rawcontent = f.read()
+        metadata, _ = frontmatter.parse(rawcontent)
+
+        category_name = self.get_category_name(metadata, subpath)
+
+        # content
+        content = self.run_pandoc(rawcontent, self.config['pandoc']['base'], self.config['pandoc']['extensions'], "html5")
+
+        # TOC
+        toc = self.run_pandoc(rawcontent, self.config['pandoc']['base'], self.config['pandoc']['extensions'], "html5", ["--template", "./pandoc_toc.html", "--toc", "--toc-depth", str(self.config['toc_depth'])])
+
+        # title
+        if 'title' not in metadata:
+            raise Exception("File is missing title in metadata: ", path, subpath)
+        title = metadata['title']
+
+        # slug and lang
+        filename = os.path.basename(path)
+        filenamelist = filename.split('.')[:-1]  # remove .md
+        slug = None
+        lang = None
+        if len(filenamelist) < 1:
+            raise Exception("filename is empty?", path, subpath)
+        elif len(filenamelist) == 1:
+            slug = filenamelist[0]
+        elif len(filenamelist) == 2:
+            slug = filenamelist[0]
+            lang = filenamelist[1]
+        if 'slug' in metadata:
+            slug = metadata['slug']
+        if 'lang' in metadata:
+            lang = metadata['lang']
+
+        if lang is None:
+            lang = self.config['lang']['default']
+
+        if not self.is_supported_lang(lang):
+            raise Exception("language is not supported: ", lang)
+        slug = self.secure_slug(slug)
+
+        # date_created and date_modified: git history first, front matter wins
+        date_modified = datetime.now()
+        date_created = datetime.now()
+        date_changes = self.run_git(path, "log", ["--follow", "--format=%ad", "--date", "iso-strict"]).splitlines()
+        if date_changes:
+            # git log lists the newest commit first
+            date_modified = datetime.fromisoformat(date_changes[0])
+            date_created = datetime.fromisoformat(date_changes[-1])
+        # YAML may already have turned unquoted dates into date objects,
+        # which dateutil refuses to parse, hence the str()
+        if 'date' in metadata:
+            date_created = dtparser.parse(str(metadata['date']))
+        if 'modified' in metadata:
+            date_modified = dtparser.parse(str(metadata['modified']))
+
+        # author
+        # TODO author from metadata
+        authors_raw = self.run_git(path, "log", ["--follow", "--format=%aE@%aN", "--use-mailmap"]).splitlines()
+        authors = []
+        known_author_raws = set()
+        for author_raw in authors_raw:
+            if author_raw not in known_author_raws:
+                authors.append(self.extract_author(author_raw))
+                known_author_raws.add(author_raw)
+        if authors_raw:
+            last_modification_author = self.extract_author(authors_raw[0])
+        else:
+            last_modification_author = None
+
+        # status
+        status = self.config['default_status']
+        if 'status' in metadata:
+            status = metadata['status']
+        valid_status = ["published", "draft", "hidden"]
+        if status not in valid_status:
+            raise Exception("invalid status '", status, "' must be one of ", valid_status)
+
+        # TODO summary
+
+        return page.Page(
+            filename,
+            subpath,
+            rawcontent,
+            metadata,
+            content,
+            toc,
+            title,
+            category_name,
+            slug,
+            lang,
+            date_created,
+            date_modified,
+            authors,
+            last_modification_author,
+            status,
+            self.config)
+
+    def extract_author(self, raw):
+        """Split ``local@domain@name`` into ``(local_part, domain, name)``.
+
+        The name is everything after the second ``@`` and may contain more.
+        """
+        author_split = raw.split('@')
+        author_local_part = author_split[0]
+        author_domain = author_split[1]
+        author_name = '@'.join(author_split[2:])
+        return (author_local_part, author_domain, author_name)
+
+    def is_supported_lang(self, lang):
+        """Return True if *lang* is a string listed as supported language."""
+        if not isinstance(lang, str):
+            return False
+        return lang in self.config['lang']['supported']
+
+    def secure_slug(self, slug):
+        """Return *slug* lower-cased and reduced to ASCII letters and digits.
+
+        Raises an Exception if *slug* is not a string or nothing remains.
+        """
+        if not isinstance(slug, str):
+            raise Exception("slug is not a string: '", slug, "'")
+        whitelist = "abcdefghijklmnopqrstuvwxyz0123456789"
+        res = "".join(c for c in slug.lower() if c in whitelist)
+        if not res:
+            raise Exception("slug is empty")
+        return res
+
+    def get_category_name(self, metadata, subpath):
+        """Return the category from the front matter, the directory or 'misc'."""
+        if 'category' in metadata:
+            return metadata['category']
+        elif len(subpath) == 1:
+            return subpath[0]
+        else:
+            return 'misc'
+
+    def run_git(self, path, subcmd, extra_args):
+        """Run ``git <subcmd> <extra_args> -- <file>`` and return its stdout.
+
+        The command runs in the directory of the (symlink-resolved) file.
+        The exit status is not checked, so a failing git yields whatever it
+        printed to stdout.
+        """
+        real_path = os.path.realpath(path)
+        filename = os.path.basename(real_path)
+        dir_path = os.path.dirname(real_path)
+        git_bin = "git"
+        args = [git_bin, subcmd] + extra_args + ["--", filename]
+        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=dir_path)
+        out, _ = p.communicate("".encode('utf-8', errors='strict'))
+        out_str = out.decode('utf-8')
+        return out_str
+
+    def run_pandoc(self, source, base="markdown", extensions=None, to="json", extra_args=None):
+        """Convert *source* with pandoc and return the output as a string.
+
+        *extensions* is either a list of extension names (optionally
+        prefixed with ``+``/``-``; entries starting with ``#`` are skipped)
+        or a dict mapping names to options with ``ignore`` and ``enabled``.
+        """
+        extensions = [] if extensions is None else extensions
+        extra_args = [] if extra_args is None else extra_args
+        ext_str = ""
+        if isinstance(extensions, list):
+            for ext in extensions:
+                if ext.startswith('#'):
+                    continue
+                if ext.startswith('+') or ext.startswith('-'):
+                    ext_str = ext_str + ext
+                elif len(ext) > 0:
+                    ext_str = ext_str + '+' + ext
+        elif isinstance(extensions, dict):
+            for ext_key in extensions:
+                # TODO catch 'illegal' ext_keys (containing spaces for example)
+                ext = extensions[ext_key]
+                if "ignore" in ext and ext["ignore"]:
+                    continue
+                flag = '+'
+                if "enabled" in ext and not ext["enabled"]:
+                    flag = '-'
+                ext_str = ext_str + flag + ext_key
+
+        pandoc_bin = "pandoc"
+        args = [pandoc_bin, "-f", base + ext_str, "-t", to] + extra_args
+        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        out, _ = p.communicate(source.encode('utf-8', errors='strict'))
+        out_str = out.decode('utf-8')
+        return out_str
diff --git a/fgs/writer.py b/fgs/writer.py
new file mode 100644
index 0000000..a5eb1e6
--- /dev/null
+++ b/fgs/writer.py
@@ -0,0 +1,69 @@
+from jinja2 import Environment, FileSystemLoader
+
+import os
+
+
+class Writer:
+    """Render Jinja2 templates of the theme and write files to the output."""
+
+    def __init__(self, config, context, output_dir, theme_dir):
+        self.config = config
+        self.context = context
+        self.output_dir = output_dir
+        self.theme_dir = theme_dir
+
+        self.env = Environment(
+            loader=FileSystemLoader(theme_dir + "/templates"),
+            autoescape=False
+        )
+
+        print("templates: ", self.env.list_templates())
+
+    def write_template(self, template, path, lang, extra_context):
+        """Render *template* for language *lang* and write it to *path*.
+
+        The template sees the shared context plus ``config``, ``theme``,
+        ``template``, ``t``, ``l``, ``path`` and ``siteurl``; entries of
+        *extra_context* override all of them.
+        """
+        tmpl = self.env.get_template(template)
+
+        pathsplit = path.split('/')
+        # TODO remove all '.' and '..' components before counting levels
+
+        if self.config['relative_urls']:
+            # one '/..' per directory level between the file and the site root
+            siteurl = "." + '/..' * (len(pathsplit) - 1)
+        else:
+            siteurl = self.config['siteurl']
+
+        # render template
+        context = {}
+        context.update(self.context)
+        context["config"] = self.config
+        context["theme"] = self.config['theme']
+        context["template"] = template
+        context["t"] = self.config["lang"] # translate
+        context["l"] = lang # current language
+        context["path"] = path
+        context["siteurl"] = siteurl
+        context.update(extra_context)
+        out = tmpl.render(context)
+
+        # write file
+        self.write_file(path, out)
+
+    def write_file(self, path, out, mode="w"):
+        """Write *out* to *path* below the output directory.
+
+        Missing parent directories are created.  Text modes are written as
+        UTF-8; pass ``mode="wb"`` for bytes.
+        """
+        fullpath = self.output_dir + '/' + path
+        directory = os.path.dirname(fullpath)
+        print("fullpath: ", fullpath)
+        print("dir: ", directory)
+        os.makedirs(directory, exist_ok=True)
+        encoding = None if 'b' in mode else 'utf-8'
+        with open(fullpath, mode, encoding=encoding) as f:
+            f.write(out)
-- 
GitLab