Newer
Older
import frontmatter
from datetime import datetime
import subprocess
import os
def __init__(self, config, factories):
    """Remember the configuration and the factory lookup on the instance.

    Args:
        config: Configuration object, stored as ``self.config``.
        factories: Factory lookup, stored as ``self.factories``.
    """
    self.config, self.factories = config, factories
def can_read_file(self, mimetype, mimeencoding):
    """Placeholder that concrete readers are expected to replace.

    The overriding implementations in this file return a bool telling
    whether the reader accepts a file with the given MIME data.

    Args:
        mimetype: MIME type of the candidate file.
        mimeencoding: MIME encoding of the candidate file.

    Raises:
        NotImplementedError: Always. It is a subclass of ``Exception``,
            so callers that catch ``Exception`` keep working.
    """
    raise NotImplementedError("Function not implemented.")
raise Exception("Function not implemented.")
class RawFileReader(Reader):
    """Reader that accepts every file and passes its raw bytes to the 'file' factory."""

    def can_read_file(self, mimetype, mimeencoding):
        """Accept any file, whatever its MIME type or encoding.

        Args:
            mimetype: MIME type of the candidate file (ignored).
            mimeencoding: MIME encoding of the candidate file (ignored).

        Returns:
            Always ``True``.
        """
        # A check rejecting files with a non-None mimeencoding existed here
        # but was commented out, so no filtering on encoding takes place.
        return True

    def read_file(self, path, subpath, mimetype, mimeencoding):
        """Read the file at ``path`` as bytes and initialise a 'file' object with them.

        Args:
            path: '/'-separated path of the file to read.
            subpath: Unused by this reader.
            mimetype: Unused by this reader.
            mimeencoding: Unused by this reader.

        Returns:
            ``None``; the object obtained from the factory is not returned.
        """
        # The context manager closes the handle even when read() raises.
        with open(path, 'rb') as f:
            rawcontents = f.read()
        pathlist = path.split('/')
        # NOTE(review): the directory list comes from `path`, not from the
        # `subpath` argument -- confirm this is intended.
        subpathlist = pathlist[:-1]
        filename = pathlist[-1]
        # NOTE(review): the result of init() is discarded -- confirm callers
        # do not expect a return value here.
        self.factories['file'].get(filename).init(rawcontents, subpathlist)
def __init__(self, config, factories, mimetype, mimeencoding, base, extensions):
    """Initialise the parent reader and record this reader's settings.

    Args:
        config: Forwarded to the parent ``__init__``.
        factories: Forwarded to the parent ``__init__``.
        mimetype: MIME type compared against in ``can_read_file``.
        mimeencoding: MIME encoding compared against in ``can_read_file``.
        base: Passed as ``base=`` to ``pandoc.run_pandoc`` by ``read_file``.
        extensions: Passed as ``extensions=`` to ``pandoc.run_pandoc`` by
            ``read_file``.
    """
    super().__init__(config, factories)
    self.mimetype, self.mimeencoding = mimetype, mimeencoding
    self.base, self.extensions = base, extensions
def can_read_file(self, mimetype, mimeencoding):
    """Tell whether a file's MIME data matches what this reader was set up for.

    Args:
        mimetype: MIME type of the candidate file.
        mimeencoding: MIME encoding of the candidate file.

    Returns:
        ``True`` only when both values equal ``self.mimetype`` and
        ``self.mimeencoding`` respectively, otherwise ``False``.
    """
    return self.mimetype == mimetype and self.mimeencoding == mimeencoding
def read_file(self, path, subpath, mimetype, mimeencoding):
if len(subpath) > 1:
raise Exception("file is too deep in directory structure: ", path, subpath)
#print("parsing file: ", path, subpath, mimetype, mimeencoding)
#print(metadata)
category_name = self.get_category_name(metadata, subpath)
# slug and lang
pathlist = path.split('/')
filename = pathlist[-1]
filenamelist = filename.split('.')
filenamelist = filenamelist[:-1] # remove .md
slug = None
lang = None
if len(filenamelist) < 1:
raise Exception("filename is empty?", path, subpath)
elif len(filenamelist) == 1:
slug = filenamelist[0]
elif len(filenamelist) == 2:
slug = filenamelist[0]
lang = filenamelist[1]
if 'slug' in metadata:
slug = metadata['slug']
if 'lang' in metadata:
lang = metadata['lang']
if lang == None:
lang = self.config['lang']['default']
if not self.is_supported_lang(lang):
raise Exception("language is not supported: ", lang)
slug = self.secure_slug(slug)
#print("slug: ", slug)
#print("lang: ", lang)
#content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.config['pandoc']['base'], extensions=self.config['pandoc']['extensions'])
content, contentmetadata = pandoc.run_pandoc(factories=self.factories, lang=lang, source=rawcontent, base=self.base, extensions=self.extensions)
metadata.update(contentmetadata) # merge content specific metadata into metadata
#print(content)
# title
if 'title' not in metadata:
raise Exception("File is missing title in metadata: ", path, subpath)
title = metadata['title']
# date_created and date_modified
date_modified = datetime.now()
date_created = datetime.now()
date_changes = self.run_git(path, "log", ["--follow", "--format=%ad", "--date", "iso-strict"]).splitlines()
#print("date_changes: ", date_changes)
if (len(date_changes) > 0):
date_modified = datetime.fromisoformat(date_changes[0])
date_created = datetime.fromisoformat(date_changes[-1])
if 'date' in metadata:
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#print("created: ", date_created)
#print("last changed: ", date_modified)
# author
# TODO author from metadata
authors_raw = self.run_git(path, "log", ["--follow", "--format=%aE@%aN", "--use-mailmap"]).splitlines()
authors = []
known_author_raws = []
for author_raw in authors_raw:
if author_raw not in known_author_raws:
authors.append(self.extract_author(author_raw))
known_author_raws.append(author_raw)
if len(authors_raw) > 0:
last_modification_author = self.extract_author(authors_raw[0])
else:
last_modification_author = None
# status
status = self.config['default_status']
if 'status' in metadata:
status = metadata['status']
valid_status = ["published", "draft", "hidden"]
if status not in valid_status:
raise Exception("invalid status '", status, "' must be one of ", valid_status)
# TODO summary
metadata,
content,
title,
category_name,
date_created,
date_modified,
authors,
last_modification_author,
return p
def extract_author(self, raw):
    """Split a ``local@domain@name`` string into its three components.

    ``read_file`` produces these strings with the git format
    ``%aE@%aN``. Any further '@' characters are kept as part of the name.

    Args:
        raw: String containing at least one '@'.

    Returns:
        Tuple ``(local_part, domain, name)``; ``name`` is empty when the
        input has only one '@'.

    Raises:
        IndexError: If ``raw`` contains no '@' at all.
    """
    pieces = raw.split('@')
    local_part, domain = pieces[0], pieces[1]
    # Re-join the remainder so an '@' inside the name survives.
    name = '@'.join(pieces[2:])
    return (local_part, domain, name)
def is_supported_lang(self, lang):
    """Check a language code against ``self.config['lang']['supported']``.

    Args:
        lang: Candidate language code.

    Returns:
        ``True`` when ``lang`` is a string contained in the configured
        supported languages; ``False`` otherwise, including for non-strings.
    """
    return isinstance(lang, str) and lang in self.config['lang']['supported']
def secure_slug(self, slug):
    """Lower-case a slug and strip every character that is not whitelisted.

    Only ASCII letters, digits and '-' survive; everything else is
    removed without replacement.

    Args:
        slug: The slug to sanitise.

    Returns:
        The sanitised, lower-cased slug.

    Raises:
        Exception: If ``slug`` is not a string, or if nothing is left
            after filtering.
    """
    if not isinstance(slug, str):
        raise Exception("slug is not a string: '", slug, "'")
    whitelist = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-"
    cleaned = "".join(ch for ch in slug.lower() if ch in whitelist)
    if not cleaned:
        raise Exception("slug is empty")
    return cleaned
def get_category_name(self, metadata, subpath):
    """Choose the category name for a file.

    Args:
        metadata: Mapping that may contain a 'category' entry.
        subpath: Sequence of directory names for the file.

    Returns:
        ``metadata['category']`` when present; otherwise the single
        element of ``subpath`` when it has exactly one; otherwise 'misc'.
    """
    if 'category' in metadata:
        return metadata['category']
    return subpath[0] if len(subpath) == 1 else 'misc'
def run_git(self, path, subcmd, extra_args):
    """Run ``git <subcmd> <extra_args> -- <file name>`` in the file's directory.

    The path is resolved with ``os.path.realpath`` first; git is started
    with the resolved file's directory as working directory and receives
    empty standard input. The exit status is not checked and stderr is
    not captured.

    Args:
        path: Path of the file the git command is about.
        subcmd: Git sub-command, e.g. "log".
        extra_args: List of further arguments placed before the "--".

    Returns:
        Git's standard output decoded as UTF-8.
    """
    workdir, target = os.path.split(os.path.realpath(path))
    command = ["git", subcmd] + extra_args + ["--", target]
    completed = subprocess.run(
        command,
        input=b"",
        stdout=subprocess.PIPE,
        cwd=workdir,
    )
    return completed.stdout.decode('utf-8')