Skip to content
Snippets Groups Projects
reader.py 7.05 KiB
Newer Older
  • Learn to ignore specific revisions
  • Jake's avatar
    Jake committed
    import datatypes
    
    Jake's avatar
    Jake committed
    import pandoc
    
    Jake's avatar
    Jake committed
    
    import frontmatter
    
    from datetime import datetime
    
    import subprocess
    import os
    
    
    Jake's avatar
    Jake committed
    class Reader:
        """Abstract base for readers that turn source files into site objects.

        Holds the shared ``config`` and ``factories`` objects; both methods
        below must be overridden by concrete readers.
        """

        def __init__(self, config, factories):
            """Store the configuration and the object factories for later use."""
            self.config = config
            self.factories = factories

        def can_read_file(self, mimetype, mimeencoding):
            """Tell whether this reader handles the given mimetype/encoding.

            Raises:
                NotImplementedError: always; subclasses must override.
            """
            raise NotImplementedError("Function not implemented.")

        def read_file(self, path, subpath, mimetype=None, mimeencoding=None):
            """Read the file at ``path``.

            ``mimetype`` and ``mimeencoding`` are optional here so the base
            signature accepts the four-argument form the subclasses in this
            file define, while the old two-argument call keeps working.

            Raises:
                NotImplementedError: always; subclasses must override.
            """
            raise NotImplementedError("Function not implemented.")
    
    
    class RawFileReader(Reader):
        """Reader that accepts any file and stores its bytes unmodified."""

        def can_read_file(self, mimetype, mimeencoding):
            """Return True for every mimetype and encoding."""
            return True

        def read_file(self, path, subpath, mimetype, mimeencoding):
            """Load ``path`` as bytes and initialise the matching file object.

            The object is looked up by file name in ``self.factories['file']``
            and initialised with the raw bytes and the directory components
            of ``path``. Note that ``subpath``, ``mimetype`` and
            ``mimeencoding`` are not used: the directory list is derived from
            ``path`` itself.

            Args:
                path: Path of the file, using '/' as separator.
                subpath: Unused.
                mimetype: Unused.
                mimeencoding: Unused.
            """
            # The context manager closes the handle even when read() fails.
            with open(path, 'rb') as f:
                rawcontents = f.read()

            pathlist = path.split('/')
            subpathlist = pathlist[:-1]
            filename = pathlist[-1]

            self.factories['file'].get(filename).init(rawcontents, subpathlist)
    
    
    Jake's avatar
    Jake committed
    class PandocReader(Reader):
    
    Jake's avatar
    Jake committed
    
    
    Jake's avatar
    Jake committed
        def __init__(self, config, factories, mimetype, mimeencoding, base, extensions):
            """Create a reader bound to one mimetype/encoding pair.

            Args:
                config: Passed on to the base class.
                factories: Passed on to the base class.
                mimetype: Mimetype this reader accepts (see ``can_read_file``).
                mimeencoding: Encoding this reader accepts (see ``can_read_file``).
                base: Forwarded as ``base`` to ``pandoc.run_pandoc`` in ``read_file``.
                extensions: Forwarded as ``extensions`` to ``pandoc.run_pandoc``.
            """
            super().__init__(config, factories)
            # Acceptance criteria.
            self.mimetype, self.mimeencoding = mimetype, mimeencoding
            # Pandoc conversion settings.
            self.base, self.extensions = base, extensions
    
        def can_read_file(self, mimetype, mimeencoding):
            if self.mimetype != mimetype:
                return False
            if self.mimeencoding != mimeencoding:
                return False
            return True
    
    Jake's avatar
    Jake committed
    
    
    
    Jake's avatar
    Jake committed
        def read_file(self, path, subpath, mimetype, mimeencoding):
            """Read a front-matter/pandoc source file and initialise its page.

            The file is split into front matter and body with
            ``frontmatter.parse``; the body is converted by
            ``pandoc.run_pandoc`` and the metadata it returns is merged over
            the front matter. Category, slug, language, title, dates, authors
            and status are derived and passed to the page obtained from
            ``self.factories['page']``.

            Args:
                path: Path of the file, using '/' as separator.
                subpath: Directory components of the file; at most one entry
                    is accepted, and a single entry is the default category.
                mimetype: Unused here.
                mimeencoding: Unused here.

            Returns:
                The object returned by ``self.factories['page'].get(slug, lang)``
                after its ``init`` has been called.

            Raises:
                Exception: if ``subpath`` has more than one entry, the file
                    name has no '.'-separated extension, the language is not
                    supported, the slug is missing or empty, the title is
                    missing, or the status is invalid.
            """
            if len(subpath) > 1:
                raise Exception("file is too deep in directory structure: ", path, subpath)

            # The context manager closes the handle even when read() fails.
            with open(path) as f:
                rawfile = f.read()
            metadata, rawcontent = frontmatter.parse(rawfile)

            category_name = self.get_category_name(metadata, subpath)

            # slug and lang: "<slug>.<ext>" or "<slug>.<lang>.<ext>"; names with
            # more parts yield neither, so the front matter has to supply them.
            filename = path.split('/')[-1]
            filenamelist = filename.split('.')[:-1]  # drop the extension
            slug = None
            lang = None
            if len(filenamelist) < 1:
                raise Exception("filename is empty?", path, subpath)
            elif len(filenamelist) == 1:
                slug = filenamelist[0]
            elif len(filenamelist) == 2:
                slug = filenamelist[0]
                lang = filenamelist[1]
            # Explicit front-matter values win over the file name.
            if 'slug' in metadata:
                slug = metadata['slug']
            if 'lang' in metadata:
                lang = metadata['lang']

            if lang is None:
                lang = self.config['lang']['default']

            if not self.is_supported_lang(lang):
                raise Exception("language is not supported: ", lang)
            slug = self.secure_slug(slug)

            # content
            content, contentmetadata = pandoc.run_pandoc(
                factories=self.factories,
                lang=lang,
                source=rawcontent,
                base=self.base,
                extensions=self.extensions,
            )
            metadata.update(contentmetadata)  # merge content specific metadata into metadata

            # title
            if 'title' not in metadata:
                raise Exception("File is missing title in metadata: ", path, subpath)
            title = metadata['title']

            # date_created and date_modified: default to "now", prefer the git
            # history (git log lists the newest commit first), and let
            # explicit metadata override both.
            now = datetime.now()
            date_modified = now
            date_created = now
            date_changes = self.run_git(
                path, "log", ["--follow", "--format=%ad", "--date", "iso-strict"]
            ).splitlines()
            if date_changes:
                date_modified = datetime.fromisoformat(date_changes[0])
                date_created = datetime.fromisoformat(date_changes[-1])
            if 'date' in metadata:
                date_created = metadata['date']
            if 'modified' in metadata:
                date_modified = metadata['modified']

            # author
            # TODO author from metadata
            authors_raw = self.run_git(
                path, "log", ["--follow", "--format=%aE@%aN", "--use-mailmap"]
            ).splitlines()
            # dict.fromkeys de-duplicates in linear time while keeping the
            # order of first appearance.
            authors = [self.extract_author(raw) for raw in dict.fromkeys(authors_raw)]
            if authors_raw:
                last_modification_author = self.extract_author(authors_raw[0])
            else:
                last_modification_author = None

            # status
            status = self.config['default_status']
            if 'status' in metadata:
                status = metadata['status']
            valid_status = ["published", "draft", "hidden"]
            if status not in valid_status:
                raise Exception("invalid status '", status, "' must be one of ", valid_status)

            # TODO summary

            p = self.factories['page'].get(slug, lang)
            p.init(
                    filename,
                    subpath,
                    rawfile,
                    metadata,
                    content,
                    title,
                    category_name,
                    date_created,
                    date_modified,
                    authors,
                    last_modification_author,
                    status)
            return p
    
        def extract_author(self, raw):
            """Split ``"<local>@<domain>@<name>"`` into a 3-tuple.

            Only the first two '@' act as separators, so a name containing
            '@' is kept intact; a missing name yields an empty string. An
            input without any '@' raises ``IndexError``.
            """
            pieces = raw.split('@', 2)
            local_part, domain = pieces[0], pieces[1]
            name = pieces[2] if len(pieces) > 2 else ''
            return (local_part, domain, name)
    
        def is_supported_lang(self, lang):
            if not isinstance(lang, str):
                return False
            return (lang in self.config['lang']['supported'])
    
    
        def secure_slug(self, slug):
            """Lower-case ``slug`` and drop every character outside the whitelist.

            Characters that are not ASCII letters, digits or '-' are removed
            (not replaced). Raises ``Exception`` if ``slug`` is not a string
            or if nothing remains after filtering.
            """
            if not isinstance(slug, str):
                raise Exception("slug is not a string: '", slug, "'")

            allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-"
            cleaned = "".join(ch for ch in slug.lower() if ch in allowed)
            if not cleaned:
                raise Exception("slug is empty")
            return cleaned
    
    
        def get_category_name(self, metadata, subpath):
            """Pick the category: metadata first, then the directory, else 'misc'.

            An explicit ``metadata['category']`` wins; otherwise a ``subpath``
            with exactly one entry supplies the name; anything else falls back
            to ``'misc'``.
            """
            if 'category' in metadata:
                return metadata['category']
            return subpath[0] if len(subpath) == 1 else 'misc'
    
        def run_git(self, path, subcmd, extra_args):
            """Run ``git <subcmd> <extra_args> -- <file>`` and return its stdout.

            The command runs in the directory of the resolved ``path`` with
            empty stdin. The exit status is not checked and stderr is not
            captured; stdout is decoded as UTF-8.

            Args:
                path: File the git command is restricted to.
                subcmd: Git sub-command, e.g. ``"log"``.
                extra_args: List of additional arguments placed before ``--``.
            """
            dir_path, filename = os.path.split(os.path.realpath(path))
            command = ["git", subcmd] + extra_args + ["--", filename]
            completed = subprocess.run(
                command,
                input=b"",
                stdout=subprocess.PIPE,
                cwd=dir_path,
            )
            return completed.stdout.decode('utf-8')