#!/usr/bin/env python3
import subprocess
import json
import sys


def run_pandoc(source, factories, lang, base="markdown", extensions=None, extra_args=None):
    """Run pandoc on ``source`` and parse the resulting JSON AST into elements.

    Args:
        source: Document text; it is encoded as UTF-8 and piped to pandoc.
        factories: Passed through unchanged to every created element.
        lang: Passed through unchanged to every created element.
        base: Name of the pandoc input format.
        extensions: Either a list of extension names (entries starting with
            '#' are skipped, entries may carry an explicit '+'/'-' prefix,
            bare names are enabled with '+') or a dict mapping extension
            names to option dicts with optional "ignore"/"enabled" keys.
            Any other value (including ``None``) adds no extensions.
        extra_args: Additional command line arguments appended to the
            pandoc invocation.

    Returns:
        A tuple ``(blocks, contentmetadata)``: the list of parsed top-level
        block elements and a dict with the keys "toc_list" and "toc_count".

    Raises:
        Exception: If pandoc reports an API version other than 1.22.2.
    """
    to = "json"

    # Build the "+ext-ext..." suffix that is appended to the input format.
    ext_str = ""
    if isinstance(extensions, list):
        for ext in extensions:
            if ext.startswith('#'):
                continue
            if ext.startswith(('+', '-')):
                ext_str += ext
            elif len(ext) > 0:
                ext_str += '+' + ext
    elif isinstance(extensions, dict):
        for ext_key, ext in extensions.items():
            # TODO catch 'illegal' ext_keys (containing spaces for example)
            if ext.get("ignore"):
                continue
            flag = '+' if ext.get("enabled", True) else '-'
            ext_str += flag + ext_key
    #print(ext_str)

    pandoc_bin = "pandoc"
    args = [pandoc_bin, "-f", base + ext_str, "-t", to]
    if extra_args is not None:
        args += list(extra_args)

    proc = subprocess.run(
        args,
        input=source.encode('utf-8', errors='strict'),
        stdout=subprocess.PIPE,
    )
    out_dict = json.loads(proc.stdout.decode('utf-8'))

    api_version = out_dict["pandoc-api-version"]
    if api_version != [1, 22, 2]:
        raise Exception("Unsupported pandoc-api version", api_version)

    custom_syntax_handler = CustomSyntaxHandler(factories, lang, custom_syntax_register)

    # parse blocks
    blocks = []
    for raw_block in out_dict['blocks']:
        #print('raw_block: ', type(raw_block), raw_block)
        block = parse_from_register(factories, lang, block_parsing_register, raw_block, custom_syntax_handler)
        if block is not None:
            blocks.append(block)

    contentmetadata = {
        "toc_list": [],
        "toc_count": 0,
    }
    # TODO TOC
    #contentmetadata["toc"] = build_toc(n["toc_list"].copy())

    #blocks = json.loads(json.dumps(blocks, cls=ElementEncoder)) # Reduce to 'simple' dict, which can be converted to JSON in Jinja2.
    return (blocks, contentmetadata)


def parse_from_register(factories, lang, reg: dict, h: dict, custom_syntax_handler):
    """Create and parse the element registered for one raw pandoc node.

    Args:
        factories: Passed through to the element constructor.
        lang: Passed through to the element constructor.
        reg: Register mapping pandoc type names either to an element class
            or to a dict with the keys "handler" and "etype" (and an
            optional truthy "TODO" marker).
        h: Raw pandoc node; ``h['t']`` is the pandoc type and the optional
            ``h['c']`` is its content.
        custom_syntax_handler: Object whose ``handle(element)`` is called
            after parsing and whose return value replaces the element.

    Returns:
        The parsed (possibly replaced) element, or ``None`` when the
        register entry is marked as TODO.

    Raises:
        Exception: If the pandoc type is not contained in ``reg``.
    """
    t = h['t']  # pandoc type
    if t not in reg:
        raise Exception("pandoc type not in register", t, h)
    entry = reg[t]  # registry entry
    c = h.get('c')  # nodes without content have no 'c' key

    if isinstance(entry, dict):
        if entry.get("TODO"):
            print("Warning: entry is marked as TODO: ", t, entry, file=sys.stderr)
            return None
        res = entry['handler'](factories, lang, custom_syntax_handler, entry['etype'])
    else:
        res = entry(factories, lang, custom_syntax_handler)
    res.parse(c)

    # Handle custom syntax
    return custom_syntax_handler.handle(res)


#class ElementEncoder(json.JSONEncoder):
#    def default(self, obj):
#        if isinstance(obj, Element):
#            res = {}
#            for key, name in obj.export.items():
#                res[name] = getattr(obj, key)
#            return res
#        return super().default(obj)


class Element():
    """Base class of all parsed pandoc elements.

    Subclasses override ``parse_internal`` and use the ``parse_*`` helpers
    to turn raw pandoc JSON values into attributes on the instance.
    """

    def __init__(self, factories, lang, custom_syntax_handler, etype=None):
        """Store the shared context; ``etype`` overrides the class value if given."""
        self.factories = factories
        self.lang = lang
        self.custom_syntax_handler = custom_syntax_handler
        if etype is not None:
            self.etype = etype
        self.children = []
        self.export = {}
        #self.type = etype
        #self.export_key('etype', 'type')
        #self.export_key('eclass', 'class')
        #self.export_key('children')

    def addChild(self, child):
        """Append ``child`` to ``self.children`` unless it is ``None``."""
        if child is not None:
            self.children.append(child)

    def parse_internal(self, pandocraw):
        """Parse the raw content of this element; must be overridden."""
        raise Exception("parse_internal not overridden: ", self)

    def parse(self, pandocraw):
        """Parse the raw pandoc content by delegating to ``parse_internal``."""
        #prevkeys = dir(self)
        self.parse_internal(pandocraw)
        #afterkeys = dir(self)
        #for key in afterkeys:
        #    if key not in prevkeys:
        #        self.export_key(key)

    def parse_blocks(self, raw_blocks):
        """Parse a list of raw blocks; ``None`` results are dropped."""
        if not isinstance(raw_blocks, list):
            raise Exception("raw_blocks is not a list: ", raw_blocks)
        res = []
        for raw_block in raw_blocks:
            block = self.parse_block(raw_block)
            if block is not None:
                res.append(block)
        return res

    def parse_inlines(self, raw_inlines):
        """Parse a list of raw inlines; ``None`` results are dropped."""
        if not isinstance(raw_inlines, list):
            raise Exception("raw_inlines is not a list: ", raw_inlines)
        res = []
        for raw_inline in raw_inlines:
            inline = self.parse_inline(raw_inline)
            if inline is not None:
                res.append(inline)
        return res

    def parse_block(self, raw_block):
        """Parse one raw block via the block register and record it as child."""
        res = parse_from_register(self.factories, self.lang, block_parsing_register, raw_block, self.custom_syntax_handler)
        self.addChild(res)
        return res

    def parse_inline(self, raw_inline):
        """Parse one raw inline via the inline register and record it as child."""
        res = parse_from_register(self.factories, self.lang, inline_parsing_register, raw_inline, self.custom_syntax_handler)
        self.addChild(res)
        return res

    def parse_attr(self, raw_attr):
        """Parse a raw attribute triple ``[id, [classes], key-values]``.

        The key-value part is accepted either as a list of ``[key, value]``
        pairs or as a flat ``[key1, value1, key2, value2]`` list.

        Returns:
            A dict with the keys 'id', 'classes' (list) and 'extra' (dict of
            the key-value attributes). Empty strings are turned into ``None``.
        """
        #print("called parse_attr: ", raw_attr)
        res = {}
        res['id'] = self.parse_text(raw_attr[0])
        res['classes'] = [self.parse_text(c) for c in raw_attr[1]]

        raw_pairs = raw_attr[2]
        if all(isinstance(p, (list, tuple)) and len(p) == 2 for p in raw_pairs):
            pairs = [(p[0], p[1]) for p in raw_pairs]
        else:
            # convert [ "key1", "value1", "key2", "value2" ] to pairs
            it = iter(raw_pairs)
            pairs = list(zip(it, it))

        # Kept in a separate dict so that a key can never clobber
        # 'id', 'classes' or 'extra' itself.
        res['extra'] = {key: self.parse_text(value) for key, value in pairs}
        return res

    def parse_text(self, raw_text):
        """Return ``raw_text`` unchanged, or ``None`` if it is empty."""
        if len(raw_text) > 0:
            return raw_text
        return None

    def parse_int(self, raw_num):
        """Return ``raw_num`` unchanged."""
        return raw_num  # TODO

    def parse_double_as_percentage(self, raw_num):
        """Return a dict with the raw number and the number times 100."""
        return {
            "raw": raw_num,
            "percentage": raw_num * 100,
        }

    def parse_target(self, raw_target):  # For URLs
        """Parse a raw target ``[url, title]`` into 'link' and 'title'.

        The link is looked up through ``self.factories['link'].get_by_raw``.
        """
        res = {}
        rawurl = self.parse_text(raw_target[0])
        res['link'] = self.factories['link'].get_by_raw(rawurl, self.lang)
        res['title'] = self.parse_text(raw_target[1])
        return res

    def parse_code(self, code):
        """Return the code text together with its list of lines."""
        return {
            "code": self.parse_text(code),
            "code_lines": code.splitlines(),
        }

    def parse_enum(self, mapping, enum):
        """Map a raw enum ``{"t": name}`` to ``mapping[name]``.

        Raises:
            Exception: If ``enum`` is not a dict with exactly the key "t",
                or if the name is not contained in ``mapping``.
        """
        if len(enum.keys()) != 1 or "t" not in enum:
            raise Exception("enum is not a valid enum", enum, mapping)
        enum = enum["t"]
        if enum not in mapping:
            raise Exception("enum not found in mapping")
        return mapping[enum]

    def update(self, d):  # Like dict.update()
        """Set every key of ``d`` as an attribute on this element."""
        for key, value in d.items():
            setattr(self, key, value)
################################ BLOCK #########################################

class Block(Element):
    """Base class for all block-level elements."""
    eclass = "block"


class BlockHeader(Block):  # Int Attr [Inline]
    """Header with level, attributes and inline content."""
    etype = "header"

    def parse_internal(self, pandocraw):
        self.level = self.parse_int(pandocraw[0])
        self.attr = self.parse_attr(pandocraw[1])
        self.content = self.parse_inlines(pandocraw[2])


class BlockRaw(Block):  # Format Text
    """Raw block consisting of a format name and the raw text."""
    etype = "rawblock"

    def parse_internal(self, pandocraw):
        self.format = self.parse_text(pandocraw[0])
        self.raw = self.parse_text(pandocraw[1])


class BlockList(Block):  # [[Block]]
    """Shared helper base for list-like blocks."""

    def parse_listitems(self, rawitems):
        """Parse every list item (a list of blocks) and count the items."""
        res = {}
        res['items'] = [self.parse_blocks(itemrawblocks) for itemrawblocks in rawitems]
        res['count'] = len(res['items'])
        return res


class BlockBulletList(BlockList):  # [[Block]]
    """Bullet list; sets ``items`` and ``count``."""
    etype = "bulletlist"

    def parse_internal(self, pandocraw):
        self.update(self.parse_listitems(pandocraw))


class BlockOrderedList(BlockList):  # ListAttributes [[Block]]
    """Ordered list; sets ``start``, ``style``, ``delim``, ``items`` and ``count``."""
    etype = "orderedlist"

    def parse_internal(self, pandocraw):
        self.update(self.parse_orderedlist_attr(pandocraw[0]))
        self.update(self.parse_listitems(pandocraw[1]))

    def parse_orderedlist_attr(self, attrs: list):
        """Parse the raw list attributes ``[start, style, delimiter]``."""
        res = {}
        res["start"] = self.parse_int(attrs[0])
        styles = {
            "DefaultStyle": "default",
            "Example": "example",
            "Decimal": "decimal",
            "LowerRoman": "lower_roman",
            "UpperRoman": "upper_roman",
            "LowerAlpha": "lower_alpha",
            "UpperAlpha": "upper_alpha",
        }
        res["style"] = self.parse_enum(styles, attrs[1])
        delims = {
            "DefaultDelim": "default",
            "Period": "period",
            "OneParen": "one_parenthesis",
            "TwoParens": "two_parentheses",
        }
        res["delim"] = self.parse_enum(delims, attrs[2])
        return res


class BlockQuote(Block):  # [Block]
    """Block quote containing blocks."""
    etype = "blockquote"

    def parse_internal(self, pandocraw):
        self.content = self.parse_blocks(pandocraw)
        # TODO add name, color, time


class BlockPlain(Block):  # [Inline]
    """Plain block containing inlines."""
    etype = "plain"

    def parse_internal(self, pandocraw):
        self.content = self.parse_inlines(pandocraw)


class BlockParagraph(Block):  # [Inline]
    """Paragraph containing inlines."""
    etype = "paragraph"

    def parse_internal(self, pandocraw):
        self.content = self.parse_inlines(pandocraw)


class BlockCode(Block):  # Attr Text
    """Code block; sets ``attr``, ``code`` and ``code_lines``."""
    etype = "codeblock"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.update(self.parse_code(pandocraw[1]))


class BlockHorizontalRule(Block):  #
    """Horizontal rule; has no content."""
    etype = "horizontalrule"

    def parse_internal(self, pandocraw):
        pass


class BlockContainer(Block):  # Attr [Block]
    """Container of blocks with attributes (a div)."""
    etype = "blockcontainer"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.content = self.parse_blocks(pandocraw[1])
        # TODO handle alerts


class BlockDefinitionList(Block):  # [([Inline], [[Block]])]
    """Definition list; ``items`` holds dicts with 'term' and 'definitions'."""
    etype = "definitionlist"

    def parse_internal(self, pandocraw):
        """
        Example input:
        [
            [
                [{'t': 'Str', 'c': 'AStA'}],
                [
                    [{'t': 'Plain', 'c': [{'t': 'Str', 'c': 'Allgemeiner'}, {'t': 'Space'}, {'t': 'Str', 'c': 'studierenden'}, {'t': 'Space'}, {'t': 'Str', 'c': 'Ausschuss'}]}]
                ]
            ]
        ]
        """
        self.items = []
        for rawitem in pandocraw:
            item = {}
            item["term"] = self.parse_inlines(rawitem[0])
            item["definitions"] = [self.parse_blocks(rawdefinition) for rawdefinition in rawitem[1]]
            self.items.append(item)


class BlockTable(Block):  # Attr Caption [ColSpec] TableHead [TableBody] TableFoot
    """Table with caption, column specs, head, bodies and foot."""
    etype = "table"

    def parse_internal(self, pandocraw):
        """
        Example input (abbreviated; '...' marks omitted repetitions):
        [
            ['', [], []],  # Attr
            [None, []],    # Caption
            [              # [ColSpec]
                [{'t': 'AlignLeft'}, {'t': 'ColWidth', 'c': 0.5279503105590062}],
                [{'t': 'AlignCenter'}, {'t': 'ColWidth', 'c': 0.11180124223602485}],
                ...
            ],
            [              # TableHead
                ['', [], []],
                [   # rows
                    [
                        ['', [], []],
                        [   # cells
                            [['', [], []], {'t': 'AlignDefault'}, 1, 1, []],
                            [['', [], []], {'t': 'AlignDefault'}, 1, 1, [{'t': 'Plain', 'c': [{'t': 'Str', 'c': 'Owner'}, {'t': 'Space'}, {'t': 'Str', 'c': 'read/write'}]}]],
                            ...
                        ]
                    ]
                ]
            ],
            [              # [TableBody]
                [
                    ['', [], []],
                    0,     # row head columns
                    [],    # intermediate head rows
                    [      # intermediate body rows
                        [
                            ['', [], []],
                            [   # cells
                                [['', [], []], {'t': 'AlignDefault'}, 1, 1, [{'t': 'Plain', 'c': [{'t': 'Span', 'c': [['', ['text-nowrap'], []], [{'t': 'RawInline', 'c': ['html', '<i class="fa fa-leaf fa-fw">']}, {'t': 'RawInline', 'c': ['html', '</i>']}, {'t': 'Space'}, {'t': 'Strong', 'c': [{'t': 'Str', 'c': 'Freely'}]}]]}]}]],
                                [['', [], []], {'t': 'AlignDefault'}, 1, 1, [{'t': 'Plain', 'c': [{'t': 'Str', 'c': '✔'}]}]],
                                ...
                            ]
                        ],
                        ...
                    ]
                ]
            ],
            [['', [], []], []]  # TableFoot
        ]
        """
        self.attr = self.parse_attr(pandocraw[0])
        self.caption = self.parse_caption(pandocraw[1])
        self.column_specs = self.parse_column_specs(pandocraw[2])
        self.head = self.parse_head(pandocraw[3])
        self.bodies = self.parse_bodies(pandocraw[4])
        self.foot = self.parse_foot(pandocraw[5])

    def parse_caption(self, raw_caption):
        """Parse the caption blocks; a short caption only triggers a warning."""
        #print("called parse_caption: ", raw_caption)
        if raw_caption[0] is not None:
            print("Warning: Found table with unsupported short caption:", raw_caption, file=sys.stderr)
        return self.parse_blocks(raw_caption[1])

    def parse_column_specs(self, raw_column_specs):
        """Parse alignment and (optional) width of every column."""
        #print("called parse_column_specs: ", raw_column_specs)
        res = []
        for cur in raw_column_specs:
            colspec = {}
            colspec["alignment"] = self.parse_alignment(cur[0])
            if cur[1]['t'] == "ColWidthDefault":
                colspec["has_width"] = False
            else:
                colspec["has_width"] = True
                colspec["width"] = self.parse_double_as_percentage(cur[1]['c'])
            res.append(colspec)
        return res

    def parse_alignment(self, raw_alignment):
        """Map a raw alignment enum to 'left', 'right', 'center' or 'default'."""
        return self.parse_enum({
            "AlignLeft": "left",
            "AlignRight": "right",
            "AlignCenter": "center",
            "AlignDefault": "default"
        }, raw_alignment)

    def parse_cells(self, raw_cells):
        """Parse the cells of one row."""
        cells = []
        for cur in raw_cells:
            cell = {}
            cell["attr"] = self.parse_attr(cur[0])
            cell["alignment"] = self.parse_alignment(cur[1])
            cell["row_span"] = self.parse_int(cur[2])  # number of rows occupied by a cell; the height of a cell
            cell["column_span"] = self.parse_int(cur[3])  # number of columns occupied by a cell; the width of a cell
            cell["content"] = self.parse_blocks(cur[4])
            cells.append(cell)
        return cells

    def parse_rows(self, raw_rows):
        """Parse a list of rows into dicts with 'attr' and 'cells'."""
        rows = []
        for cur in raw_rows:
            row = {}
            row["attr"] = self.parse_attr(cur[0])
            row["cells"] = self.parse_cells(cur[1])
            rows.append(row)
        return rows

    def parse_head(self, raw_head):
        """Parse the table head into a dict with 'attr' and 'rows'."""
        #print("called parse_head: ", raw_head)
        res = {}
        res["attr"] = self.parse_attr(raw_head[0])
        res["rows"] = self.parse_rows(raw_head[1])
        return res

    def parse_bodies(self, raw_bodies):
        """Parse all table bodies."""
        #print("called parse_bodies: ", raw_bodies)
        bodies = []
        for cur in raw_bodies:
            body = {}
            body["attr"] = self.parse_attr(cur[0])
            body["row_head_columns"] = self.parse_int(cur[1])  # number of row header columns in the intermediate body
            body["intermediate_head"] = self.parse_rows(cur[2])
            body["intermediate_body"] = self.parse_rows(cur[3])
            bodies.append(body)
        return bodies

    def parse_foot(self, raw_foot):
        """Parse the table foot into a dict with 'attr' and 'rows'."""
        #print("called parse_foot: ", raw_foot)
        res = {}
        res["attr"] = self.parse_attr(raw_foot[0])
        res["rows"] = self.parse_rows(raw_foot[1])
        return res


class BlockLineBlock(Block):  # [[Inline]]
    """Line block; ``lines`` holds one list of inlines per line."""
    etype = "lineblock"

    def parse_internal(self, pandocraw):
        """
        Example input:
        [
            [
                {'t': 'Str', 'c': 'Fachgruppe'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'Informatik'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'Georg-August-Universität'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'Göttingen'}
            ],
            [
                {'t': 'Str', 'c': 'Goldschmidtstr.'}, {'t': 'Space'},
                {'t': 'Str', 'c': '7'}
            ],
            [
                {'t': 'Str', 'c': 'Institut'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'für'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'Informatik'}
            ],
            [
                {'t': 'Str', 'c': '37077'}, {'t': 'Space'},
                {'t': 'Str', 'c': 'Göttingen'}
            ]
        ]
        """
        self.lines = [self.parse_inlines(line) for line in pandocraw]
        self.linecount = len(self.lines)


############################## INLINE #########################################

class Inline(Element):
    """Base class for all inline-level elements."""
    eclass = "inline"


class InlineSpace(Inline):  #
    """Space; has no content."""
    etype = "space"

    def parse_internal(self, pandocraw):
        pass


class InlineLineBreak(Inline):  #
    """Line break; has no content."""
    etype = "linebreak"

    def parse_internal(self, pandocraw):
        pass


class InlineSoftBreak(Inline):  #
    """Soft break; has no content."""
    etype = "softbreak"

    def parse_internal(self, pandocraw):
        pass


class InlineString(Inline):  # Text
    """String; the text is stored in ``text``."""
    etype = "string"

    def parse_internal(self, pandocraw):
        self.text = self.parse_text(pandocraw)
        # TODO handle abbreviations


class InlineSimple(Inline):  # [Inline]
    """Inline that only wraps other inlines; its etype comes from the register."""

    def parse_internal(self, pandocraw):
        self.content = self.parse_inlines(pandocraw)


class InlineLink(Inline):  # Attr [Inline] Target
    """Link; sets ``attr``, ``content``, ``link`` and ``title``."""
    etype = "link"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.content = self.parse_inlines(pandocraw[1])
        self.update(self.parse_target(pandocraw[2]))


class InlineImage(Inline):  # Attr [Inline] Target
    """Image; sets ``attr``, ``alt``, ``link`` and ``title``."""
    etype = "image"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.alt = self.parse_inlines(pandocraw[1])
        self.update(self.parse_target(pandocraw[2]))


class InlineQuoted(Inline):  # QuoteType [Inline]
    """Quoted inlines; ``quotetype`` is 'single' or 'double'."""
    etype = "quoted"

    def parse_internal(self, pandocraw):
        self.quotetype = self.parse_enum({"SingleQuote": "single", "DoubleQuote": "double"}, pandocraw[0])
        self.content = self.parse_inlines(pandocraw[1])


class InlineMath(Inline):  # MathType Text
    """Math; ``mathtype`` is 'display' or 'inline'."""
    etype = "math"

    def parse_internal(self, pandocraw):
        self.mathtype = self.parse_enum({"DisplayMath": "display", "InlineMath": "inline"}, pandocraw[0])
        self.math = self.parse_text(pandocraw[1])


class InlineCode(Inline):  # Attr Text
    """Inline code; sets ``attr``, ``code`` and ``code_lines``."""
    etype = "code"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.update(self.parse_code(pandocraw[1]))


class InlineContainer(Inline):  # Attr [Inline]
    """Container of inlines with attributes (registered for 'Span')."""
    etype = "inlinecontainer"

    def parse_internal(self, pandocraw):
        self.attr = self.parse_attr(pandocraw[0])
        self.content = self.parse_inlines(pandocraw[1])
        # TODO handle emojis


class InlineFootnote(Inline):  # [Block]
    """Footnote containing blocks."""
    etype = "footnote"

    def parse_internal(self, pandocraw):
        self.content = self.parse_blocks(pandocraw)
        # TODO add footnote ids
        # TODO add back references
        # TODO handle duplicates


class InlineRaw(Inline):  # Format Text
    """Raw inline consisting of a format name and the raw text."""
    etype = "rawinline"

    def parse_internal(self, pandocraw):
        self.format = self.parse_text(pandocraw[0])
        self.raw = self.parse_text(pandocraw[1])


############################# CUSTOM SYNTAX ####################################

class CustomSyntaxHandler:
    """Detects custom syntax on parsed elements and swaps in replacements.

    Every register entry is a test container ("tests_all" or "tests_any")
    with an additional "replace_with" element class.
    """

    def __init__(self, factories, lang, custom_syntax_register):
        self.factories = factories
        self.lang = lang
        self.register = custom_syntax_register

    # Executed after the parsing of every element.
    # Returns either origelement or the new one to replace it.
    def handle(self, origelement):
        """Return the replacement for ``origelement`` or the element itself."""
        for curreg in self.register:
            if self.run_tests(curreg, origelement):
                #raise Exception("Found replacement!!!", curreg, origelement)
                # Diagnostics go to stderr like the other warnings in this file.
                print("DEBUG: Found custom syntax!!!", curreg, origelement, file=sys.stderr)
                res = curreg["replace_with"](self.factories, self.lang, self)  # create new replacement element
                res.replace(origelement, curreg)  # tell it what it is replacing
                return res  # return new replacement element
        return origelement  # no custom syntax detected

    def run_test(self, test, origelement):
        """Run a single test (or nested test container) against an element.

        A test without "key" is treated as a nested container. Otherwise the
        attribute named by "key" is checked according to "type" ("str" or
        "list").

        Raises:
            Exception: If the attribute is missing, has the wrong Python
                type, or the test itself is not understood.
        """
        if "key" not in test:
            # This is just another test container.
            return self.run_tests(test, origelement)

        tkey = test["key"]
        if not hasattr(origelement, tkey):
            raise Exception("Key not in Element:", tkey, origelement, test)
        obj = getattr(origelement, tkey)
        ttype = test["type"]
        #print("DEBUG: Running test: ", ttype, "tkey:", tkey, "test:", test, "obj:", obj, "origelement:",origelement)

        if ttype == "str":
            return self._run_str_test(test, obj, origelement)
        if ttype == "list":
            return self._run_list_test(test, obj, origelement)
        raise Exception("Unknown Test Type:", ttype, test)

    def _run_str_test(self, test, obj, origelement):
        """Check a string attribute against an "is" or "contains" test."""
        if not isinstance(obj, str):
            raise Exception("Object is not a string:", obj, type(obj), origelement, test)
        if "is" in test:
            return test["is"] == obj
        if "contains" in test:
            return test["contains"] in obj
        raise Exception("Don't know what to do with this test:", test["type"], test, obj, origelement)

    def _run_list_test(self, test, obj, origelement):
        """Check a list attribute: length limits, then for_each/first/last."""
        if not isinstance(obj, list):
            raise Exception("Object is not a list:", obj, type(obj), origelement, test)
        olen = len(obj)
        if "len" in test and test["len"] != olen:
            return False
        if "len_min" in test and test["len_min"] > olen:
            return False
        if "len_max" in test and test["len_max"] < olen:
            return False
        # An empty list never matches, which also keeps obj[0]/obj[-1] safe.
        if olen == 0:
            return False
        if "for_each" in test:
            tfor = test["for_each"]
            for element in obj:
                if not self.run_tests(tfor, element):
                    return False
        if "first" in test and not self.run_tests(test["first"], obj[0]):
            return False
        if "last" in test and not self.run_tests(test["last"], obj[-1]):
            return False
        return True

    def run_tests(self, testcontainer, origelement):
        """Evaluate a container holding either "tests_all" or "tests_any".

        Raises:
            Exception: If the container has neither key.
        """
        if "tests_all" in testcontainer:
            res = self.run_tests_all(testcontainer["tests_all"], origelement)
        elif "tests_any" in testcontainer:
            res = self.run_tests_any(testcontainer["tests_any"], origelement)
        else:
            raise Exception("No tests found:", testcontainer, origelement)
        #print("DEBUG: run_tests returned ",res," with ", testcontainer, " on ", origelement)
        return res

    def run_tests_all(self, tests, origelement):
        """Return True if every test passes; stops at the first failure."""
        for test in tests:
            if not self.run_test(test, origelement):
                return False
        return True

    def run_tests_any(self, tests, origelement):
        """Return True if any test passes; stops at the first success."""
        for test in tests:
            if self.run_test(test, origelement):
                return True
        return False


class CustomBlockTOC(Block):
    """Replacement element for the '[TOC]' custom syntax."""
    etype = "toc"

    def replace(self, origelement, custom_sytax_register_entry):
        """Called with the replaced element and its register entry; no-op so far."""
        pass  # TODO continue


############################## REGISTER #######################################

# Maps pandoc inline types to an element class, or to a dict naming the
# handler class and the etype it should be created with.
inline_parsing_register = {
    "Space": InlineSpace,
    "Str": InlineString,
    "Strong": {"handler": InlineSimple, "etype": "strong"},
    "Emph": {"handler": InlineSimple, "etype": "emph"},
    "Underline": {"handler": InlineSimple, "etype": "underline"},
    "Strikeout": {"handler": InlineSimple, "etype": "strikeout"},
    "Superscript": {"handler": InlineSimple, "etype": "superscript"},
    "Subscript": {"handler": InlineSimple, "etype": "subscript"},
    "SmallCaps": {"handler": InlineSimple, "etype": "smallcaps"},
    "Link": InlineLink,
    "Image": InlineImage,
    "Quoted": InlineQuoted,
    "Math": InlineMath,
    "Code": InlineCode,
    "Span": InlineContainer,
    "RawInline": InlineRaw,
    "Note": InlineFootnote,
    #"Cite" :{"type":"citation","TODO": True, "c" : [] }, # [Citation] [Inline] # TODO find file that triggers Cite
    "SoftBreak": InlineSoftBreak,
    "LineBreak": InlineLineBreak,
}

# Maps pandoc block types to their element class.
block_parsing_register = {
    "Plain": BlockPlain,
    "Para": BlockParagraph,
    "BlockQuote": BlockQuote,
    "BulletList": BlockBulletList,
    "RawBlock": BlockRaw,
    "Header": BlockHeader,
    "CodeBlock": BlockCode,
    "Div": BlockContainer,
    "OrderedList": BlockOrderedList,
    "HorizontalRule": BlockHorizontalRule,
    "Table": BlockTable,
    "DefinitionList": BlockDefinitionList,
    "LineBlock": BlockLineBlock,
    #"Null" :{"type":"nothing" }, # TODO find file that triggers Null
}

# Define custom syntax
custom_syntax_register = [
    # Each entry in this list describes one custom syntax. The whole list is
    # tested after every element has been parsed. If the tests succeed, the
    # element is replaced with 'replace_with'.
    {
        # Table of Contents via a Paragraph containing '[TOC]' as its only content.
        "replace_with": CustomBlockTOC,
        # "tests_any": []
        "tests_all": [
            {"key": "eclass", "type": "str", "is": "block"},
            {"key": "etype", "type": "str", "is": "paragraph"},
            {
                "key": "content", "type": "list",
                "len": 1,
                #"len_min": 1,
                #"len_max": 1,
                #"for_each": { "tests_all": [ ] },
                "first": {"tests_all": [
                    {"key": "eclass", "type": "str", "is": "inline"},
                    {"key": "etype", "type": "str", "is": "string"},
                    {"key": "text", "type": "str", "is": "[TOC]"}
                ]}
                #"last": { "tests_all": [ ] },
            }
        ]
    }
]