From 857440e1e0f9681f0a9f6ec904ada148865717f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tillmann=20D=C3=B6nicke?=
 <doenicke@MacBook-Pro-von-Tillmann.local>
Date: Wed, 9 Aug 2023 14:12:42 +0200
Subject: [PATCH] refactor(pipeline): add super classes for all pipeline
 components

---
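Each component family now gets a super class that owns what every concrete
implementation previously duplicated: the `assigns` declaration consumed by
the `@Language.factory` decorator, and the registration of the custom spaCy
extensions in `__init__`. A concrete component therefore only passes
`assigns=<SuperClass>.assigns` to its factory and calls `super().__init__(...)`.

A minimal sketch of the intended downstream use; `RegexSpeechTagger`, its
factory name and its no-op `__call__` are hypothetical and not part of this
patch, only `SpeechTagger` and its interface are:

    from typing import Any, Optional

    from spacy.language import Language
    from spacy.tokens import Doc

    from monapipe.pipeline.speech_tagger.speech_tagger import SpeechTagger


    @Language.factory(
        "regex_speech_tagger",  # hypothetical component name
        assigns=SpeechTagger.assigns,  # shared declaration from the super class
        default_config={"sentence_level": None},
    )
    def regex_speech_tagger(nlp: Language, name: str, sentence_level: Optional[bool]) -> Any:
        return RegexSpeechTagger(nlp, sentence_level)


    class RegexSpeechTagger(SpeechTagger):
        """A hypothetical subclass; the super class registers all extensions."""

        def __init__(self, nlp: Language, sentence_level: Optional[bool]):
            super().__init__(nlp, sentence_level)

        def __call__(self, doc: Doc) -> Doc:
            # a real component would fill `token._.speech` and `doc.spans["speech"]` here
            return doc
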
 main/example.py                               |  2 +-
 .../annotation_reader/annotation_reader.py    | 22 +++++++++
 .../catma_annotation_reader.py                | 14 +++---
 .../attribution_tagger/attribution_tagger.py  | 17 +++++++
 .../neural_attribution_tagger.py              | 13 +++---
 src/monapipe/pipeline/clausizer/clausizer.py  | 45 +++++++++++++++++++
 .../clausizer/dependency_clausizer.py         | 25 +++--------
 src/monapipe/pipeline/coref/coref.py          | 35 +++++++++++++++
 src/monapipe/pipeline/coref/rb_coref.py       | 29 +++---------
 .../pipeline/event_tagger/event_tagger.py     | 17 +++++++
 .../event_tagger/neural_event_tagger.py       | 13 +++---
 .../pipeline/formatter/conllu_formatter.py    | 25 ++++++-----
 src/monapipe/pipeline/formatter/formatter.py  | 32 +++++++++++++
 .../pipeline/gen_tagger/gen_tagger.py         | 19 ++++++++
 .../pipeline/gen_tagger/neural_gen_tagger.py  | 12 ++---
 .../normalizer/identity_normalizer.py         | 27 +++--------
 .../pipeline/normalizer/normalizer.py         | 36 +++++++++++++++
 .../neural_reflection_tagger.py               | 12 ++---
 .../reflection_tagger/reflection_tagger.py    | 19 ++++++++
 .../germanet_semantic_tagger.py               | 13 +++---
 .../semantic_tagger/semantic_tagger.py        | 21 +++++++++
 .../pipeline/slicer/from_start_slicer.py      | 10 ++---
 src/monapipe/pipeline/slicer/slicer.py        | 21 +++++++++
 .../speaker_extractor/rb_speaker_extractor.py | 17 +++----
 .../speaker_extractor/speaker_extractor.py    | 21 +++++++++
 .../speech_tagger/flair_speech_tagger.py      | 19 +++-----
 .../quotation_marks_speech_tagger.py          | 24 +++++-----
 .../pipeline/speech_tagger/speech_tagger.py   | 26 +++++++++++
 .../heideltime_temponym_tagger.py             | 14 +++---
 .../temponym_tagger/temponym_tagger.py        | 20 +++++++++
 .../verb_analyzer/rb_verb_analyzer.py         | 30 +++++--------
 .../pipeline/verb_analyzer/verb_analyzer.py   | 41 +++++++++++++++++
 tests/pipeline/test_conllu_formatter.py       |  4 +-
 33 files changed, 507 insertions(+), 188 deletions(-)
 create mode 100644 src/monapipe/pipeline/annotation_reader/annotation_reader.py
 create mode 100644 src/monapipe/pipeline/attribution_tagger/attribution_tagger.py
 create mode 100644 src/monapipe/pipeline/clausizer/clausizer.py
 create mode 100644 src/monapipe/pipeline/coref/coref.py
 create mode 100644 src/monapipe/pipeline/event_tagger/event_tagger.py
 create mode 100644 src/monapipe/pipeline/formatter/formatter.py
 create mode 100644 src/monapipe/pipeline/gen_tagger/gen_tagger.py
 create mode 100644 src/monapipe/pipeline/normalizer/normalizer.py
 create mode 100644 src/monapipe/pipeline/reflection_tagger/reflection_tagger.py
 create mode 100644 src/monapipe/pipeline/semantic_tagger/semantic_tagger.py
 create mode 100644 src/monapipe/pipeline/slicer/slicer.py
 create mode 100644 src/monapipe/pipeline/speaker_extractor/speaker_extractor.py
 create mode 100644 src/monapipe/pipeline/speech_tagger/speech_tagger.py
 create mode 100644 src/monapipe/pipeline/temponym_tagger/temponym_tagger.py
 create mode 100644 src/monapipe/pipeline/verb_analyzer/verb_analyzer.py

diff --git a/main/example.py b/main/example.py
index 25e4299..2f8ff20 100644
--- a/main/example.py
+++ b/main/example.py
@@ -121,4 +121,4 @@ if __name__ == "__main__":
     doc_data = pickle.dumps(make_pickleable(doc))
     doc = unmake_pickleable(pickle.loads(doc_data))
 
-    print(doc._.conllu_str)
+    print(doc._.format_str)
diff --git a/src/monapipe/pipeline/annotation_reader/annotation_reader.py b/src/monapipe/pipeline/annotation_reader/annotation_reader.py
new file mode 100644
index 0000000..dc07325
--- /dev/null
+++ b/src/monapipe/pipeline/annotation_reader/annotation_reader.py
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from typing import Optional
+
+from spacy.language import Language
+from spacy.tokens import Doc, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class AnnotationReader:
+    """The super class `AnnotationReader`."""
+
+    assigns = ["doc._.annotations", "token._.annotations"]
+
+    def __init__(self, nlp: Language, corpus_path: Optional[str]):
+        self.corpus_path = corpus_path
+
+        add_extension(Token, "annotations", {})
+        add_extension(Doc, "annotations", {})
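
Note on `add_extension` (from `monapipe.pipeline.methods`): every super class
calls it unconditionally in `__init__`, so the helper is assumed to register
extensions idempotently, roughly as in the following sketch of the assumed
behaviour (not the repository's actual implementation):

    from typing import Any

    def add_extension(cls: Any, name: str, default: Any = None) -> None:
        # Register `cls._.<name>` only if it does not exist yet, so that
        # several components can safely declare the same attribute.
        if not cls.has_extension(name):
            cls.set_extension(name, default=default)
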
diff --git a/src/monapipe/pipeline/annotation_reader/catma_annotation_reader.py b/src/monapipe/pipeline/annotation_reader/catma_annotation_reader.py
index 06e84d1..89a7e62 100644
--- a/src/monapipe/pipeline/annotation_reader/catma_annotation_reader.py
+++ b/src/monapipe/pipeline/annotation_reader/catma_annotation_reader.py
@@ -9,15 +9,16 @@ import xml.etree.cElementTree as ET
 from typing import Any, Dict, List, Optional
 
 from spacy.language import Language
-from spacy.tokens import Doc, Token
+from spacy.tokens import Doc
 
 from monapipe.annotation import Annotation, AnnotationList
-from monapipe.pipeline.methods import add_extension, get_doc_text
+from monapipe.pipeline.annotation_reader.annotation_reader import AnnotationReader
+from monapipe.pipeline.methods import get_doc_text
 
 
 @Language.factory(
     "catma_annotation_reader",
-    assigns=["doc._.annotations", "token._.annotations"],
+    assigns=AnnotationReader.assigns,
     default_config={"corpus_path": None},
 )
 def catma_annotation_reader(nlp: Language, name: str, corpus_path: Optional[str]) -> Any:
@@ -51,11 +52,11 @@ def catma_annotation_reader(nlp: Language, name: str, corpus_path: Optional[str]
     return CatmaAnnotationReader(nlp, corpus_path)
 
 
-class CatmaAnnotationReader:
+class CatmaAnnotationReader(AnnotationReader):
     """The class `CatmaAnnotationReader`."""
 
     def __init__(self, nlp: Language, corpus_path: Optional[str]):
-        self.corpus_path = corpus_path
+        super().__init__(nlp, corpus_path)
 
         # name space for CATMA TEI/XML
         self._namespace = {
@@ -63,9 +64,6 @@ class CatmaAnnotationReader:
             "xml": "http://www.w3.org/XML/1998/namespace",
         }
 
-        add_extension(Token, "annotations", {})
-        add_extension(Doc, "annotations", {})
-
     def __call__(self, doc: Doc) -> Doc:
         if self.corpus_path is None:
             return doc
diff --git a/src/monapipe/pipeline/attribution_tagger/attribution_tagger.py b/src/monapipe/pipeline/attribution_tagger/attribution_tagger.py
new file mode 100644
index 0000000..cae3103
--- /dev/null
+++ b/src/monapipe/pipeline/attribution_tagger/attribution_tagger.py
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class AttributionTagger:
+    """The super class `AttributionTagger`."""
+
+    assigns = {"span._.attribution": "clause._.attribution"}
+
+    def __init__(self, nlp: Language):
+        add_extension(Span, "attribution")
diff --git a/src/monapipe/pipeline/attribution_tagger/neural_attribution_tagger.py b/src/monapipe/pipeline/attribution_tagger/neural_attribution_tagger.py
index 6748aa4..e0dec41 100644
--- a/src/monapipe/pipeline/attribution_tagger/neural_attribution_tagger.py
+++ b/src/monapipe/pipeline/attribution_tagger/neural_attribution_tagger.py
@@ -8,17 +8,18 @@ import numpy as np
 import torch
 from sklearn.preprocessing import MultiLabelBinarizer
 from spacy.language import Language
-from spacy.tokens import Doc, Span
+from spacy.tokens import Doc
 from transformers import BertModel, BertTokenizer
 
 import monapipe.resource_handler as resources
 from monapipe.config import SETTINGS
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.attribution_tagger.attribution_tagger import AttributionTagger
+from monapipe.pipeline.methods import requires
 
 
 @Language.factory(
     "neural_attribution_tagger",
-    assigns={"span._.attribution": "clause._.attribution"},
+    assigns=AttributionTagger.assigns,
     default_config={},
 )
 def neural_attribution_tagger(nlp: Language, name: str) -> Any:
@@ -39,12 +40,14 @@ def neural_attribution_tagger(nlp: Language, name: str) -> Any:
     return NeuralAttributionTagger(nlp)
 
 
-class NeuralAttributionTagger:
+class NeuralAttributionTagger(AttributionTagger):
     """The class `NeuralAttributionTagger`."""
 
     def __init__(self, nlp: Language):
         requires(self, nlp, ["clausizer"])
 
+        super().__init__(nlp)
+
         # BERT tokenizer and model
         self._tokenizer = BertTokenizer.from_pretrained("dbmdz/bert-base-german-cased")
         self._model = BertModel.from_pretrained(
@@ -56,8 +59,6 @@ class NeuralAttributionTagger:
         self._label_encoder = MultiLabelBinarizer()
         self._label_encoder.fit([self._label_names])
 
-        add_extension(Span, "attribution")
-
     def __call__(self, doc: Doc) -> Doc:
         attribution_model = resources.access("attribution")
 
diff --git a/src/monapipe/pipeline/clausizer/clausizer.py b/src/monapipe/pipeline/clausizer/clausizer.py
new file mode 100644
index 0000000..ac3400c
--- /dev/null
+++ b/src/monapipe/pipeline/clausizer/clausizer.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from typing import List
+
+from spacy.language import Language
+from spacy.tokens import Doc, Span, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class Clausizer:
+    """The super class `Clausizer`."""
+
+    assigns = {
+        "doc._.clauses": "doc._.clauses",
+        "span._.clauses": "sent._.clauses",
+        "span._.prec_punct": "clause._.prec_punct",
+        "span._.succ_punct": "clause._.succ_punct",
+        "span._.tokens": "clause._.tokens",
+        "token._.clause": "token._.clause",
+    }
+
+    def __init__(
+        self,
+        nlp: Language,
+        dep_labels: List[str],
+        conj_rule_labels: List[str],
+        xcomp_rule_labels: List[str],
+        handle_semi_modals: bool,
+        include_ws: bool,
+    ):
+        self.dep_labels = dep_labels
+        self.conj_rule_labels = conj_rule_labels
+        self.xcomp_rule_labels = xcomp_rule_labels
+        self.handle_semi_modals = handle_semi_modals
+        self.include_ws = include_ws
+
+        add_extension(Doc, "clauses", [])
+        add_extension(Span, "clauses")
+        add_extension(Span, "tokens")
+        add_extension(Span, "prec_punct")
+        add_extension(Span, "succ_punct")
+        add_extension(Token, "clause")
diff --git a/src/monapipe/pipeline/clausizer/dependency_clausizer.py b/src/monapipe/pipeline/clausizer/dependency_clausizer.py
index d9abb98..eedeab6 100644
--- a/src/monapipe/pipeline/clausizer/dependency_clausizer.py
+++ b/src/monapipe/pipeline/clausizer/dependency_clausizer.py
@@ -8,19 +8,13 @@ from spacy.language import Language
 from spacy.tokens import Doc, Span, Token
 
 from monapipe.lookups import lookup
+from monapipe.pipeline.clausizer.clausizer import Clausizer
 from monapipe.pipeline.methods import add_extension, requires
 
 
 @Language.factory(
     "dependency_clausizer",
-    assigns={
-        "doc._.clauses": "doc._.clauses",
-        "span._.clauses": "sent._.clauses",
-        "span._.prec_punct": "clause._.prec_punct",
-        "span._.succ_punct": "clause._.succ_punct",
-        "span._.tokens": "clause._.tokens",
-        "token._.clause": "token._.clause",
-    },
+    assigns=Clausizer.assigns,
     default_config={
         "dep_labels": [
             "acl",
@@ -73,7 +67,7 @@ def dependency_clausizer(
     )
 
 
-class DependencyClausizer:
+class DependencyClausizer(Clausizer):
     """The class `DependencyClausizer`."""
 
     def __init__(
@@ -87,20 +81,15 @@ class DependencyClausizer:
     ):
         requires(self, nlp, ["tok2vec", "morphologizer", "lemmatizer", "parser"])
 
+        super().__init__(
+            nlp, dep_labels, conj_rule_labels, xcomp_rule_labels, handle_semi_modals, include_ws
+        )
+
         self.dep_labels = set(dep_labels)
         self.conj_rule_labels = set(conj_rule_labels)
         self.xcomp_rule_labels = set(xcomp_rule_labels)
         self.dep_labels.difference_update(self.conj_rule_labels)
         self.dep_labels.difference_update(self.xcomp_rule_labels)
-        self.handle_semi_modals = handle_semi_modals
-        self.include_ws = include_ws
-
-        add_extension(Doc, "clauses", [])
-        add_extension(Span, "clauses")
-        add_extension(Span, "tokens")
-        add_extension(Span, "prec_punct")
-        add_extension(Span, "succ_punct")
-        add_extension(Token, "clause")
 
     def __call__(self, doc: Doc) -> Doc:
         semi_modals = lookup(doc.lang_, "semi_modal_verbs")
diff --git a/src/monapipe/pipeline/coref/coref.py b/src/monapipe/pipeline/coref/coref.py
new file mode 100644
index 0000000..feec291
--- /dev/null
+++ b/src/monapipe/pipeline/coref/coref.py
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Doc, Span, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class Coref:
+    """The super class `Coref`."""
+
+    assigns = {
+        "doc._.coref_clusters",
+        "doc._.coref_resolved",
+        "doc._.coref_scores",
+        "doc._.has_coref",
+        "span._.coref_cluster",
+        "span._.coref_scores",
+        "span._.is_coref",
+        "token._.coref_clusters",
+        "token._.in_coref",
+    }
+
+    def __init__(self, nlp: Language):
+        add_extension(Doc, "coref_clusters", [])
+        add_extension(Doc, "coref_resolved")
+        add_extension(Doc, "coref_scores", {})
+        add_extension(Doc, "has_coref", False)
+        add_extension(Span, "coref_cluster")
+        add_extension(Span, "coref_scores", {})
+        add_extension(Span, "is_coref", False)
+        add_extension(Token, "coref_clusters", [])
+        add_extension(Token, "in_coref", False)
diff --git a/src/monapipe/pipeline/coref/rb_coref.py b/src/monapipe/pipeline/coref/rb_coref.py
index 4be3479..3b29192 100644
--- a/src/monapipe/pipeline/coref/rb_coref.py
+++ b/src/monapipe/pipeline/coref/rb_coref.py
@@ -7,7 +7,7 @@ from typing import Any, Callable, List, Set
 
 from nltk.corpus.util import LazyCorpusLoader
 from spacy.language import Language
-from spacy.tokens import Doc, Span, Token
+from spacy.tokens import Doc, Span
 
 import monapipe.resource_handler as resources
 from monapipe.linguistics import (
@@ -22,22 +22,13 @@ from monapipe.linguistics import (
     stringify,
 )
 from monapipe.neuralcoref import Cluster, get_resolved
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.coref.coref import Coref
+from monapipe.pipeline.methods import requires
 
 
 @Language.factory(
     "rb_coref",
-    assigns={
-        "doc._.coref_clusters",
-        "doc._.coref_resolved",
-        "doc._.coref_scores",
-        "doc._.has_coref",
-        "span._.coref_cluster",
-        "span._.coref_scores",
-        "span._.is_coref",
-        "token._.coref_clusters",
-        "token._.in_coref",
-    },
+    assigns=Coref.assigns,
     default_config={},
 )
 def rb_coref(nlp: Language, name: str) -> Any:
@@ -58,21 +49,13 @@ def rb_coref(nlp: Language, name: str) -> Any:
     return RbCoref(nlp)
 
 
-class RbCoref:
+class RbCoref(Coref):
     """The class `RbCoref`."""
 
     def __init__(self, nlp: Language):
         requires(self, nlp, ["parser", "morphologizer", "lemmatizer", "speaker_extractor"])
 
-        add_extension(Doc, "coref_clusters", [])
-        add_extension(Doc, "coref_resolved")
-        add_extension(Doc, "coref_scores", {})
-        add_extension(Doc, "has_coref", False)
-        add_extension(Span, "coref_cluster")
-        add_extension(Span, "coref_scores", {})
-        add_extension(Span, "is_coref", False)
-        add_extension(Token, "coref_clusters", [])
-        add_extension(Token, "in_coref", False)
+        super().__init__(nlp)
 
     def __call__(self, doc: Doc) -> Doc:
         ents = get_noun_phrases(doc)
diff --git a/src/monapipe/pipeline/event_tagger/event_tagger.py b/src/monapipe/pipeline/event_tagger/event_tagger.py
new file mode 100644
index 0000000..5e407dd
--- /dev/null
+++ b/src/monapipe/pipeline/event_tagger/event_tagger.py
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class EventTagger:
+    """The super class `EventTagger`."""
+
+    assigns = {"span._.event": "clause._.event"}
+
+    def __init__(self, nlp: Language):
+        add_extension(Span, "event")
diff --git a/src/monapipe/pipeline/event_tagger/neural_event_tagger.py b/src/monapipe/pipeline/event_tagger/neural_event_tagger.py
index 524d09b..cd5fefb 100644
--- a/src/monapipe/pipeline/event_tagger/neural_event_tagger.py
+++ b/src/monapipe/pipeline/event_tagger/neural_event_tagger.py
@@ -6,17 +6,16 @@ import importlib
 from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span
+from spacy.tokens import Doc
 from torch.utils.data import DataLoader
 
 import monapipe.resource_handler as resources
 from monapipe.config import SETTINGS
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.event_tagger.event_tagger import EventTagger
+from monapipe.pipeline.methods import requires
 
 
-@Language.factory(
-    "neural_event_tagger", assigns={"span._.event": "clause._.event"}, default_config={}
-)
+@Language.factory("neural_event_tagger", assigns=EventTagger.assigns, default_config={})
 def neural_event_tagger(nlp: Language, name: str) -> Any:
     """Spacy pipeline component.
         Integration of event classification from EvENT project.
@@ -35,13 +34,13 @@ def neural_event_tagger(nlp: Language, name: str) -> Any:
     return NeuralEventTagger(nlp)
 
 
-class NeuralEventTagger:
+class NeuralEventTagger(EventTagger):
     """The class `NeuralEventTagger`."""
 
     def __init__(self, nlp: Language):
         requires(self, nlp, ["clausizer"])
 
-        add_extension(Span, "event")
+        super().__init__(nlp)
 
     def __call__(self, doc: Doc) -> Doc:
         model, tokenizer = resources.access("event_classification")
diff --git a/src/monapipe/pipeline/formatter/conllu_formatter.py b/src/monapipe/pipeline/formatter/conllu_formatter.py
index 0fb1f94..4c195dc 100644
--- a/src/monapipe/pipeline/formatter/conllu_formatter.py
+++ b/src/monapipe/pipeline/formatter/conllu_formatter.py
@@ -6,14 +6,15 @@ import re
 from typing import Any, Callable, Dict, List, Union
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span, Token
+from spacy.tokens import Doc, Token
 
-from monapipe.pipeline.methods import add_extension, deserialize_config_param, optional
+from monapipe.pipeline.formatter.formatter import Formatter
+from monapipe.pipeline.methods import deserialize_config_param, optional
 
 
 @Language.factory(
     "conllu_formatter",
-    assigns={"doc._.conllu_str": "doc._.conllu_str", "span._.conllu_str": "sent._.conllu_str"},
+    assigns=Formatter.assigns,
     default_config={
         "column_names": [
             "ID",
@@ -64,7 +65,7 @@ def conllu_formatter(
     return ConlluFormatter(nlp, column_names, column_names_plus, column_funcs, delimiter)
 
 
-class ConlluFormatter:
+class ConlluFormatter(Formatter):
     """The class `ConlluFormatter`."""
 
     def __init__(
@@ -77,9 +78,12 @@ class ConlluFormatter:
     ):
         optional(self, nlp, ["parser"])
 
-        column_funcs = deserialize_config_param(column_funcs)
+        super().__init__(nlp, column_names, column_names_plus, column_funcs, delimiter)
+
+        self.column_names = self.column_names + self.column_names_plus
+
+        column_funcs = deserialize_config_param(self.column_funcs)
 
-        self.column_names = column_names + column_names_plus
         self.column_funcs = {
             "ID": lambda token: (
                 list(token.sent).index(token) if token.doc.is_sentenced else token.i
@@ -100,12 +104,9 @@ class ConlluFormatter:
             ),
             "DEPREL": lambda token: token.dep_,
         }
+
         for column_name in column_funcs:
             self.column_funcs[column_name] = column_funcs[column_name]
-        self.delimiter = delimiter
-
-        add_extension(Doc, "conllu_str")
-        add_extension(Span, "conllu_str")
 
     def __call__(self, doc: Doc) -> Doc:
         if doc.is_sentenced:
@@ -135,8 +136,8 @@ class ConlluFormatter:
                 sent_rows.append(row)
             sent_rows.append("")
             doc_rows.extend(sent_rows)
-            sent._.conllu_str = self._string_from_rows([first_row] + sent_rows, sent_char_widths)
-        doc._.conllu_str = self._string_from_rows([first_row] + doc_rows, doc_char_widths)
+            sent._.format_str = self._string_from_rows([first_row] + sent_rows, sent_char_widths)
+        doc._.format_str = self._string_from_rows([first_row] + doc_rows, doc_char_widths)
         return doc
 
     def _apply_column_func(self, token: Token, column_name: str) -> Any:
diff --git a/src/monapipe/pipeline/formatter/formatter.py b/src/monapipe/pipeline/formatter/formatter.py
new file mode 100644
index 0000000..85bd7f9
--- /dev/null
+++ b/src/monapipe/pipeline/formatter/formatter.py
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from typing import Any, Callable, Dict, List, Union
+
+from spacy.language import Language
+from spacy.tokens import Doc, Span, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class Formatter:
+    """The super class `Formatter`."""
+
+    assigns = {"doc._.format_str": "doc._.format_str", "span._.format_str": "sent._.format_str"}
+
+    def __init__(
+        self,
+        nlp: Language,
+        column_names: List[str],
+        column_names_plus: List[str],
+        column_funcs: Union[str, Dict[str, Callable[[Token], Any]]],
+        delimiter: str,
+    ):
+        self.column_names = column_names
+        self.column_names_plus = column_names_plus
+        self.column_funcs = column_funcs
+        self.delimiter = delimiter
+
+        add_extension(Doc, "format_str")
+        add_extension(Span, "format_str")
diff --git a/src/monapipe/pipeline/gen_tagger/gen_tagger.py b/src/monapipe/pipeline/gen_tagger/gen_tagger.py
new file mode 100644
index 0000000..688f630
--- /dev/null
+++ b/src/monapipe/pipeline/gen_tagger/gen_tagger.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class GenTagger:
+    """The super class `GenTagger`."""
+
+    assigns = {"doc.spans": "doc.spans['gi']", "span._.gi": "gi_span._.gi"}
+
+    def __init__(self, nlp: Language, label_condition: str):
+        self.label_condition = label_condition
+
+        add_extension(Span, "gi", {})
diff --git a/src/monapipe/pipeline/gen_tagger/neural_gen_tagger.py b/src/monapipe/pipeline/gen_tagger/neural_gen_tagger.py
index 40727b5..e1ff781 100644
--- a/src/monapipe/pipeline/gen_tagger/neural_gen_tagger.py
+++ b/src/monapipe/pipeline/gen_tagger/neural_gen_tagger.py
@@ -5,16 +5,17 @@
 from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span
+from spacy.tokens import Doc
 
 import monapipe.resource_handler as resources
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.gen_tagger.gen_tagger import GenTagger
+from monapipe.pipeline.methods import requires
 from monapipe.pipeline.reflection_tagger.methods import create_passages_from_clause_tags
 
 
 @Language.factory(
     "neural_gen_tagger",
-    assigns={"doc.spans": "doc.spans['gi']", "span._.gi": "gi_span._.gi"},
+    assigns=GenTagger.assigns,
     default_config={"label_condition": "multi"},
 )
 def neural_gen_tagger(nlp: Language, name: str, label_condition: str) -> Any:
@@ -35,7 +36,7 @@ def neural_gen_tagger(nlp: Language, name: str, label_condition: str) -> Any:
     return NeuralGenTagger(nlp, label_condition)
 
 
-class NeuralGenTagger:
+class NeuralGenTagger(GenTagger):
     """The class `NeuralGenTagger`."""
 
     def __init__(self, nlp: Language, label_condition: str):
@@ -43,9 +44,8 @@ class NeuralGenTagger:
 
         if label_condition not in ["binary", "multi"]:
             raise ValueError('Label condition must be "binary" or "multi".')
-        self.label_condition = label_condition
 
-        add_extension(Span, "gi", {})
+        super().__init__(nlp, label_condition)
 
     def __call__(self, doc: Doc) -> Doc:
         models = resources.access("generalizing_passages_identification_bert")
diff --git a/src/monapipe/pipeline/normalizer/identity_normalizer.py b/src/monapipe/pipeline/normalizer/identity_normalizer.py
index 56ea9df..ff55d35 100644
--- a/src/monapipe/pipeline/normalizer/identity_normalizer.py
+++ b/src/monapipe/pipeline/normalizer/identity_normalizer.py
@@ -5,26 +5,14 @@
 from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Token
+from spacy.tokens import Doc
 
-from monapipe.pipeline.methods import add_extension
+from monapipe.pipeline.normalizer.normalizer import Normalizer
 
 
 @Language.factory(
     "identity_normalizer",
-    assigns=[
-        "doc.text",
-        "doc.text_with_ws",
-        "doc._.text",
-        "doc._.text_with_ws",
-        "token.idx",
-        "token.text",
-        "token.text_with_ws",
-        "token._.idx",
-        "token._.text",
-        "token._.text_with_ws",
-        "token._.whitespace_",
-    ],
+    assigns=Normalizer.assigns,
     default_config={"remove_spaces": False},
 )
-def dependency_clausizer(nlp: Language, name: str, remove_spaces: bool) -> Any:
+def identity_normalizer(nlp: Language, name: str, remove_spaces: bool) -> Any:
@@ -43,18 +31,13 @@ def dependency_clausizer(nlp: Language, name: str, remove_spaces: bool) -> Any:
     return IdentityNormalizer(nlp, remove_spaces)
 
 
-class IdentityNormalizer:
+class IdentityNormalizer(Normalizer):
     """The class `IdentityNormalizer`."""
 
     def __init__(self, nlp: Language, remove_spaces: bool):
         self.remove_spaces = remove_spaces
 
-        add_extension(Doc, "text")
-        add_extension(Doc, "text_with_ws")
-        add_extension(Token, "idx")
-        add_extension(Token, "text")
-        add_extension(Token, "text_with_ws")
-        add_extension(Token, "whitespace_")
+        super().__init__(nlp, remove_spaces)
 
     def __call__(self, doc: Doc) -> Doc:
         # token.text cannot be overwritten, so we create a new document
diff --git a/src/monapipe/pipeline/normalizer/normalizer.py b/src/monapipe/pipeline/normalizer/normalizer.py
new file mode 100644
index 0000000..710ad4e
--- /dev/null
+++ b/src/monapipe/pipeline/normalizer/normalizer.py
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Doc, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class Normalizer:
+    """The super class `Normalizer`."""
+
+    assigns = [
+        "doc.text",
+        "doc.text_with_ws",
+        "doc._.text",
+        "doc._.text_with_ws",
+        "token.idx",
+        "token.text",
+        "token.text_with_ws",
+        "token._.idx",
+        "token._.text",
+        "token._.text_with_ws",
+        "token._.whitespace_",
+    ]
+
+    def __init__(self, nlp: Language, remove_spaces: bool):
+        self.remove_spaces = remove_spaces
+
+        add_extension(Doc, "text")
+        add_extension(Doc, "text_with_ws")
+        add_extension(Token, "idx")
+        add_extension(Token, "text")
+        add_extension(Token, "text_with_ws")
+        add_extension(Token, "whitespace_")
diff --git a/src/monapipe/pipeline/reflection_tagger/neural_reflection_tagger.py b/src/monapipe/pipeline/reflection_tagger/neural_reflection_tagger.py
index 25f12a3..ebbb69f 100644
--- a/src/monapipe/pipeline/reflection_tagger/neural_reflection_tagger.py
+++ b/src/monapipe/pipeline/reflection_tagger/neural_reflection_tagger.py
@@ -5,16 +5,17 @@
 from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span
+from spacy.tokens import Doc
 
 import monapipe.resource_handler as resources
-from monapipe.pipeline.methods import add_extension, requires, update_token_span_groups
+from monapipe.pipeline.methods import requires, update_token_span_groups
 from monapipe.pipeline.reflection_tagger.methods import create_passages_from_clause_tags
+from monapipe.pipeline.reflection_tagger.reflection_tagger import ReflectionTagger
 
 
 @Language.factory(
     "neural_reflection_tagger",
-    assigns={"doc.spans": "doc.spans['rp']", "span._.rp": "rp_span._.rp"},
+    assigns=ReflectionTagger.assigns,
     default_config={"label_condition": "multi"},
 )
 def neural_reflection_tagger(nlp: Language, name: str, label_condition: str) -> Any:
@@ -35,7 +36,7 @@ def neural_reflection_tagger(nlp: Language, name: str, label_condition: str) ->
     return NeuralReflectionTagger(nlp, label_condition)
 
 
-class NeuralReflectionTagger:
+class NeuralReflectionTagger(ReflectionTagger):
     """The class `NeuralReflectionTagger`."""
 
     def __init__(self, nlp: Language, label_condition: str):
@@ -43,9 +44,8 @@ class NeuralReflectionTagger:
 
         if label_condition not in ["binary", "multi"]:
             raise ValueError('Label condition must be "binary" or "multi".')
-        self.label_condition = label_condition
 
-        add_extension(Span, "rp", {})
+        super().__init__(nlp, label_condition)
 
     def __call__(self, doc: Doc) -> Doc:
         models = resources.access("reflective_passages_identification_bert")
diff --git a/src/monapipe/pipeline/reflection_tagger/reflection_tagger.py b/src/monapipe/pipeline/reflection_tagger/reflection_tagger.py
new file mode 100644
index 0000000..c560b48
--- /dev/null
+++ b/src/monapipe/pipeline/reflection_tagger/reflection_tagger.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class ReflectionTagger:
+    """The super class `ReflectionTagger`."""
+
+    assigns = {"doc.spans": "doc.spans['rp']", "span._.rp": "rp_span._.rp"}
+
+    def __init__(self, nlp: Language, label_condition: str):
+        self.label_condition = label_condition
+
+        add_extension(Span, "rp", {})
diff --git a/src/monapipe/pipeline/semantic_tagger/germanet_semantic_tagger.py b/src/monapipe/pipeline/semantic_tagger/germanet_semantic_tagger.py
index 8e4eaca..f82fd5e 100644
--- a/src/monapipe/pipeline/semantic_tagger/germanet_semantic_tagger.py
+++ b/src/monapipe/pipeline/semantic_tagger/germanet_semantic_tagger.py
@@ -10,15 +10,13 @@ from spacy.language import Language
 from spacy.tokens import Doc, Span, Token
 
 import monapipe.resource_handler as resources
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.methods import requires
+from monapipe.pipeline.semantic_tagger.semantic_tagger import SemanticTagger
 
 
 @Language.factory(
     "germanet_semantic_tagger",
-    assigns={
-        "span._.verb_synset_id": "clause._.verb_synset_id",
-        "token._.synset_id": "token._.synset_id",
-    },
+    assigns=SemanticTagger.assigns,
     default_config={},
 )
 def germanet_semantic_tagger(nlp: Language, name: str) -> Any:
@@ -36,14 +34,13 @@ def germanet_semantic_tagger(nlp: Language, name: str) -> Any:
     return GermanetSemanticTagger(nlp)
 
 
-class GermanetSemanticTagger:
+class GermanetSemanticTagger(SemanticTagger):
     """The class `GermanetSemanticTagger`."""
 
     def __init__(self, nlp: Language):
         requires(self, nlp, ["lemmatizer", "clausizer"])
 
-        add_extension(Span, "verb_synset_id")
-        add_extension(Token, "synset_id")
+        super().__init__(nlp)
 
     def __call__(self, doc: Doc) -> Doc:
         germanet = resources.access("germanet")
diff --git a/src/monapipe/pipeline/semantic_tagger/semantic_tagger.py b/src/monapipe/pipeline/semantic_tagger/semantic_tagger.py
new file mode 100644
index 0000000..6e01e56
--- /dev/null
+++ b/src/monapipe/pipeline/semantic_tagger/semantic_tagger.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class SemanticTagger:
+    """The super class `SemanticTagger`."""
+
+    assigns = {
+        "span._.verb_synset_id": "clause._.verb_synset_id",
+        "token._.synset_id": "token._.synset_id",
+    }
+
+    def __init__(self, nlp: Language):
+        add_extension(Span, "verb_synset_id")
+        add_extension(Token, "synset_id")
diff --git a/src/monapipe/pipeline/slicer/from_start_slicer.py b/src/monapipe/pipeline/slicer/from_start_slicer.py
index df555a8..1e09367 100644
--- a/src/monapipe/pipeline/slicer/from_start_slicer.py
+++ b/src/monapipe/pipeline/slicer/from_start_slicer.py
@@ -10,11 +10,12 @@ from spacy.tokens import Doc
 
 from monapipe.pipeline.methods import add_extension
 from monapipe.pipeline.slicer.methods import span_to_doc
+from monapipe.pipeline.slicer.slicer import Slicer
 
 
 @Language.factory(
     "from_start_slicer",
-    assigns=["doc.text", "doc._.fulltext"],
+    assigns=Slicer.assigns,
     default_config={"max_units": -1, "units": "sents", "complete_sentences": True},
 )
 def from_start_slicer(
@@ -40,17 +41,14 @@ def from_start_slicer(
     return FromStartSlicer(nlp, max_units, units, complete_sentences)
 
 
-class FromStartSlicer:
+class FromStartSlicer(Slicer):
     """The class `FromStartSlicer`."""
 
     def __init__(self, nlp: Language, max_units: int, units: str, complete_sentences: bool):
-        self.max_units = max_units
         if units not in ["chars", "sents", "tokens"]:
             raise ValueError('Units must be "chars", "sents" or "tokens".')
-        self.units = units
-        self.complete_sentences = complete_sentences
 
-        add_extension(Doc, "fulltext")
+        super().__init__(nlp, max_units, units, complete_sentences)
 
     def __call__(self, doc: Doc) -> Doc:
         if (self.units == "sents" or self.complete_sentences) and not doc.is_sentenced:
diff --git a/src/monapipe/pipeline/slicer/slicer.py b/src/monapipe/pipeline/slicer/slicer.py
new file mode 100644
index 0000000..9247122
--- /dev/null
+++ b/src/monapipe/pipeline/slicer/slicer.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Doc
+
+from monapipe.pipeline.methods import add_extension
+
+
+class Slicer:
+    """The super class `Slicer`."""
+
+    assigns = ["doc.text", "doc._.fulltext"]
+
+    def __init__(self, nlp: Language, max_units: int, units: str, complete_sentences: bool):
+        self.max_units = max_units
+        self.units = units
+        self.complete_sentences = complete_sentences
+
+        add_extension(Doc, "fulltext")
diff --git a/src/monapipe/pipeline/speaker_extractor/rb_speaker_extractor.py b/src/monapipe/pipeline/speaker_extractor/rb_speaker_extractor.py
index e22cd62..fb11b34 100644
--- a/src/monapipe/pipeline/speaker_extractor/rb_speaker_extractor.py
+++ b/src/monapipe/pipeline/speaker_extractor/rb_speaker_extractor.py
@@ -2,22 +2,20 @@
 #
 # SPDX-License-Identifier: CC0-1.0
 
-from typing import Any, List, Optional, Tuple
+from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span, Token
+from spacy.tokens import Doc
 
 from monapipe.linguistics import get_noun_phrases, is_pronoun, is_proper_noun
 from monapipe.lookups import lookup
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.methods import requires
+from monapipe.pipeline.speaker_extractor.speaker_extractor import SpeakerExtractor
 
 
 @Language.factory(
     "rb_speaker_extractor",
-    assigns={
-        "span._.addressee": "speech_span._.addressee",
-        "span._.speaker": "speech_span._.speaker",
-    },
+    assigns=SpeakerExtractor.assigns,
     default_config={},
 )
 def rb_speaker_extractor(nlp: Language, name: str) -> Any:
@@ -36,14 +34,13 @@ def rb_speaker_extractor(nlp: Language, name: str) -> Any:
     return RbSpeakerExtractor(nlp)
 
 
-class RbSpeakerExtractor:
+class RbSpeakerExtractor(SpeakerExtractor):
     """The class `RbSpeakerExtractor`."""
 
     def __init__(self, nlp: Language):
         requires(self, nlp, ["lemmatizer", "speech_tagger"])
 
-        add_extension(Span, "addressee")
-        add_extension(Span, "speaker")
+        super().__init__(nlp)
 
     def __call__(self, doc: Doc) -> Doc:
         speech_verbs = lookup(doc.lang_, "speech_verbs")
diff --git a/src/monapipe/pipeline/speaker_extractor/speaker_extractor.py b/src/monapipe/pipeline/speaker_extractor/speaker_extractor.py
new file mode 100644
index 0000000..54b1624
--- /dev/null
+++ b/src/monapipe/pipeline/speaker_extractor/speaker_extractor.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class SpeakerExtractor:
+    """The super class `SpeakerExtractor`."""
+
+    assigns = {
+        "span._.addressee": "speech_span._.addressee",
+        "span._.speaker": "speech_span._.speaker",
+    }
+
+    def __init__(self, nlp: Language):
+        add_extension(Span, "addressee")
+        add_extension(Span, "speaker")
diff --git a/src/monapipe/pipeline/speech_tagger/flair_speech_tagger.py b/src/monapipe/pipeline/speech_tagger/flair_speech_tagger.py
index d52292d..f5339ee 100644
--- a/src/monapipe/pipeline/speech_tagger/flair_speech_tagger.py
+++ b/src/monapipe/pipeline/speech_tagger/flair_speech_tagger.py
@@ -9,23 +9,20 @@ import torch
 from flair.data import Sentence
 from flair.models import SequenceTagger
 from spacy.language import Language
-from spacy.tokens import Doc, Span, Token
+from spacy.tokens import Doc, Span
 
 import monapipe.resource_handler as resources
 from monapipe.config import SETTINGS
-from monapipe.pipeline.methods import add_extension, requires, update_token_span_groups
+from monapipe.pipeline.methods import requires, update_token_span_groups
 from monapipe.pipeline.speech_tagger.methods import (
     create_speech_segments_from_token_tags,
 )
+from monapipe.pipeline.speech_tagger.speech_tagger import SpeechTagger
 
 
 @Language.factory(
     "flair_speech_tagger",
-    assigns={
-        "doc.spans": "doc.spans['speech']",
-        "span._.speech": "speech_span._.speech",
-        "token._.speech": "token._.speech",
-    },
+    assigns=SpeechTagger.assigns,
     default_config={"sentence_level": False},
 )
 def flair_speech_tagger(nlp: Language, name: str, sentence_level: bool) -> Any:
@@ -33,7 +30,6 @@ def flair_speech_tagger(nlp: Language, name: str, sentence_level: bool) -> Any:
         Tags tokens and clauses with speech tags.
         Wrapper for the "Redewiedergabe" taggers from https://github.com/redewiedergabe/tagger.
 
-
     Args:
         nlp: Spacy object.
         name: Component name.
@@ -47,16 +43,13 @@ def flair_speech_tagger(nlp: Language, name: str, sentence_level: bool) -> Any:
     return FlairSpeechTagger(nlp, sentence_level)
 
 
-class FlairSpeechTagger:
+class FlairSpeechTagger(SpeechTagger):
     """The class `FlairSpeechTagger`."""
 
     def __init__(self, nlp: Language, sentence_level: bool):
         requires(self, nlp, ["parser"])
 
-        self.sentence_level = sentence_level
-
-        add_extension(Token, "speech", {})
-        add_extension(Span, "speech", {})
+        super().__init__(nlp, sentence_level)
 
     def __call__(self, doc: Doc) -> Doc:
         flair.device = torch.device(SETTINGS["torch_device"])
diff --git a/src/monapipe/pipeline/speech_tagger/quotation_marks_speech_tagger.py b/src/monapipe/pipeline/speech_tagger/quotation_marks_speech_tagger.py
index 2555d95..563e538 100644
--- a/src/monapipe/pipeline/speech_tagger/quotation_marks_speech_tagger.py
+++ b/src/monapipe/pipeline/speech_tagger/quotation_marks_speech_tagger.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: CC0-1.0
 
-from typing import Any
+from typing import Any, Optional
 
 from spacy.language import Language
 from spacy.tokens import Doc, Span, Token
@@ -12,40 +12,36 @@ from monapipe.pipeline.methods import add_extension, update_token_span_groups
 from monapipe.pipeline.speech_tagger.methods import (
     create_speech_segments_from_token_tags,
 )
+from monapipe.pipeline.speech_tagger.speech_tagger import SpeechTagger
 
 
 @Language.factory(
     "quotation_marks_speech_tagger",
-    assigns={
-        "doc.spans": "doc.spans['speech']",
-        "span._.speech": "speech_span._.speech",
-        "token._.speech": "token._.speech",
-    },
-    default_config={},
+    assigns=SpeechTagger.assigns,
+    default_config={"sentence_level": None},
 )
-def quotation_marks_speech_tagger(nlp: Language, name: str) -> Any:
+def quotation_marks_speech_tagger(nlp: Language, name: str, sentence_level: Optional[bool]) -> Any:
     """Spacy pipeline component.
         Tags tokens and clauses with speech tags.
         Detects only direct speech within (German) quotation marks.
 
-
     Args:
         nlp: Spacy object.
         name: Component name.
+        sentence_level: Ignored. This parameter exists only for compatibility with `SpeechTagger`.
 
     Returns:
         `QuotationMarksSpeechTagger`.
 
     """
-    return QuotationMarksSpeechTagger(nlp)
+    return QuotationMarksSpeechTagger(nlp, sentence_level)
 
 
-class QuotationMarksSpeechTagger:
+class QuotationMarksSpeechTagger(SpeechTagger):
     """The class `QuotationMarksSpeechTagger`."""
 
-    def __init__(self, nlp: Language):
-        add_extension(Token, "speech", {})
-        add_extension(Span, "speech", {})
+    def __init__(self, nlp: Language, sentence_level: Optional[bool]):
+        super().__init__(nlp, sentence_level)
 
     def __call__(self, doc: Doc) -> Doc:
         q_marks = lookup(doc.lang_, "quotation_marks")
diff --git a/src/monapipe/pipeline/speech_tagger/speech_tagger.py b/src/monapipe/pipeline/speech_tagger/speech_tagger.py
new file mode 100644
index 0000000..eecf63a
--- /dev/null
+++ b/src/monapipe/pipeline/speech_tagger/speech_tagger.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from typing import Optional
+
+from spacy.language import Language
+from spacy.tokens import Span, Token
+
+from monapipe.pipeline.methods import add_extension
+
+
+class SpeechTagger:
+    """The super class `SpeechTagger`."""
+
+    assigns = {
+        "doc.spans": "doc.spans['speech']",
+        "span._.speech": "speech_span._.speech",
+        "token._.speech": "token._.speech",
+    }
+
+    def __init__(self, nlp: Language, sentence_level: Optional[bool]):
+        self.sentence_level = sentence_level
+
+        add_extension(Token, "speech", {})
+        add_extension(Span, "speech", {})
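
Note: `quotation_marks_speech_tagger` now accepts (and ignores) the same
`sentence_level` setting as `flair_speech_tagger`, making the two taggers
configuration-compatible. A usage sketch, reusing `monapipe.model.load()` as
in the tests:

    import monapipe.model

    nlp = monapipe.model.load()
    # either speech tagger can be added with the same config key:
    nlp.add_pipe("quotation_marks_speech_tagger", config={"sentence_level": True})
    # drop-in alternative:
    # nlp.add_pipe("flair_speech_tagger", config={"sentence_level": True})
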
diff --git a/src/monapipe/pipeline/temponym_tagger/heideltime_temponym_tagger.py b/src/monapipe/pipeline/temponym_tagger/heideltime_temponym_tagger.py
index e89a066..6cc9ada 100644
--- a/src/monapipe/pipeline/temponym_tagger/heideltime_temponym_tagger.py
+++ b/src/monapipe/pipeline/temponym_tagger/heideltime_temponym_tagger.py
@@ -6,18 +6,16 @@ import re
 from typing import Any
 
 from spacy.language import Language
-from spacy.tokens import Doc, Span, Token
+from spacy.tokens import Doc
 
 import monapipe.resource_handler as resources
-from monapipe.pipeline.methods import add_extension, update_token_span_groups
+from monapipe.pipeline.methods import update_token_span_groups
+from monapipe.pipeline.temponym_tagger.temponym_tagger import TemponymTagger
 
 
 @Language.factory(
     "heideltime_temponym_tagger",
-    assigns={
-        "doc.spans": "doc.spans['temponym']",
-        "span._.temponym_norm": "temponym_span._.temponym_norm",
-    },
+    assigns=TemponymTagger.assigns,
     default_config={},
 )
 def heideltime_temponym_tagger(nlp: Language, name: str) -> Any:
@@ -37,11 +35,11 @@ def heideltime_temponym_tagger(nlp: Language, name: str) -> Any:
     return HeideltimeTemponymTagger(nlp)
 
 
-class HeideltimeTemponymTagger:
+class HeideltimeTemponymTagger(TemponymTagger):
     """The class `HeideltimeTemponymTagger`."""
 
     def __init__(self, nlp: Language):
-        add_extension(Span, "temponym_norm")
+        super().__init__(nlp)
 
     def __call__(self, doc: Doc) -> Doc:
         doc.spans["temponym"] = []
diff --git a/src/monapipe/pipeline/temponym_tagger/temponym_tagger.py b/src/monapipe/pipeline/temponym_tagger/temponym_tagger.py
new file mode 100644
index 0000000..4aa6597
--- /dev/null
+++ b/src/monapipe/pipeline/temponym_tagger/temponym_tagger.py
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from spacy.language import Language
+from spacy.tokens import Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class TemponymTagger:
+    """The super class `TemponymTagger`."""
+
+    assigns = {
+        "doc.spans": "doc.spans['temponym']",
+        "span._.temponym_norm": "temponym_span._.temponym_norm",
+    }
+
+    def __init__(self, nlp: Language):
+        add_extension(Span, "temponym_norm")
diff --git a/src/monapipe/pipeline/verb_analyzer/rb_verb_analyzer.py b/src/monapipe/pipeline/verb_analyzer/rb_verb_analyzer.py
index 2f2244c..cdef19d 100644
--- a/src/monapipe/pipeline/verb_analyzer/rb_verb_analyzer.py
+++ b/src/monapipe/pipeline/verb_analyzer/rb_verb_analyzer.py
@@ -12,17 +12,13 @@ from spacy.tokens import Doc, MorphAnalysis, Span, Token
 
 from monapipe.linguistics import get_morph_analyses
 from monapipe.lookups import lookup
-from monapipe.pipeline.methods import add_extension, requires
+from monapipe.pipeline.methods import requires
+from monapipe.pipeline.verb_analyzer.verb_analyzer import VerbAnalyzer
 
 
 @Language.factory(
     "rb_verb_analyzer",
-    assigns={
-        "span._.form": "clause._.form",
-        "span._.form_main": "clause._.form_main",
-        "span._.form_modals": "clause._.form_modals",
-        "span._.form_verbs": "clause._.form_verbs",
-    },
+    assigns=VerbAnalyzer.assigns,
     default_config={
         "ov": True,
         "conj_rule_labels": ["conj"],
@@ -67,7 +63,7 @@ def rb_verb_analyzer(
     )
 
 
-class RbVerbAnalyzer:
+class RbVerbAnalyzer(VerbAnalyzer):
     """The class `RbVerbAnalyzer`."""
 
     def __init__(
@@ -81,16 +77,14 @@ class RbVerbAnalyzer:
     ):
         requires(self, nlp, ["morphologizer", "lemmatizer", "clausizer"])
 
-        self.ov = ov
-        self.conj_rule_labels = conj_rule_labels
-        self.handle_semi_modals = handle_semi_modals
-        self.handle_particles = handle_particles
-        self.handle_local_verb_movement = handle_local_verb_movement
-
-        add_extension(Span, "form", MorphAnalysis(nlp.vocab, {}))
-        add_extension(Span, "form_main", None)
-        add_extension(Span, "form_modals", [])
-        add_extension(Span, "form_verbs", [])
+        super().__init__(
+            nlp,
+            ov,
+            conj_rule_labels,
+            handle_semi_modals,
+            handle_particles,
+            handle_local_verb_movement,
+        )
 
     def __call__(self, doc: Doc) -> Doc:
         # read language-specific inflection table, auxiliary verbs and modal verbs
diff --git a/src/monapipe/pipeline/verb_analyzer/verb_analyzer.py b/src/monapipe/pipeline/verb_analyzer/verb_analyzer.py
new file mode 100644
index 0000000..8592b62
--- /dev/null
+++ b/src/monapipe/pipeline/verb_analyzer/verb_analyzer.py
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
+#
+# SPDX-License-Identifier: CC0-1.0
+
+from typing import List
+
+from spacy.language import Language
+from spacy.tokens import MorphAnalysis, Span
+
+from monapipe.pipeline.methods import add_extension
+
+
+class VerbAnalyzer:
+    """The super class `VerbAnalyzer`."""
+
+    assigns = {
+        "span._.form": "clause._.form",
+        "span._.form_main": "clause._.form_main",
+        "span._.form_modals": "clause._.form_modals",
+        "span._.form_verbs": "clause._.form_verbs",
+    }
+
+    def __init__(
+        self,
+        nlp: Language,
+        ov: bool,
+        conj_rule_labels: List[str],
+        handle_semi_modals: bool,
+        handle_particles: bool,
+        handle_local_verb_movement: bool,
+    ):
+        self.ov = ov
+        self.conj_rule_labels = conj_rule_labels
+        self.handle_semi_modals = handle_semi_modals
+        self.handle_particles = handle_particles
+        self.handle_local_verb_movement = handle_local_verb_movement
+
+        add_extension(Span, "form", MorphAnalysis(nlp.vocab, {}))
+        add_extension(Span, "form_main", None)
+        add_extension(Span, "form_modals", [])
+        add_extension(Span, "form_verbs", [])
diff --git a/tests/pipeline/test_conllu_formatter.py b/tests/pipeline/test_conllu_formatter.py
index f2bc247..66dbdc9 100644
--- a/tests/pipeline/test_conllu_formatter.py
+++ b/tests/pipeline/test_conllu_formatter.py
@@ -13,5 +13,5 @@ def test_conllu_formatter():
     nlp = monapipe.model.load()
     nlp.add_pipe("conllu_formatter")
     doc = nlp(text_goethe_wv)
-    assert check_data_types([doc], "conllu_str", str)
-    assert check_data_types(doc.sents, "conllu_str", str)
+    assert check_data_types([doc], "format_str", str)
+    assert check_data_types(doc.sents, "format_str", str)
-- 
GitLab