Commit 23624b08 authored by Seungbin Yim's avatar Seungbin Yim
Browse files

WIP: ab-testing

parent 1c931171
# Image for the Prodigy A/B-testing run. The build installs a compiler
# toolchain, git and the Prodigy wheel; the container then runs
# /app/ab-test.sh.
FROM python:3.8

WORKDIR /app

# OS packages are installed before the application COPY so that source changes
# do not invalidate this layer. `update` and `install` share one RUN so a
# cached, stale package index is never paired with a later install, and the
# index is removed in the same layer so it does not end up in the image.
#
# Dropped from the original:
#   * `cat /etc/apt/sources.list` - a debug step that fails the build on
#     Debian bases that only ship /etc/apt/sources.list.d/debian.sources.
#   * `apt-get upgrade -y` - pick up OS fixes by bumping the base image instead.
#   * `python-dev` - the Python 2 headers package, which recent Debian releases
#     no longer provide; the interpreter in this image ships its own headers.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
        git \
        make \
    && rm -rf /var/lib/apt/lists/*

# The build context must contain ab-test.sh and the Prodigy wheel named below.
COPY . /app

# Wheel file (relative to /app) that pip consults via --find-links.
# Override with: docker build --build-arg PRODIGY_WHEEL=<file> .
ARG PRODIGY_WHEEL=prodigy-1.10.2-cp36.cp37.cp38-cp36m.cp37m.cp38-linux_x86_64.whl
RUN pip install --no-cache-dir prodigy -f "${PRODIGY_WHEEL}"

# Runtime configuration read by Prodigy. PRODIGY_HOME points into the
# repository that ab-test.sh clones when the container starts.
ENV PRODIGY_HOST=0.0.0.0 \
    PRODIGY_PORT=8080 \
    PRODIGY_HOME=/app/data-ingestion/repositories/extraction/evaluation

RUN ["chmod", "+x", "/app/ab-test.sh"]
CMD ["sh", "/app/ab-test.sh"]
\ No newline at end of file
#!/bin/sh
# Container start-up script for the A/B-testing run.
#
# Clones the data-ingestion repository with the GitLab token supplied in the
# GITLAB environment variable, checks out the A/B-testing branch, copies the
# Prodigy database settings into place and installs the Python dependencies.
#
# Required environment:
#   GITLAB  GitLab access token used for the HTTPS clone.
set -e

# Fail early with a clear message when the token is missing. This runs before
# tracing is enabled so the token value is never echoed.
: "${GITLAB:?GITLAB must be set to a GitLab access token}"

set -x
eval "$(ssh-agent -s)"
mkdir -p ~/.ssh
mkdir -p ~/.creds
chmod 700 ~/.ssh
cd /app
pip install --upgrade pip setuptools wheel

# Tracing is switched off while the token is handled; with `set -x` active the
# token would be written to the container log as part of the clone URL.
set +x
GITLAB_TOKEN=${GITLAB}
# Skip the clone when a checkout already exists (e.g. after a container
# restart); otherwise `git clone` fails and `set -e` aborts the script.
if [ ! -d /app/data-ingestion/.git ]; then
    git clone "https://sbyim1:${GITLAB_TOKEN}@gitlab.gwdg.de/sshoc/data-ingestion.git"
fi
set -x

# TODO: remove checkout
cd /app/data-ingestion
git checkout 96-ab-testing

##### Copy database settings #####
cd /app/data-ingestion/repositories/extraction/
cp /app/prodigy.json /app/data-ingestion/repositories/extraction/prodigy.json

##### Install dependencies #####
pip install pydantic==1.7.2
pip install psycopg2==2.8.6
pip install 'dvc[gs]'
# The requirements file is installed from the repository root, as before.
# `set -e` already aborts on a failed cd, so no `|| exit` is needed.
cd /app/data-ingestion
pip install -U -r /app/data-ingestion/repositories/extraction/ner_ml/requirements.txt

#### Pull NER models to compare ####
cd /app/data-ingestion/repositories/extraction/
\ No newline at end of file
......@@ -19356,3 +19356,130 @@ output/Access, Ownership, Protection - The Ethics of Digital Scholarship_0.tei.x
output/Achieving Machine-Readable Mayan Text via Unicode - Blending "Old World" script-encoding with novel digital approaches_0.tei.xml
output/A Clear Temporal GIS Viewer and Software for Discovering Irregularities in Historical GIS_0.tei.xml
output/Academy of Finland Research Programme "Digital Humanities" (DIGIHUM)_0.tei.xml
output/Academy of Finland Research Programme "Digital Humanities" (DIGIHUM)_0.tei.xml
output/A Case Study of Integration of Services and Resources on a Web Service_0.tei.xml
output/A catalogue of digital editions_0.tei.xml
output/Accessibility and Reception - Vector Semantics, Reading Publics, and the Changing Reception of Literary Works_0.tei.xml
output/Access, Ownership, Protection - The Ethics of Digital Scholarship_0.tei.xml
output/Achieving Machine-Readable Mayan Text via Unicode - Blending "Old World" script-encoding with novel digital approaches_0.tei.xml
output/A Clear Temporal GIS Viewer and Software for Discovering Irregularities in Historical GIS_0.tei.xml
output/1 Million Dutch Newspaper Images available for researchers - The KBK-1M Dataset_0.tei.xml
output/3D-ICONS -- 3D Digitisation of Icons of European Architectural and Archaeological Heritage_0.tei.xml
output/3D Scanning for Preservation - Difficulties and Dissemination_0.tei.xml
output/3rd International Workshop on Computational History, HistoInformatics@DH 2016, Krakow, Poland, July 11, 2016_0.tei.xml
output/4 Default Text Structure - The TEI Guidelines_0.tei.xml
output/4Humanities - Designing Digital Advocacy_0.tei.xml
output/4 Ríos - una construcción transmedia de memoria histórica sobre el conflicto armado en Colombia_0.tei.xml
output/9 Dictionaries - The TEI Guidelines_0.tei.xml
output/A 3D Common Ground - Bringing Humanities Data Together Inside Online Game Engines_0.tei.xml
output/Abbreviations In Manuscripts - Systematization And Crowdsourcing By Ad Fontes_0.tei.xml
output/A Bilingual Digital Edition of Trinity College Cambridge MS O.1.77_0.tei.xml
output/Abundance and Access - Early Modern Political Letters in Contemporary and Digital Archives_0.tei.xml
output/Abusing the Concept of Normalization for Better Collation Results (and Profit)_0.tei.xml
output/Academic Migrants - A Digital Discussion of Transnational Teaching and Learning_0.tei.xml
output/Academic Pillow-Talk and Two Immersive Explorations of Linguistic Space_0.tei.xml
output/Academy of Finland Research Programme "Digital Humanities" (DIGIHUM)_0.tei.xml
output/A Case Study of Integration of Services and Resources on a Web Service_0.tei.xml
output/A catalogue of digital editions_0.tei.xml
output/Accessibility and Reception - Vector Semantics, Reading Publics, and the Changing Reception of Literary Works_0.tei.xml
output/Access, Ownership, Protection - The Ethics of Digital Scholarship_0.tei.xml
output/Achieving Machine-Readable Mayan Text via Unicode - Blending "Old World" script-encoding with novel digital approaches_0.tei.xml
output/A Clear Temporal GIS Viewer and Software for Discovering Irregularities in Historical GIS_0.tei.xml
output/A Collaborative Workspace for Archival Research - MIA and the EURONEWS Project_0.tei.xml
output/A Community Fab Lab - Introductions to Making_0.tei.xml
output/A Comparative Analysis of Bibliographic Ontologies - Implications for Digital Humanities_0.tei.xml
output/A Comparative Kalendar - Building a Research Tool for Medieval Books of Hours from Distributed Resources_0.tei.xml
output/A Comparative Study of Astronomical Clock towers in Europe and China based on their detailed 3D modeling_0.tei.xml
output/A comparative study of sentiment and topics in migration related tweets_0.tei.xml
output/A Comprehensive Image-Based Digital Edition Using CEX - A fragment of the Gospel of Matthew_0.tei.xml
output/A concept of data modeling for the humanities_0.tei.xml
output/A Conceptual Framework for the Analysis of Multilayer Networks in the Humanities_0.tei.xml
output/A corpus approach to cultural keywords - a critical corpus-based analysis of ideology in the Blair years (1998-2007) through print news reporting_0.tei.xml
output/A Corpus Approach to Manuscript Abbreviations (CAMA)_0.tei.xml
output/Acquisition and Analysis of a Meme Corpus to Investigate Web Culture_0.tei.xml
output/AdA Filmontology - a machine-readable Film Analysis Vocabulary for Video Annotation_0.tei.xml
output/Adapting a Spelling Normalization Tool Designed for English to 17th Century Dutch_0.tei.xml
output/A Data Model for Digital Musicology and its Current State - The Music Encoding Initiative_0.tei.xml
output/A Day in the Life of Digital Humanities_0.tei.xml
output/Adding Semantics To Comics Using A Crowdsourcing Approach_0.tei.xml
output/Adding Value to a Research Infrastructure Through User-contributed ePublications_0.tei.xml
output/Addressing Torture in Iraq through Critical Digital Media Art - Hearts and Minds - The Interrogations Project_0.tei.xml
output/A Deep Gazetteer of Time Periods_0.tei.xml
output/A Demonstration of Multispectral Imaging_0.tei.xml
output/A Design Process Model for Inquiry-driven, Collaboration-first Scholarly Communications_0.tei.xml
output/A DH-Leavened Musicological Toolbox_0.tei.xml
output/A Digital Archive of Buddhist Temple Gazetteers_0.tei.xml
output/A digital assessment tool for monitoring and planning food security interventions in rural households of Uganda_0.tei.xml
output/A Digital Edition of Leonhard Euler's Correspondence with Christian Goldbach_0.tei.xml
output/A Digital Humanities Approach to the Design of Gesture-Driven Interactive Narratives_0.tei.xml
output/A Digital Study of Ralph Ellison's Integrative Form_0.tei.xml
output/Adjusting LERA For The Comparison Of Arabic Manuscripts Of _Kalīla wa-Dimna__0.tei.xml
output/A Flow for Digitizing Japanese Historical Materials and their Long-Term Use_0.tei.xml
output/A formação de professores/pesquisadores de História no contexto da Cibercultura - História Digital, Humanidades Digitais e as novas perspectivas de ensino no Brasil_0.tei.xml
output/Afterlives of Digitization_0.tei.xml
output/Against the Binary of Gender - A Case for Considering the Many Dimensions of Gender in DH Teaching and Research_0.tei.xml
output/Agent-Based Modeling and Historical Simulation_0.tei.xml
output/Agents for Actors - A Digital Humanities framework for distributed microservices for text linking and visualization_0.tei.xml
output/A Graphical User Interface for LDA Topic Modeling_0.tei.xml
output/Agréger le passé en ligne - Euchronie, le passé ici et maintenant!_0.tei.xml
output/A Humanist Perspective on Building Ontologies in Theory and Practice_0.tei.xml
output/AI4AV (Artificial Intelligence for Audiovisual) - Design and Evaluation of a Shared System for LAMs_0.tei.xml
output/À la Croisée des Discours Littéraire et Scientifique - La Comparaison comme Haute Figure Dialogique_0.tei.xml
output/A lesson in applied minimalism - adopting the TEI processing model_0.tei.xml
output/Alfabetización digital, prácticas y posibilidades de las humanidades digitales en América Latina y el Caribe_0.tei.xml
output/Algorithmically Mapping Historical Fascination in Late Imperial Chinese Literature_0.tei.xml
output/Algorithms of Resistance - Using OCR and AI for Social Justice_0.tei.xml
output/A literary rat race_0.tei.xml
output/All in the Family - Testing Burrows' Delta on Robert Louis Stevenson's Collaboratively Authored Volumes The Dynamiter and The Wrecker_0.tei.xml
output/All papers are data papers - from open principles to digital methods_0.tei.xml
output/Almost All the Way Through - All at Once_0.tei.xml
output/A machine learning methodology to analyze 3D digital models of cultural heritage objects_0.tei.xml
output/A Management of Personal Name with Alternate Name and its Searching for Japanese Historical Study_0.tei.xml
output/Ambiances - A Framework to Write and Visualize Poetry_0.tei.xml
output/A Method for Record Linkage with Sparse Historical Data_0.tei.xml
output/"A Model for International Cooperation - Emblematica Online and Linked Data in Research and Pedagogy"_0.tei.xml
output/An Advising Framework for Intersecting Disciplines within Graduate Digital Humanities Certificate Programs_0.tei.xml
output/Análisis del coro como personaje en la dramaturgia grecolatina y española incluidas en DraCor_0.tei.xml
output/Analyse automatique pour une étude du genre - quels jugements des écrivaines au XIXe siècle ?_0.tei.xml
output/Analysis and Categorisation of Research Software in the Digital Humanities_0.tei.xml
output/Analysis and Visualisation of Complex Familial Relationships in Greek Mythology_0.tei.xml
output/Analytical study of 20th century paint_0.tei.xml
output/Analyzing Border Crossings in Contemporary Irish Literature with Literary Mapping_0.tei.xml
output/Analyzing Link Topology to Quantify the Degree of Planned Obsolesce in Online Digital Humanities Projects_0.tei.xml
output/Analyzing Social Networks of XML Plays - Exploring Shakespeare's Genres_0.tei.xml
output/Analyzing the 17th Century Theatre Critique Texts with a Semantic Annotation Tool Driven by a Dedicated Ontology_0.tei.xml
output/An Approach to Ancient-to-modern and Cross-script Information Access for Traditional Mongolian Historical Collections_0.tei.xml
output/An Archaeology of Americana - Recovering the Hemispheric Origins of Sabin's Bibliotheca Americana to Contest the Database's (National) Limits_0.tei.xml
output/A national virtual laboratory for the humanities in Australia - the HuNI (Humanities Networked Infrastructure) project_0.tei.xml
output/An Augmented Reality Mobile Application for Intergenerational Learning and Critical Connection_0.tei.xml
output/Ancient Maya Writings as High-Dimensional Data - a Visualization Approach_0.tei.xml
output/An Easy-to-use Data Analysis and Visualization Tool for Studying Chinese Buddhist Literature_0.tei.xml
output/An Environment to Support User-Structured Digital Humanities Sources_0.tei.xml
output/A Neural OCR Engine for North Saami_0.tei.xml
output/An Evaluation of the Involvement of General Users in a Cultural Heritage Collection_0.tei.xml
output/A new approach to libraries in the Digital Humanities - the case of "Fonte-Gaia"_0.tei.xml
output/A New Digital Method for a New Literary Problem - A Proposed Methodology for Briding the "Generalist" - "Specialist" Divide in the Study of World Literature_0.tei.xml
output/A New Ecological Model for Learning_0.tei.xml
output/A New Methodology for Error Detection and Data Completion in a Large Historical Catalogue Based on an Event Ontology and Network Analysis_0.tei.xml
output/A New Spatial Analysis of the Early Chesapeake Architecture_0.tei.xml
output/Animating Text Newcastle University_0.tei.xml
output/An Improvement of Collaborative Digital Scholarly Edition with IIIF_0.tei.xml
output/An Interactive 3D Visualization of RDF-based Digital Editions_0.tei.xml
output/An Interactive Interface for Text Variant Graph Models_0.tei.xml
output/An interactive multimedia companion to Wagner's Lohengrin - encoding and visualising a motivic study_0.tei.xml
output/An Inter-Disciplinary Approach to Web Programming - A Collaboration Between the University Archives and the Department of Computer Science_0.tei.xml
output/An Introduction to X-Ray Fluorescence (XRF) Analysis in Archaeology_0.tei.xml
output/An Islamic Manuscript Database as a Network of Objects_0.tei.xml
output/An Iterative 3DGIS Analysis of the Role of Visibility in Ancient Landscapes_0.tei.xml
output/ANJA, ¿dónde están los encabalgamientos?_0.tei.xml
output/Annotating and Georeferencing of Digitized Early Maps_0.tei.xml
output/Annotating Reader Absorption_0.tei.xml
output/Annotating spatial entities in Romanian Novels_0.tei.xml
output/An online course system easy to make, preserve, and promote critical thinking_0.tei.xml
output/An Online Odyssey - Digital Storytelling in the Humanities Classroom_0.tei.xml
output/An Ontological Model for Inferring Psychological Profiles and Narrative Roles of Characters_0.tei.xml
output/An Ontology for Critical Editions of Variant Text_0.tei.xml
output/Anonymity and Online Discussion - A New Framework for Analysis_0.tei.xml
output/An Open Data Approach to Revealing Indigenous Texts in Large-Scale Digital Repositories - A Case-Study of Locating Pages of Māori Text in the HathiTrust_0.tei.xml
output/An Open Source Toolkit for Flexible Browsing of Historical Maps on the Web_0.tei.xml
output/An OWL 2 Formal Ontology for the Text Encoding Initiative_0.tei.xml
......@@ -28,3 +28,5 @@ https://www.khronos.org/files/collada_schema_1_5
http://www.lido-schema.org/schema/v1.0/lido-v1.0.xsd
https://www.khronos.org/files/collada_schema_1_5
http://www.lido-schema.org/schema/v1.0/lido-v1.0.xsd
https://www.khronos.org/files/collada_schema_1_5
http://www.lido-schema.org/schema/v1.0/lido-v1.0.xsd
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment