Commit 1dd93b23 authored by Ubbo Veentjer's avatar Ubbo Veentjer
Browse files

Merge branch 'release/3.1.0'

parents f6ec3b3f 3a910517
Pipeline #135710 passed with stages
in 7 minutes and 30 seconds
/.gradle
/build
/node_modules
/package-lock.json
# GitLab-maintained scanning templates pulled into this pipeline.
include:
  - template: SAST.gitlab-ci.yml
  - template: Dependency-Scanning.gitlab-ci.yml
  - template: Code-Quality.gitlab-ci.yml

# Stage order of the pipeline.
stages:
  - build
  - test
  - package
  - deploy

# Default image and service for jobs that do not set their own image:
# the Docker CLI talking to a Docker-in-Docker daemon.
image: docker:latest
services:
  - docker:dind

variables:
  # Run Gradle without its background daemon.
  GRADLE_OPTS: "-Dorg.gradle.daemon=false"
  # Per-ref image names (commit-SHA tag and "latest").
  DOCKER_IMAGE_TAGGED: $CI_REGISTRY_IMAGE/$CI_COMMIT_REF_SLUG:$CI_COMMIT_SHA
  DOCKER_IMAGE_LATEST: $CI_REGISTRY_IMAGE/$CI_COMMIT_REF_SLUG:latest
  # Release image names (git tag and "latest").
  DOCKER_RELEASE_IMAGE_TAGGED: $CI_REGISTRY_IMAGE:$CI_COMMIT_TAG
  DOCKER_RELEASE_IMAGE_LATEST: $CI_REGISTRY_IMAGE:latest
# Assembles the project with Gradle and keeps the resulting WAR as an artifact.
build:
  stage: build
  image: gradle:jdk8
  script:
    - gradle --build-cache assemble
  # Upload-only cache (policy: push), keyed by the ref name.
  cache:
    key: "$CI_COMMIT_REF_NAME"
    paths:
      - build
      - .gradle
    policy: push
  artifacts:
    expire_in: 1 week
    paths:
      - build/libs/*.war
# Runs the Gradle checks and prints an instruction-coverage percentage computed
# from the JaCoCo CSV report so that GitLab can pick it up.
test:
  stage: test
  image: gradle:jdk8
  script:
    - gradle check
    # Sums columns 4 and 5 of every CSV row (JaCoCo's missed / covered
    # instruction counts) and prints "<percentage> % covered".
    - awk -F"," '{ instructions += $4 + $5; covered += $5 } END { print covered, "/", instructions, " instructions covered"; print 100*covered/instructions, "% covered" }' build/reports/jacoco/test/jacocoTestReport.csv
  # The dot is escaped and the fractional part is optional, so whole-number
  # results such as "7 % covered" are captured as well as "87.5 % covered".
  coverage: '/\d+(?:\.\d+)? % covered/'
  artifacts:
    reports:
      junit: build/test-results/test/TEST-*.xml
    paths:
      - build/reports/jacoco/
  # Download-only cache (policy: pull), keyed by the ref name.
  cache:
    key: "$CI_COMMIT_REF_NAME"
    policy: pull
    paths:
      - build
      - .gradle
# Builds the Docker image from the WAR produced by the build job and pushes it
# under the per-ref name, tagged with the commit SHA and with "latest".
# Skipped on master.
package:
  stage: package
  script:
    # CI_JOB_TOKEN replaces the deprecated CI_BUILD_TOKEN. Feeding it through
    # stdin keeps the secret out of the process arguments and avoids Docker's
    # warning about "-p" on the command line.
    - echo "$CI_JOB_TOKEN" | docker login -u gitlab-ci-token --password-stdin "$CI_REGISTRY"
    - docker build -t "$DOCKER_IMAGE_TAGGED" .
    - docker push "$DOCKER_IMAGE_TAGGED"
    - docker tag "$DOCKER_IMAGE_TAGGED" "$DOCKER_IMAGE_LATEST"
    - docker push "$DOCKER_IMAGE_LATEST"
  # Only the build job's artifacts (the WAR) are fetched.
  dependencies:
    - build
  except:
    - master
# Builds the Docker image for a git tag and pushes it under the release name,
# tagged with the git tag and with "latest". Runs for tags only.
release-image:
  stage: package
  script:
    # CI_JOB_TOKEN replaces the deprecated CI_BUILD_TOKEN. Feeding it through
    # stdin keeps the secret out of the process arguments and avoids Docker's
    # warning about "-p" on the command line.
    - echo "$CI_JOB_TOKEN" | docker login -u gitlab-ci-token --password-stdin "$CI_REGISTRY"
    - docker build -t "$DOCKER_RELEASE_IMAGE_TAGGED" .
    - docker push "$DOCKER_RELEASE_IMAGE_TAGGED"
    - docker tag "$DOCKER_RELEASE_IMAGE_TAGGED" "$DOCKER_RELEASE_IMAGE_LATEST"
    - docker push "$DOCKER_RELEASE_IMAGE_LATEST"
  # Only the build job's artifacts (the WAR) are fetched.
  dependencies:
    - build
  only:
    - tags
# Publishes the JaCoCo HTML report from the test job under public/coverage.
# Runs on develop only.
pages:
  stage: deploy
  only:
    - develop
  # The report directory comes from the test job's artifacts.
  dependencies:
    - test
  script:
    - mkdir public
    - mv build/reports/jacoco/test/html public/coverage
  artifacts:
    paths:
      - public
# Image that runs the portal WAR on an Alpine-based OpenJDK 8.
FROM openjdk:8-jdk-alpine
# Create a system group and user "tgrep" so the application does not run as root.
RUN addgroup -S tgrep && adduser -S tgrep -G tgrep
USER tgrep:tgrep
# Location (glob) of the WAR in the build context; override with --build-arg WAR_FILE=...
ARG WAR_FILE=build/libs/*.war
# NOTE(review): the destination is relative while ENTRYPOINT uses /portal.war;
# this assumes the base image's working directory is "/" - confirm.
COPY ${WAR_FILE} portal.war
# Start the WAR with "java -jar" (exec form, no shell wrapper).
ENTRYPOINT ["java","-jar","/portal.war"]
......@@ -14,32 +14,27 @@ These instructions will get you a copy of the project up and running on your loc
You need Java (at least in version 8) and Gradle to run a development version.
```bash
sudo apt install openjdk-8-jdk
```
sudo apt install openjdk-8-jdk
Gradle needs to be in a version newer than 4.6, which is not yet available in Debian upstream. There are [instructions about installation online](https://gradle.org/install). This is how to install it with [sdkman](https://sdkman.io/):
```bash
sdk install gradle 6.0.1
```
This project is built with Gradle. The Gradle wrapper script `gradlew` will take care of installing Gradle if it is not available.
### Installing
First get the JavaScript libraries, images and fonts managed with npm in place:
```bash
gradle -q copyAssets
```
Run a local version of the portal:
```bash
gradle bootRun
```
./gradlew bootRun
Access the portal at http://localhost:8080/ afterwards
### Configuration
Edit `src/main/resources/application.properties` to change configuration.
## Running the tests
Running the tests requires internet access and the services at https://textgridlab.org to be available.
./gradlew test
Test output is written to `build/reports/tests/test/index.html`
plugins {
id 'org.springframework.boot' version '2.2.4.RELEASE'
id 'org.springframework.boot' version '2.3.0.RELEASE'
id 'com.github.node-gradle.node' version '2.2.0'
}
apply plugin: 'java'
apply plugin: 'io.spring.dependency-management'
apply plugin: 'jacoco'
apply plugin: 'war'
group = 'info.textgrid.rep'
version = '3.1.0'
sourceCompatibility = '1.8'
repositories {
mavenCentral()
......@@ -16,11 +22,24 @@ repositories {
dependencies {
implementation 'org.springframework.boot:spring-boot-starter-web'
implementation 'org.apache.tomcat.embed:tomcat-embed-jasper'
implementation 'javax.servlet:jstl'
implementation 'org.apache.cxf:cxf-rt-rs-client:3.3.4'
implementation 'io.jsonwebtoken:jjwt:0.9.1'
implementation 'com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider'
implementation 'info.textgrid.middleware.clients:textgrid-clients:3.4.1-SNAPSHOT'
implementation 'javax.servlet:jstl'
implementation 'org.apache.cxf:cxf-rt-rs-client:3.3.6'
implementation 'io.jsonwebtoken:jjwt:0.9.1'
implementation 'com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider'
implementation 'com.atlassian.commonmark:commonmark:0.14.0'
implementation 'info.textgrid.middleware.clients:textgrid-clients:3.4.3'
testImplementation('org.springframework.boot:spring-boot-starter-test') {
exclude group: 'org.junit.vintage', module: 'junit-vintage-engine'
}
}
test {
useJUnitPlatform()
}
// show deprecation and unchecked warnings when compiling java src
tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation"
}
/**
......@@ -36,23 +55,35 @@ node {
download = true
}
// execute task: gradle -q copyJS
task copyJS(type: Copy, dependsOn: npmInstall) {
from 'node_modules/jquery/dist/jquery.min.js',
'node_modules/openseadragon/build/openseadragon/openseadragon.min.js'
into 'src/main/resources/static/js'
}
// npm install should be run before processResources
processResources.dependsOn npmInstall
task copyImages(type: Copy, dependsOn: npmInstall) {
from 'node_modules/openseadragon/build/openseadragon/images'
into 'src/main/resources/static/images/openseadragon'
// Bundle the Markdown docs and the npm-managed front-end assets (fonts,
// scripts, OpenSeadragon images) into the processed resources.
processResources {
    from('docs') { into 'docs' }
    from('node_modules/font-awesome/fonts') { into 'static/fonts' }
    from('node_modules/jquery/dist/jquery.min.js') { into 'static/js' }
    from('node_modules/openseadragon/build/openseadragon/openseadragon.min.js') { into 'static/js' }
    from('node_modules/openseadragon/build/openseadragon/images') { into 'static/images/openseadragon' }
}
task copyFonts(type: Copy, dependsOn: npmInstall) {
from 'node_modules/font-awesome/fonts'
into 'src/main/resources/static/fonts'
test {
finalizedBy jacocoTestReport
}
// execute task: gradle -q copyAssets
task copyAssets(dependsOn: [copyJS, copyImages, copyFonts])
jacocoTestReport {
reports {
csv.enabled true
}
}
# Documentation pages in the TextGridRep portal
This folder contains all static pages belonging to the TextGridRep portal. They are all written in [Markdown](https://daringfireball.net/projects/markdown/).
## Naming schema
The files in this directory follow the naming scheme
[pagename].[lang].md
Where `pagename` is the identifier used for the page. Supported languages are English and German, they are coded by their iso code, so `en` for English and `de` for German. The `.md` extension identifies the file as Markdown file.
The markdown files are later rendered as html within the TextGridRep portal page in the location /docs/ identified by their `pagename`.
This means `syntax.en.md` is shown at <https://textgridrep.org/docs/syntax> if your language is English, `syntax.de.md` is shown if you switch to German.
Some more examples:
* [voyant.en.md](voyant.en.md) -> <https://textgridrep.org/docs/voyant> - choose lang English
* [voyant.de.md](voyant.de.md) -> <https://textgridrep.org/docs/voyant> - choose lang German
* [annotate.en.md](annotate.en.md) -> <https://dev.textgridrep.org/docs/annotate> - choose lang English
* [annotate.de.md](annotate.de.md) -> <https://dev.textgridrep.org/docs/annotate> - choose lang German
### index.en.md / index.de.md
An exception from the naming scheme is `index.en.md` or `index.de.md` which is not only available at <https://textgridrep.org/docs/index> but also at the root of the portal: <https://textgridrep.org> . If you want to refer to it do not use the `docs/index` location ;-).
## Deployment / development system: https://dev.textgridrep.org
If you make changes like changing or adding files these will be available automatically on the [development system](https://dev.textgridrep.org/) within at most half an hour. You will find your page by its pagename in the /docs/ folder, even if it's not yet linked from anywhere. So a new page `testitest.en.md` will be visible at `https://dev.textgridrep.org/docs/testitest` if the language is English.
## Syntax
The Markdown syntax used is described in the [CommonMark Spec 0.29](https://spec.commonmark.org/0.29/).
### Linking between pages
Linking between pages is possible; address them by their relative address in the `/docs/` section. Refer to the syntax.[en/de].md document as `/docs/syntax`. Example:
```markdown
Find more info on the [syntax](/docs/syntax) page.
```
### Images
There is no place for placing own images yet, I will work on it and update this document accordingly. Feel free to ask if you need this feature urgently.
### Markdown parser in use / supported syntax / extensions
For parsing Markdown and rendering HTML the [commonmark-java](https://github.com/atlassian/commonmark-java) library is used. Currently implemented by this library is the [CommonMark Spec 0.29](https://spec.commonmark.org/0.29/). Look at the [CommonMark Dingus](http://spec.commonmark.org/dingus/) for testing and previewing the syntax.
Currently there are no extensions activated, but there are some [available](https://github.com/atlassian/commonmark-java#extensions), if you need one just ask.
If there is a need for even more extensions or different syntax we may painlessly switch to [flexmark-java](https://github.com/vsch/flexmark-java) for parsing, which is a commonmark-java fork which supports many more Markdown flavours and extensions.
# TODO
\ No newline at end of file
# TODO
\ No newline at end of file
Das TextGrid Repository, ein Langzeitarchiv für geisteswissenschaftliche Forschungsdaten, liefert einen umfangreichen,
durchsuch- und nachnutzbaren Bestand XML/TEI-kodierter Texte, Bilder und Datenbanken. Zum stetig wachsenden Bestand
zählen mit der [Digitalen Bibliothek von TextGrid](https://textgrid.de/digitale-bibliothek) heute z.B. Werke von rund 600 Autorinnen und Autoren deutschsprachiger
Belletristik (Prosa, Lyrik, Dramen) und Sachliteratur von den Anfängen des Buchdrucks bis zum frühen 20. Jahrhundert,
die in verschiedenen Ausgabeformaten (z.B. XML, ePub, PDF) gespeichert, publiziert und durchsucht werden können.
Mit verschiedenen Werkzeugen, wie etwa zur Bildbetrachtung oder zur quantitativen Textanalyse, können die Texte
weiter erforscht und visualisiert werden.
Das TextGrid Repository ist Teil der Virtuellen Forschungsumgebung [TextGrid](https://textgrid.de/), die neben dem fachwissenschaftlichen
Langzeitarchiv eine Open Source-Software für die kollaborative Erstellung und Publikation z.B. digitaler Editionen
auf XML/TEI-Basis anbietet.
Durch Eingabe eines Suchwortes kann direkt im Bestand gesucht werden; alternativ kann dieser über "Explore"
nach vordefinierten Kategorien (z.B. "Autor", "Genre") angezeigt und aufgerufen werden.
Die Suche unterstützt die Anfragesprache [Lucene](https://lucene.apache.org/core/); neben der Freitextsuche ermöglicht sie u.a. Anfragen nach
folgendem Muster:
[edition.agent.value:goethe AND pudel](/search?query=edition.agent.value%3Agoethe+AND+pudel)
Weitere Informationen zur Suchsyntax, zu Suchkategorien und -filtern finden Sie in der [Hilfe](/docs/syntax).
**Mitmachen**
Möchten Sie eigenes XML-erschlossenes Material im TextGrid Repository zitierfähig archivieren und zugänglich machen?
Nehmen Sie Kontakt mit uns auf: https://textgrid.de/kontakt/
# Mission Statement
The TextGrid Repository (TextGridRep) is a digital preservation archive for human sciences research data providing a variety of data for teaching and research purposes. It promotes open access to research data including open standards allowing an efficient reuse for research. The TextGridRep also provides researchers with a comprehensive and reliable service to store their data permanently, well described and with a stable reference for citation and reuse.
The TextGridRep is part of the [TextGrid](https://textgrid.de/en/) Virtual Research Environment (VRE), which offers besides digital preservation also open-source software for collaborative creation, analysis and publication of text and images. The TextGrid VRE is optimised for XML/TEI formats and editorial publication out of the TextGrid Laboratory (TextGridLab). An independent publication from the TextGridLab including other types of data and formats is equally possible by tools using the TextGridRep API such as TG-import.
The TextGrid Repository is a community orientated result of a national program to establish a Digital Humanities infrastructure in Germany and operates together with the DARIAH-DE Repository as part of the [Humanities Data Center](https://humanities-data-centre.de/) (HDC).
The mission of the TextGridRep is to serve national and international research, teaching and learning by providing long term preservation, further processing, openly sharing and dissemination of digital research data according to ethical and scientific standards of the international research community.
The repository's mission is in line with the [Open Access strategy of the University of Göttingen](https://www.uni-goettingen.de/en/221506.html) and its [research data policy](http://www.uni-goettingen.de/en/488918.html). It provides all necessary resources to promote and support making the research results of its researchers as widely accessible and usable as possible. This commitment to open access is reflected in the organisational and technical infrastructure as well as in its archiving procedures of the repository to allow the use of publications and data without any access restriction in order “to support research and innovation in science […] and society in a direct and lasting way”.
In terms of [data management](https://wiki.de.dariah.eu/display/TextGrid/Digital+Object+Management), publication and preservation workflows are based on the Open Archiving Information System, see [TextGrid Repository – Digital Object Management](https://wiki.de.dariah.eu/display/TextGrid/Digital+Object+Management#DigitalObjectManagement-TextGridandtheOpenArchivalInformationSystem(OAIS)).
The commitment is strongly supported by the two relevant institutions ensuring also the long-term sustainability of the repository and its data: The [Göttingen State and University Library](https://www.sub.uni-goettingen.de/en/about-us/portrait/) (SUB) and the [Gesellschaft für wissenschaftliche Datenverarbeitung Göttingen mbH](https://www.gwdg.de/about-us) (GWDG).
Both institutions share a commitment to the sustainability of services and to [FAIR principles](https://www.go-fair.org/fair-principles/) in research and its infrastructures. For the SUB research data management is an important aspect of the [strategic aims of Göttingen State and University Library](https://www.sub.uni-goettingen.de/en/about-us/portrait/strategy/#c13124). Not only for research data, but for all digital resources, Göttingen State and University Library follows a [policy](https://www.sub.uni-goettingen.de/en/about-us/portrait/goettingen-state-and-university-library-digital-policies-guiding-principles/), which contains guiding principles in order to ensure the quality for access, metadata and IT architecture.
In the context of open access, the Göttingen State and University Library also participates in national and international projects, such as the [Confederation of Open Access Repositories](https://www.coar-repositories.org/) (COAR) and [OpenAIRE](http://www.openaire.eu/). In this perspective the TextGrid Repository is also in line with open access requirements of important funders of the German research system as the German Research Foundation (DFG) (see <https://www.dfg.de/formulare/2_00/v/dfg_2_00_de_v1215.pdf>, p. 44, section 12.2.1) and the European Union. Mandates of the European Commission and the European Research Council require as stated e.g. in the European Open Access Pilot on Open Data all funded projects to publish their results in Open Access (see the [Horizon 2020 Online Manual](https://ec.europa.eu/research/participants/docs/h2020-funding-guide/cross-cutting-issues/open-access-data-management/open-access_en.htm)). The Research Department at Göttingen University offers detailed information about the [European Union Open Access Pilot](https://www.uni-goettingen.de/en/487290.html) also on its web pages.
# Corpus and Digital Library of TextGrid
The TextGrid Repository offers an extensive searchable and adaptable corpus of XML/TEI encoded texts and images. Amongst the continuously growing corpus is the [Digital Library of TextGrid](https://textgrid.de/en/digitale-bibliothek), which consists of works of more than 600 authors of German fiction (prose, verse and drama), as well as nonfiction from the beginning of the printing press to the early 20th century. The files are saved in different output formats (XML, ePub, PDF), published and made searchable. Different tools e.g. viewing or quantitative text-analysis tools can be used for visualization or to further research the text.
You can search within the corpus by entering a search term; alternatively “Explore” will lead you to predefined categories (e.g. “author”, “genre”).
The search function supports the query language [Lucene](https://lucene.apache.org/core/); in addition to the free text search it allows queries with the following pattern:
[edition.agent.value:goethe AND pudel](/search?query=edition.agent.value%3Agoethe+AND+pudel)
More information on search syntax, search categories and filters is available in the [Help](/docs/syntax) section.
# Citation recommendation
TextGrid Consortium. 2006–2014. TextGrid: A Virtual Research Environment for the Humanities. Göttingen: TextGrid Consortium. textgrid.de.
# Participation
Would you like your own XML encoded files to be archived, made quotable and accessible through the TextGrid Repository? Then contact us: <https://textgrid.de/en/kontakt/>
# TODO
\ No newline at end of file
# TODO
\ No newline at end of file
# Suchen im TextGrid Repository
## Die Syntax von Apache Lucene
Das TextGrid Repository nutzt die Syntax von Apache Lucene 2.9.4. Die Mechanismen dieser Syntax werden im Folgenden erläutert. Die Darstellung folgt dabei weitgehend der Zusammenfassung auf der [Website von Apache Lucene](https://lucene.apache.org/core/5_1_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description).
Suchanfragen werden in dieser Syntax mit Suchbegriffen und sogenannten Operatoren gebildet. Suchbegriffe können über Felder näher bestimmt werden.
## Suchbegriffe und -felder
Es können ebenso einzelne Worte als Suchbegriffe dienen wie Phrasen, die über doppelte Anführungszeichen als zusammengehörig gekennzeichnet werden, z.B. "TextGrid Repository". Suchbegriffe können auch nur auf bestimmte Felder bezogen sein:
Feldname:Suchbegriff
oder
Feldname:"Mehrteilige Phrase"
TextGrid kennt verschiedene Felder wie
* „title“ für den Werktitel
* „edition.agent.value“ für den Verfasser
* „language“ für die Sprache des Textes
* „notes“ für Anmerkungen zum Text
* „genre“ für Gattungen
* „rightsHolder“ für den Inhaber der Rechte an der digitalen Textfassung
* „work.dateOfCreation.date“ sowie „work.dateOfCreation.notBefore“ und „work.dateOfCreation.notAfter“ für Datierungen der Werke.
Die „Advanced Search“ bietet die Möglichkeit, diese Felder für die Suche in den Metadaten direkt auszuwählen und auch mit Operatoren zu Suchanfragen zu verbinden.
Suchbegriffe können außerdem in verschiedener Weise abgewandelt werden. Es gibt verschiedene Platzhalter, Möglichkeiten für eine ungenaue Suche, Angabe von Distanzen und Reihen sowie die Berücksichtigung der Relevanz.
* **Platzhalter:** Bei einzelnen Worten kann ? für einzelne beliebige Zeichen, * für beliebig viele Zeichen verwendet werden, z.B. `Text?rid` oder `*xtgrid`. ? und * am Anfang des Wortes können die Suche verlängern.
* **Ungenaue Suche:** Mit angehängtem ~ kann ein Wort mit einer gewissen Ungenauigkeit gemäß der Levenshteindistanz gefunden werden. Auf das ~ kann ein Zahlenwert zwischen 0 und 1 folgen. Je näher der Wert der 1 ist, umso größer ist die geforderte Ähnlichkeit. Der Standardwert beträgt 0.5.
* **Abstände:** Bei Phrasen kann mit einem angehängten ~ und einem folgenden Zahlenwert der Abstand zwischen den einzelnen Worten bestimmt werden, z.B. `"TextGrid Repository"~10`. Der Zahlenwert bestimmt die Anzahl der Worte, die zwischen den gesuchten Worten liegen dürfen. Die „Advanced Search“ erlaubt, diesen Wert auch direkt über die Eingabemaske festzulegen.
* **Reihen:** Verknüpft man Suchwerte mit "TO", werden alle Werte innerhalb eines Feldes zwischen ihnen gefunden. Dies gilt ebenso für Zahlenwerte wie für Worte. Bei Worten gilt die alphabetische Ordnung. [] dienen für Suchen inklusive der genannten Werte, {} für Suchen, die diese nicht berücksichtigen. Z.B. findet `edition.agent.value:[Aristophanes TO Zuckmayer]` alle Autorennamen zwischen „Aristophanes“ und „Zuckmayer“ inklusive dieser beiden Namen.
* **Relevanz:** Mit angehängtem ^ und folgendem Zahlenwert können Suchbegriffe oder Phrasen als besonders relevant gekennzeichnet werden, z.B. `TextGrid^5 Repository`. Der Standardwert ist 1.
Einige Zeichen müssen mit \ maskiert werden: `+ - && || ! ( ) { } [ ] ^ " ~ * ? : \`.
## Operatoren
Lucene verwendet Boolesche Operatoren, um Suchbegriffe und -phrasen zu verknüpfen. Der Standardwert ist OR bzw. ||. Boolesche Operatoren müssen, wenn sie als Wort ausgeschrieben werden, in Großbuchstaben stehen.
* **AND (auch &&):** Es werden Texte gefunden, die alle gesuchten Begriffe enthalten.
* **+:** Dies kennzeichnet, dass der folgende Suchbegriff vorhanden sein muss.
* **NOT (auch ! oder -):** Dies kennzeichnet, dass der folgende Suchbegriff nicht vorhanden sein darf. Die Verwendung zu Beginn einer Suchabfrage kann den Suchvorgang verlangsamen.
Lucene unterstützt Klammerung zur Verknüpfung Boolescher Operatoren, z.B. findet `TextGrid AND (Laboratory OR Repository)` alle Texte, die das Wort „TextGrid“ enthalten sowie das Wort „Laboratory“ oder „Repository“. Dieser Mechanismus kann auch in Bezug auf Felder verwendet werden.
\ No newline at end of file
# Searching in the TextGrid Repository
## The syntax of Apache Lucene
The TextGrid Repository uses the syntax of Apache Lucene 2.9.4. The mechanism of the syntax will be explained in the following paragraphs. The demonstration largely follows the summary on the [website of Apache Lucene](https://lucene.apache.org/core/5_1_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description).
Search queries in this syntax are built from search terms and so-called operators. Search terms can be narrowed down further by means of fields.
## Search terms and search fields
It is possible to search for single words as well as phrases that are shown to belong together via quotation marks, e.g. “TextGrid Repository”. Search terms can be limited to certain fields:
field-name:search-term
or
field-name:"multipart phrase"
These are the different fields of TextGrid:
* “title” for the title of the work
* “edition.agent.value” for the author
* “language” for the language of the work
* “notes” for notes of the text
* “genre” for the genre
* “rightsHolder” for the rights holder of the digital version of the text
* “work.dateOfCreation.date”, “work.dateOfCreation.notBefore” and “work.dateOfCreation.notAfter” for dates of the work
The “Advanced Search” offers the possibility to choose the fields to search in the meta data directly and to connect them with operators for search queries.
Search queries can be altered in different ways. There are place holders, options for a vague search, specifying distances between words, searching in a defined range and appointing different relevance scales to search terms.
* **Place holders:** For single words ? replaces one character, and * stands for any number of characters. E.g. `Text?rid` or `*xtgrid`.
* **Vague search:** Adding a ~ to the word results in a vagueness of the search according to the Levenshtein distance. Following the ~ can be a value between 0 and 1. The closer the value is to 1, the higher the demanded resemblance. The standard value is 0.5.
* **Distances:** When searching for phrases, adding a ~ and a number after the phrase specifies the distance between the single words within the phrase. E.g. `"TextGrid Repository"~10`. The number stands for how many words can lie between the words. The “Advanced Search” gives the option to directly enter the number in the searching mask.
* **Ranges:** When connecting two search values with a “TO”, all values between them are found within the field. This applies to numerical values as well as words. For words the alphabetical order counts. Searches including the given search values are written within [], while searches excluding them are written within {}. E.g. `edition.agent.value:[Aristophanes TO Zuckmayer]` searches for all author names between “Aristophanes” and “Zuckmayer” including those names.
* **Relevance:** By adding a ^ and a number after a search term or phrase, they can be marked as more relevant, e.g. `TextGrid^5 Repository`. The standard value is 1.
Some characters must be masked with a \ : `+ - && || ! ( ) { } [ ] ^ " ~ * ? : \`.
## Operators
Lucene uses Logical connectives to combine search terms and phrases. The standard value is OR, which is equal to ||. Logical connectives must be written in capital letters.
* **AND (equal to &&):** Texts containing all of the search terms are found.
* **+:** The following search term must be contained in the text.
* **NOT (equal to ! or -):** The following search term must not be in the text. Using this at the beginning of the search query can slow down the searching process.
Lucene supports bracketing for the combination of logical connectives, e.g. `TextGrid AND (Laboratory OR Repository)` finds all texts that contain the word “TextGrid”, as well as the word “Laboratory” or “Repository”. This mechanism can be used with fields as well.
# Voyant in TextGrid
Die Grundidee der Voyant Tools ist es, web-basierte, vielfältige, explorative und analytische Zugänge zu beliebigen Texten oder Textsammlungen zu ermöglichen.
## Beschreibung
Der Service **Voyant in TextGrid** bietet die Möglichkeit, Texte direkt aus der [Digitalen Bibliothek von TextGrid](https://textgrid.de/digitale-bibliothek) des TextGrid Repository heraus sowie weitere Texte einer quantitativen Analyse zu unterziehen. Es handelt sich dabei um eine Instanz der [Voyant Tools](https://voyant-tools.org/), die direkt über die Webseite des TextGridRep aufgerufen werden kann.
Voyant in TextGrid zeigt für ausgewählte Texte Worthäufigkeiten, Verteilungen und Kookkurrenzen und erlaubt es, auf einfache Art Eigenschaften der ausgewählten Texte zu explorieren und zu visualisieren. So lassen sich stilistische Besonderheiten, wie die Verwendung von Wörtern in gegensätzlichen Zusammenhängen, Wortballungen an charakteristischen Stellen im Gesamtkorpus oder im Einzeltext, auf einen Blick erkennen und darstellen. Dies kann im Rahmen literaturwissenschaftlicher Analysen zur Unterstützung der Interpretation aus dem Close-Reading sein oder erst den Anstoß geben, einen oder viele Texte aus einem anderen quantitativen Blickwinkel zu betrachten.
In der [Digitalen Bibliothek von TextGrid im TextGrid Repository](https://textgridrep.org/repository.html) können ein oder mehrere Texte zu einem „Regal“ hinzugefügt werden, um sich ein Korpus zusammenzustellen, das der eigenen Forschungsfrage angepasst ist. Dieses Regal steht zum Download bereit (Funktion: „Alles herunterladen“) und bietet zugleich die Möglichkeit, unter „Werkzeuge“ die ausgewählten Texte direkt mit den Voyant Tools zu analysieren.
Damit ist die Nutzung von Voyant ohne weitere Vorkenntnisse möglich und wendet sich besonders an Lernende und Lehrende aus der Literaturwissenschaft, die einen Einstieg in die quantitative Analyse innerhalb der Digitalen Bibliothek suchen.
## Beispiel
Eine Beispielanwendung zu Johann Wolfgang Goethes [Wilhelm Meisters Wanderjahre oder Die Entsagenden](https://textgridrep.org/browse/-/browse/11h9j_0#) findet sich im TextGrid Repository und kann unter „Werkzeuge“ [direkt in Voyant geladen werden](https://voyant-tools.org/?input=https://textgridlab.org/1.0/tgcrud-public/rest/textgrid:11h9j.0/data). Für die Auswahl von Texten bietet sich auch ein [Blick in die Digitale Bibliothek von TextGrid](https://textgridrep.org/repository.html) an.
# Gradle wrapper settings read by the gradlew start script's wrapper JAR.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Gradle 6.4, binary-only distribution; the colon is escaped as required in .properties files.
distributionUrl=https\://services.gradle.org/distributions/gradle-6.4-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
#!/usr/bin/env sh
#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
# Loop while PRG is a symlink: the target is parsed from the "-> target" part
# of the "ls -ld" output. An absolute target replaces PRG; a relative target is
# resolved against the directory of the current PRG.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
# Remember the caller's working directory, cd into the script's directory to
# read its physical path (pwd -P) into APP_HOME, then cd back.
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
# Name under which the script was invoked (basename of $0, links not resolved).
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# warn MESSAGE...
# Prints all arguments, joined into one string, on standard output.
warn () {
echo "$*"
}
# die MESSAGE...
# Prints the message surrounded by blank lines on standard output and
# terminates the script with exit status 1.
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
# All flags start as false; at most one is switched to true below, depending
# on the prefix of the `uname` output.
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
# The wrapper JAR, located relative to the resolved APP_HOME.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
# With JAVA_HOME set, JAVACMD is taken from inside it and must be executable,
# otherwise the script aborts via die. Without JAVA_HOME, plain "java" is used
# and must be found on the PATH (checked with `which`), otherwise the script
# aborts via die.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi