Commit a6446b69 authored by Andreas Wagner's avatar Andreas Wagner
Browse files

Elaborate docs, adjust config format.

parent 1117a290
# TEI to Zenodo Service
[![Go Report Card](https://goreportcard.com/badge/gitlab.gwdg.de/andreas.wagner01/tei2zenodo?style=flat-square)](https://goreportcard.com/report/gitlab.gwdg.de/andreas.wagner01/tei2zenodo)
[![Go Doc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat-square)](http://godoc.org/gitlab.gwdg.de/andreas.wagner01/tei2zenodo)
[![Release](https://img.shields.io/gitlab.gwdg.de/andreas.wagner01/tei2zenodo.svg?style=flat-square)](https://gitlab.gwdg.de/andreas.wagner01/tei2zenodo/releases/latest)
[![Go Report Card](https://goreportcard.com/badge/gitlab.gwdg.de/rg-mpg-de/tei2zenodo?style=flat-square)](https://goreportcard.com/report/gitlab.gwdg.de/rg-mpg-de/tei2zenodo)
[![Go Doc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat-square)](http://godoc.org/gitlab.gwdg.de/rg-mpg-de/tei2zenodo)
[![Release](https://img.shields.io/gitlab.gwdg.de/rg-mpg-de/tei2zenodo.svg?style=flat-square)](https://gitlab.gwdg.de/rg-mpg-de/tei2zenodo/releases/latest)
This is the TEI to Zenodo service developed at the [Max Planck Institute for European Legal History](http://www.rg.mpg.de/). It provides a means to quickly push TEI XML files to Zenodo deposits, thereby assigning them a DOI and committing them to long-term archival. Files can be uploaded with a REST POST request or by calling a webhook that will retrieve the file(s).
## Configuration
This service is configured via a `config.json` file residing in one of three locations: the current directory, the `configs` directory below the current directory, or the `.t2z` directory below the current user's home directory.
In this file, you can specify the listening port for this service, the API endpoints you want to enable (to disable one, set its value to the empty string `""`), the Zenodo connection (host, port, token, DOI prefix), the git context allowed to post to the webhook, and how to map the XML files that this service processes onto Zenodo metadata fields. Each of these XML parsing entries consists of the name of the receiving Zenodo field (`field`) and one of the following: an XPath (`xpath`), an XPath expression (`xexpression`), or an XPath combined with `subfields` (each of which in turn consists of a field name and its own `xpath` or `xexpression`). For details, have a look at the [./configs/config.json.tpl](./configs/config.json.tpl) template file.
## API endpoints
- /api/v1/file
- /api/v1/webhook
- /api/v1/file (POST, Content-Type: application/xml; receives a TEI file in the request body and an optional `?doPublish=(False|True)` URL query parameter; the deposit is only published if `doPublish=True`)
- /api/v1/hooks/receivers/github/events/ (POST)
(You can change these paths in the configuration file.)
## Development
This service has been written in [Go](https://golang.org/).
This service has been written in [Go](https://golang.org/) by [Andreas Wagner](https://orcid.org/0000-0003-1835-1653).
## Licence
The licence for this service is the MIT licence.
The licence for this software is the MIT licence.
{
"listenspec": 8000,
"domain": "ssl.domain.com",
"apiuri": "http://127.0.0.1:8000/api/v1/",
"db": {
"host": "localhost",
"port": 3306,
"dbname": "mydatabase",
"user": "apiuser",
"pw": "mysecretpassword"
"ListenSpec": 8000,
"APIRoot": "/api/v1",
"FileAPI": "/file",
"WebhookAPI": "/hooks/receivers/github/events/",
"ZenodoRepo": {
"prefix": "10.5072/zenodo.",
"host": "https://sandbox.zenodo.org",
"port": 443,
"token": "aBcDeFgHiJkLmNoPqRsTuVwXyZ"
},
"es": {
"host": "https://localhost:9200",
"index": "test",
"port": 9200,
"defaultformat": "csv"
"AllowedGit": [
{
"host": "https://github.com",
"user": "digicademy",
"repo": "svsal"
}
],
"metadata": {
"fields": [
{
"field": "upload_type",
"xexpression": "string('publication')"
},
{
"field": "publication_type",
"xexpression": "string('other')"
},
{
"field": "publication_date",
"xpath": "//publicationStmt/date"
},
{
"field": "title",
"xpath": "//titleStmt//title[@type='main']"
},
{
"field": "creators",
"xpath": "//titleStmt/author",
"subfields": [
{
"field": "name",
"xpath": "."
},
{
"field": "affiliation",
"xpath": ""
},
{
"field": "orcid",
"xpath": ""
},
{
"field": "gnd",
"xpath": ""
}
]
},
{
"field": "description",
"xexpression": "string('Work published in the context of the School of Salamanca project.')"
},
{
"field": "access_right",
"xexpression": "string('open')"
},
{
"field": "license",
"xpath": "//publicationStmt/availability/licence/@n"
},
{
"field": "contributors",
"xpath": "//titleStmt/editor",
"subfields": [
{
"field": "name",
"xpath": "."
},
{
"field": "type",
"xpath": "@role"
},
{
"field": "affiliation",
"xpath": ""
},
{
"field": "orcid",
"xpath": ""
},
{
"field": "gnd",
"xpath": ""
}
]
},
{
"field": "doi",
"xpath": "//publicationStmt//idno[@type='DOI']"
},
{
"field": "keywords",
"xpath": "//teiHeader/profileDesc/textClass/keywords/term"
}
]
}
}
......@@ -29,120 +29,124 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
c.String(http.StatusOK, "Service homepage with page "+page+".")
})
APIv1 := router.Group("/api/v1")
APIv1 := router.Group(conf.APIRoot)
{
APIv1.POST("/file", func(c *gin.Context) {
var myDeposit tei2zenodo.Deposit
var r io.ReadSeeker
var doPublish bool
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
file := buf.String()
r = strings.NewReader(file)
// Get filename for upload
filename := c.Request.FormValue(`filename`)
if filename == "" {
filename = t2zxml.GetFilename(r)
if conf.FileAPI != "" {
APIv1.POST(conf.FileAPI, func(c *gin.Context) {
var myDeposit tei2zenodo.Deposit
var r io.ReadSeeker
var doPublish bool
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
file := buf.String()
r = strings.NewReader(file)
// Get filename for upload
filename := c.Request.FormValue(`filename`)
if filename == "" {
filename = t2zxml.GetFilename(r)
r.Seek(0, 0)
}
// Get doPublish from request (false if not set)
if c.Request.FormValue(`doPublish`) == "True" {
doPublish = true
} else {
doPublish = false
}
// Parse TEI file
log.Printf("=== Parse TEI file ===")
// log.Printf("Beginning of submitted file %s: %s ...", filename, file[:100])
mc := conf.Metadata
var md tei2zenodo.ZMetadata
doi, err := t2zxml.ParseTEI(r, &md, &mc)
if err != nil {
log.Printf("Error (%s) parsing TEI file: %80s", err, file)
AbortMsg(500, err, c)
return
}
r.Seek(0, 0)
}
// Get doPublish from request (false if not set)
if c.Request.FormValue(`doPublish`) == "True" {
doPublish = true
} else {
doPublish = false
}
// Parse TEI file
log.Printf("=== Parse TEI file ===")
// log.Printf("Beginning of submitted file %s: %s ...", filename, file[:100])
mc := conf.Metadata
var md tei2zenodo.ZMetadata
doi, err := t2zxml.ParseTEI(r, &md, &mc)
if err != nil {
log.Printf("Error (%s) parsing TEI file: %80s", err, file)
AbortMsg(500, err, c)
return
}
r.Seek(0, 0)
md.DOI = ""
if doi == "" {
log.Printf("=== Create new DOI at zenodo ===")
doi, err = zenodo.CreateNewDOI(&md, &conf.Repo)
if err != nil || doi == "" {
log.Printf("Error creating DOI reservation deposit: %v", err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
md.DOI = ""
if doi == "" {
log.Printf("=== Create new DOI at zenodo ===")
doi, err = zenodo.CreateNewDOI(&md, &conf.ZenodoRepo)
if err != nil || doi == "" {
log.Printf("Error creating DOI reservation deposit: %v", err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
return
}
myDeposit.DepositDOI = doi
myDeposit.Metadata.DOI = doi
md.DOI = doi
} else {
log.Printf("=== Retrieve deposit for %s and create a new version ===", doi)
myDeposit.OldDOI = doi
md.RelatedIdentifiers = append(md.RelatedIdentifiers, tei2zenodo.ZIdentifier{Relation: "isNewVersionOf", Identifier: doi})
newDOI, err := zenodo.UpdateDeposit(&myDeposit, &md, &conf.ZenodoRepo)
if err != nil {
log.Printf("Error retrieving zenodo deposit for doi %s: %v", doi, err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
return
}
myDeposit.DepositDOI = newDOI
md.DOI = newDOI
}
log.Printf("=== Add new DOI to document ===")
newfile, err := t2zxml.MixinDOI(r, md.DOI)
if err != nil {
log.Printf("Error mixing new DOI into document: %v", err)
AbortMsg(500, fmt.Errorf("problem mixing new DOI into document: %s", err), c)
return
}
myDeposit.DepositDOI = doi
myDeposit.Metadata.DOI = doi
md.DOI = doi
} else {
log.Printf("=== Retrieve deposit for %s and create a new version ===", doi)
myDeposit.OldDOI = doi
md.RelatedIdentifiers = append(md.RelatedIdentifiers, tei2zenodo.ZIdentifier{Relation: "isNewVersionOf", Identifier: doi})
newDOI, err := zenodo.UpdateDeposit(&myDeposit, &md, &conf.Repo)
// log.Printf("Here is the new file:\n%s", newfile[:10000])
myDeposit.FileContent = newfile
r = strings.NewReader(newfile)
r.Seek(0, 0)
log.Printf("=== Upload to zenodo ===")
url, err := zenodo.PostFile(r, filename, &md, &conf.ZenodoRepo)
if err != nil {
log.Printf("Error retrieving zenodo deposit for doi %s: %v", doi, err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
log.Printf("Error sending POST request to zenodo: %v", err)
AbortMsg(500, err, c)
return
}
myDeposit.DepositDOI = newDOI
md.DOI = newDOI
}
log.Printf("=== Add new DOI to document ===")
newfile, err := t2zxml.MixinDOI(r, md.DOI)
if err != nil {
log.Printf("Error mixing new DOI into document: %v", err)
AbortMsg(500, fmt.Errorf("problem mixing new DOI into document: %s", err), c)
return
}
// log.Printf("Here is the new file:\n%s", newfile[:10000])
myDeposit.FileContent = newfile
r = strings.NewReader(newfile)
r.Seek(0, 0)
log.Printf("=== Upload to zenodo ===")
url, err := zenodo.PostFile(r, filename, &md, &conf.Repo)
if err != nil {
log.Printf("Error sending POST request to zenodo: %v", err)
AbortMsg(500, err, c)
return
}
myDeposit.FileURI = url
log.Printf("=== Add metadata to zenodo ===")
err = zenodo.PutMetadata(&myDeposit, &md, &conf.Repo)
if err != nil {
log.Printf("Error putting metadata: %v", err)
AbortMsg(500, err, c)
return
}
if doPublish {
log.Printf("=== Publish at zenodo ===")
err = zenodo.Publish(&md, &conf.Repo)
myDeposit.FileURI = url
log.Printf("=== Add metadata to zenodo ===")
err = zenodo.PutMetadata(&myDeposit, &md, &conf.ZenodoRepo)
if err != nil {
log.Printf("Error publishing deposit: %v", err)
log.Printf("Error putting metadata: %v", err)
AbortMsg(500, err, c)
return
}
} else {
log.Printf("=== Not published. If you want to publish, set doPublish=True as request parameter. ===")
}
log.Printf("=== All done ===")
c.JSON(200, myDeposit)
})
APIv1.GET("/webhook", func(c *gin.Context) {
})
if doPublish {
log.Printf("=== Publish at zenodo ===")
err = zenodo.Publish(&md, &conf.ZenodoRepo)
if err != nil {
log.Printf("Error publishing deposit: %v", err)
AbortMsg(500, err, c)
return
}
} else {
log.Printf("=== Not published. If you want to publish, set doPublish=True as request parameter. ===")
}
log.Printf("=== All done ===")
c.JSON(200, myDeposit)
})
}
if conf.WebhookAPI != "" {
APIv1.GET(conf.WebhookAPI, func(c *gin.Context) {
})
}
}
return router
}
......
......@@ -4,10 +4,11 @@ package tei2zenodo
// Config is the struct of the application's general configuration.
type Config struct {
Domain string
ListenSpec int64
APIURI string
Repo RepoConfig
APIRoot string
FileAPI string
WebhookAPI string
ZenodoRepo RepoConfig
AllowedGit GitConfig
Metadata MetadataConfig
Log LoggingConfig
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment