Dear Gitlab users, due to maintenance reasons, Gitlab will not be available on Thursday 30.09.2021 from 5:00 pm to approximately 5:30 pm.

Commit 4fe65471 authored by Andreas Wagner's avatar Andreas Wagner
Browse files

Add form interface.

parent 176fae41
......@@ -20,6 +20,14 @@ This webservice:
- is capable of looking up an existing deposit (if the TEI file mentions its own zenodo DOI entry) and creating a new version of it. This new version will have a new DOI, so the software deletes the old DOI and adds the new one before uploading the file to zenodo
- uploads and commits the TEI file that now has the new DOI field back to your github repository
When used via its file upload API endpoint (instead of the webhook listener), the service skips all the github-related parts:
- accepts a file upload of a TEI file
- parses the TEI file and assigns values to the various [metadata fields that zenodo accepts/requires](https://developers.zenodo.org/#entities). It does this using a user-specified configuration based on (simple) [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expressions.
- creates a new [zenodo](https://about.zenodo.org/) deposit with a new DOI, adds the DOI to the TEI file and then uploads it to the deposit
- is capable of uploading the deposit to zenodo and *not publishing it yet* if another user-defined phrase is present; publishes the deposit otherwise
- is capable of looking up an existing deposit (if the TEI file mentions its own zenodo DOI entry) and creating a new version of it. This new version will have a new DOI, so the software deletes the old DOI and adds the new one before uploading the file to zenodo
<img align="left" style="margin-right:10px;" src="https://upload.wikimedia.org/wikipedia/commons/d/d1/Emblem-notice.svg"/>
Note that, since the XPath library that this service uses only supports basic XPath functions, you cannot really parse or manipulate the values via configuration settings. This means that you have to use some of zenodo's controlled vocabulary in your TEI markup! For instance, the license name or the editor roles in your TEI files are expected to be compatible with zenodo. You could, for example, use the `@n`-attribute of TEI's `<editor>` element to hold the required string like "cc-by" or use zenodo's controlled vocabulary for contributor types to specify the TEI `<editor>`'s `@role`-attribute...
......
<html>
<head>
<title>Repertory of Police Ordinances - Documentation</title>
</head>
<body>
<h1>Repertory of Police Ordinances - Documentation</h1>
</body>
</html>
......@@ -31,11 +31,16 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
// Create a gin router with logrus router and stock recovery
router := gin.New()
router.Use(ginerus.Ginerus(), gin.Recovery(), cors.Default())
router.LoadHTMLGlob("templates/*")
// Routes - one for the html/webapp, one group per API version...
router.GET("/tei2zenodo.html", func(c *gin.Context) {
page := c.DefaultQuery("p", "index")
c.String(http.StatusOK, "Service homepage with page "+page+".")
router.GET("/", func(c *gin.Context) {
c.HTML(http.StatusOK, "index.html", nil)
// page := c.DefaultQuery("p", "index")
// c.String(http.StatusOK, "Service homepage with page "+page+".")
})
router.GET("/index.html", func(c *gin.Context) {
c.HTML(http.StatusOK, "index.html", nil)
})
APIv1 := router.Group(conf.APIRoot)
......@@ -45,6 +50,7 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
var r io.ReadSeeker
var myDeposit tei2zenodo.Deposit
var DOIs []string
if conf.Verbose {
log.Printf("====== Received POST message from %s at %s ======", c.Request.RemoteAddr, conf.FileAPI)
......@@ -52,29 +58,59 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
log.Printf("====== Received POST message ======")
}
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
file := buf.String()
r = strings.NewReader(file)
// Do a first parse of the request (is it a form?)
_, err := c.MultipartForm()
if err != nil {
log.Tracef(" Request could not be parsed as multipart form. Maybe a raw upload?")
}
// Read filename from Form field, Query parameter or File field
filename := c.PostForm("filename")
if filename == "" {
filename = c.Query("filename")
}
// Read file from Form or as raw body
ff, err := c.FormFile("file")
if err != nil {
log.Debugf(" Request form did not contain a file. Maybe a raw upload?")
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
file := buf.String()
r = strings.NewReader(file)
} else {
if filename == "" {
filename = ff.Filename
}
file, err := ff.Open()
if err != nil {
log.Errorf("Problem opening form file: %+v", err)
AbortMsg(400, tei2zenodo.NewError("errParse", fmt.Sprintf("problem opening form file: %s", err), 400, err), c)
return
}
r = file
}
// Get filename for upload
var filename string
filename = c.Request.FormValue(`filename`)
// If we still have no filename, parse one from the file's contents
if filename == "" {
log.Warnf(" No filename specified. Creating one from the file's content...")
f, GFErr := t2zxml.GetFilename(r)
if GFErr != nil {
log.Errorf("Problem reading filename: %+v", GFErr)
AbortMsg(500, tei2zenodo.NewError("errParse", fmt.Sprintf("Error reading filename: %s", GFErr), 500, GFErr), c)
AbortMsg(500, tei2zenodo.NewError("errParse", fmt.Sprintf("error reading filename: %s", GFErr), 500, GFErr), c)
return
}
filename = f
log.Debugf(" Set filename to %s.", filename)
r.Seek(0, 0)
}
myDeposit.Filename = filename
// Get doPublish from request (false if not set)
if c.Request.FormValue(`doPublish`) == "True" {
doPublish := c.PostForm("doPublish")
if doPublish == "" {
doPublish = c.Query("doPublish")
}
if doPublish == "True" {
myDeposit.DoPublish = true
} else {
myDeposit.DoPublish = false
......@@ -87,8 +123,8 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
switch PTErr.Typ {
case "errNoTEIXML":
{
log.Warnf("Problem with file %s (%s): No TEI file.", myDeposit.Filename, myDeposit.GithubObjSHA)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem with file %s (%s): No TEI file.", myDeposit.Filename, myDeposit.GithubObjSHA), 500, PTErr), c)
log.Warnf("Problem with file %s: No TEI file.", myDeposit.Filename)
AbortMsg(400, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem with file %s: No TEI file.", myDeposit.Filename), 400, PTErr), c)
return
}
default:
......@@ -104,24 +140,19 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
ZPFErr := zenodo.ProcessFile(r, doi, &md, &myDeposit, &conf)
if ZPFErr != nil {
switch ZPFErr.Typ {
case "errNoTEIXML":
{
log.Errorf("Problem processing file %s: %+v", myDeposit.Filename, ZPFErr)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem processing file %s", myDeposit.Filename), 500, ZPFErr), c)
return
}
default:
{
log.Errorf("Problem processing file %s: %+v", myDeposit.Filename, ZPFErr)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem processing file %s", myDeposit.Filename), 500, ZPFErr), c)
return
}
}
log.Errorf("Problem processing file %s: %+v", myDeposit.Filename, ZPFErr)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem processing file %s: %s", myDeposit.Filename, ZPFErr.Error()), 500, ZPFErr), c)
return
}
log.Printf(" Successfully processed file %s (DOI %s).", myDeposit.Filename, myDeposit.DepositDOI)
DOIs = append(DOIs, myDeposit.DOIURL)
log.Printf("====== All done ======")
c.JSON(200, myDeposit)
if len(DOIs) == 1 {
c.Header("Location", DOIs[0])
}
c.JSON(http.StatusCreated, gin.H{"doi": DOIs})
})
}
if conf.WebhookAPI != "" {
......@@ -214,8 +245,8 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
case "errNoTEIXML":
{
log.Warnf("Problem with file %s (%s): No TEI file.", myDeposit.Filename, myDeposit.GithubObjSHA)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem with file %s (%s): No TEI file.", myDeposit.Filename, myDeposit.GithubObjSHA), 500, PTErr), c)
return
// AbortMsg(400, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem with file %s (%s): No TEI file.", myDeposit.Filename, myDeposit.GithubObjSHA), 400, PTErr), c)
continue
}
default:
{
......@@ -231,7 +262,7 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
// Upload (and publish?) file to zenodo
ZPFErr := zenodo.ProcessFile(r, doi, &md, &myDeposit, &conf)
if ZPFErr != nil {
log.Warnf("Problem processing file %s: %v ...", myDeposit.Filename, ZPFErr)
log.Errorf("Problem processing file %s: %v ...", myDeposit.Filename, ZPFErr)
AbortMsg(500, tei2zenodo.NewError("errZProcessing", fmt.Sprintf("problem processing file %s (%s): %s", myDeposit.Filename, myDeposit.GithubObjSHA, ZPFErr.Error()), 500, ZPFErr), c)
return
}
......
<!DOCTYPE html>
<html>
<head>
<title>TEI2Zenodo Web service</title>
<!--
<link rel="stylesheet" type="text/css" href="resources/css/bootstrap.min.css"/>
<link rel="stylesheet" type="text/css" href="resources/css/bootstrap-responsive.min.css"/>
<link rel="stylesheet" type="text/css" href="resources/css/style.css"/>
-->
<!--
<script type="text/javascript" src="$shared/resources/scripts/bootstrap.min.js"/>
-->
</head>
<body>
<div class="navbar">
<p style="float: right; margin-top: 0.8em">
<!--
DOI: <a href="https://doi.org/10.5281/zenodo.2604391">
10.5281/zenodo.2604391
</a> |
<a href="changelog.html">Version 1.4.0</a> |
-->
<a href="https://gitlab.gwdg.de/rg-mpg-de/tei2zenodo">Gitlab</a>
</p>
<!--
<p style="float: left; margin-top: 0.8em">
<a href="index.html">About</a> |
<a href="documentation.html">Documentation</a> |
<a href="examples.html">Examples</a> |
<a href="dh.html">Digital Humanities</a>
</p>
-->
</div>
<div class="main">
<div class="page-header">
<h1>TEI2Zenodo</h1>
<p>A generic webservice to quickly push <a href="https://tei-c.org/guidelines/p5/">TEI XML</a> files
to <a href="https://about.zenodo.org/">zenodo</a> deposits, thereby assigning them a
<a href="https://www.doi.org/">DOI identifier</a> and committing them to long-term archival.
It offers a <a href="https://github.com/">GitHub</a> integration, listening to
<a href="https://developer.github.com/webhooks/">webhooks</a> sent by github and updating the TEI files
in the repository with the new zenodo DOI identifiers.</p>
</div>
<div class="row-fluid">
<div id="quickstart">
<h2>Quickstart</h2>
<p>
With the TEI2Zenodo webservice you can submit TEI files to Zenodo deposits.
Based on a simple configuration using XPATH expressions, it extracts metadata
to be used in the zenodo deposit description from the TEI file.
The webservice can be used with direct POST or form-style POST requests,
or it can be used via github webhooks.
</p>
<p>
Note that Zenodo requires some metadata fields to be present and to use a controlled
vocabulary. Since this webservice cannot perform more than very simple XPath operations,
it cannot create the required terms and instead presupposes that the TEI files that are
being submitted make use of this controlled vocabulary, a presupposition that goes
beyond what the TEI guidelines recommend. Alternatively, you can hardcode a fixed value
for such fields in your configuration. The former approach could, for instance, be applied
to the contributor roles, where the service could look up the
<code>//titleStmt/editor/@type</code> value, but requires this value to be one of a specific
list of values <a href="https://developers.zenodo.org/#deposit-metadata">defined by zenodo</a>.
The second approach on the other hand could, for example, be used to specify that the
"upload_type" should always be "publication", no matter what. For more details, please see
the <a href="https://gitlab.gwdg.de/rg-mpg-de/tei2zenodo/-/blob/master/README.md">general
documentation</a> and the
<a href="https://gitlab.gwdg.de/rg-mpg-de/tei2zenodo/-/raw/master/configs/config.json.tpl">configuration
file template</a>.
</p>
</div>
<div id="form-style">
<h3>Form-style POST requests</h3>
<p>Example for the form-style POST API.</p>
<form action="api/v1/file" method="post" enctype="multipart/form-data" style="background: #F5F5F5; padding: 1em; border: 1px solid #9F9F9F;">
<div class="control-group">
<!--
<div class="controls">
<label>
Filename: <input type="text" name="filename" value="filename"/>
</label>
</div>
-->
<div class="controls">
<label>
Publish zenodo deposit: <input type="checkbox" name="doPublish" value="True"/>
</label>
</div>
<div class="controls">
<label>
Upload TEI file: <input name="file" type="file" size="50" accept="application/tei+xml" onchange="this.form.submit()"/>
</label>
</div>
</div>
</form>
</div>
<div id="direct-post">
<h3>Direct POST requests</h3>
<p>
You can also submit direct POST requests to the <code>file</code> API endpoint. Where this
actually is, depends on your configuration. By default, it is at <code>`hostname`:8081/api/v1/file</code>
</p>
<p>
(a) The <code>Content-Type</code> HTTP header should have a value of <code>application/xml</code> and the request
body should directly contain your file. Options are specified as query parameters:
specify a filename with the <code>filename</code> query parameter and use the <code>doPublish</code>
query parameter (set to either <code>True</code> or <code>False</code>) to choose whether you want zenodo to publish
the deposit or to leave it in editable state. (In the latter case, you can edit and publish it manually
if you log in to zenodo and go to your Uploads.)
</p>
<p>
(b) Alternatively, you can send a <code>multipart/form-data</code> request. (That would be a <code>Content-Type</code>
header of "multipart/form-data" plus some boundary string appended with a semicolon, e.g.
<code>multipart/form-data;boundary="myboundary" </code>.) The form fields are then called <code>filename</code>,
<code>file</code> and <code>doPublish</code>.
</p>
</div>
</div>
<div class="navbar">
<p style="float: left; margin-top: 0.8em">
Credits: <a href="https://www.rg.mpg.de">Max Planck Institute for European Legal History</a> (<a href="https://twitter.com/rg_mpg">@rg_mpg</a>)
<br/>Research Software Engineering: <a href="https://orcid.org/0000-0003-1835-1653">Andreas Wagner</a>
<br/>Software licensed under <a href="http://opensource.org/licenses/MIT">MIT</a>, content and documentation licensed under <a href="https://creativecommons.org/licenses/by/4.0/">CC-BY 4.0</a>
</p>
</div>
</body>
</html>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment