Commit 38ee0d84 authored by Andreas Wagner's avatar Andreas Wagner
Browse files

Initial commit

parents
# TEI to Zenodo Service
[![Go Report Card](https://goreportcard.com/badge/gitlab.gwdg.de/andreas.wagner01/tei2zenodo?style=flat-square)](https://goreportcard.com/report/gitlab.gwdg.de/andreas.wagner01/tei2zenodo)
[![Go Doc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat-square)](http://godoc.org/gitlab.gwdg.de/andreas.wagner01/tei2zenodo)
[![Release](https://img.shields.io/gitlab.gwdg.de/andreas.wagner01/tei2zenodo.svg?style=flat-square)](https://gitlab.gwdg.de/andreas.wagner01/tei2zenodo/releases/latest)
This is the TEI to Zenodo service developed at the [Max Planck Institute for European Legal History](http://www.rg.mpg.de/). It provides a quick way to push TEI XML files to Zenodo deposits, thereby assigning them a DOI and committing them to long-term archival. Files can be uploaded with a REST POST request or by calling a webhook that will retrieve the file(s).
## API endpoints
- /api/v1/file
- /api/v1/webhook
## Development
This service has been written in [Go](https://golang.org/).
## Licence
The licence for this service is the MIT licence.
package main
import (
"log"
"strconv"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/conf"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/routing"
)
// Config stores the application's configuration. It is filled in by main
// before the router is started.
var Config tei2zenodo.Config

// main loads the configuration, sets up the HTTP routes and serves them on the
// port given by Config.ListenSpec. It terminates the process with a non-zero
// exit status when the configuration cannot be loaded or when the server stops
// with an error.
func main() {
	// Populate the package-level Config directly. Declaring a second, local
	// Config here would shadow it and leave the package variable at its zero
	// value.
	if err := conf.Configure(&Config); err != nil {
		log.Fatalf("[main/main] Cannot load configuration: %v", err)
	}

	// Log a copy with the repository token masked so that the credential does
	// not end up in the log output.
	redacted := Config
	if redacted.Repo.Token != "" {
		redacted.Repo.Token = "***"
	}
	log.Printf("[main/main] Config: %+v", redacted)

	router := routing.SetupRoutes()
	addr := ":" + strconv.FormatInt(Config.ListenSpec, 10)
	if err := router.Run(addr); err != nil {
		log.Fatalf("[main/main] Server on %s stopped: %v", addr, err)
	}
	// router.RunTLS(":"+strconv.Itoa(Config.ListenSpec), "./policey_server.cert", "./policey_server.key")
}
{
"domain": "ssl.domain.com",
"listenspec": 8000,
"apiuri": "http://127.0.0.1:8000/api/v1/",
"repo": {
"host": "https://sandbox.zenodo.org",
"port": 443,
"token": "88M7polNLjdh9iLz8MpeG8WRfRaFlhXFVuee6fRTtLeWuAkd5K74roLVaGmt"
},
"allowed_git": [
{
"host": "https://github.com",
"user": "digicademy",
"repo": "svsal"
}
],
"metadata_fields": [
{
"field": "creator",
"xpath": "//author[1]"
},
{
"field": "license",
"xpath": "//license[1]"
},
{
"field": "title",
"xpath": "//title[1]"
}
]
}
{
"listenspec": 8000,
"domain": "ssl.domain.com",
"apiuri": "http://127.0.0.1:8000/api/v1/",
"db": {
"host": "localhost",
"port": 3306,
"dbname": "mydatabase",
"user": "apiuser",
"pw": "mysecretpassword"
},
"es": {
"host": "https://localhost:9200",
"index": "test",
"port": 9200,
"defaultformat": "csv"
}
}
<html>
<head>
<title>Repertory of Police Ordinances - Documentation</title>
</head>
<body>
<h1>Repertory of Police Ordinances - Documentation</h1>
</body>
</html>
package conf
import (
"log"
"strings"
"github.com/spf13/viper"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/logger"
)
// Configure loads configuration parameters into the Config struct.
//
// The configuration is read from a file named "config" that is searched for in
// "./", "./configs/" and "$HOME/.t2z". Environment lookups use the "T2Z"
// prefix, with "." in key names replaced by "_". After the values have been
// unmarshalled into Config, logging is configured from Config.Log.
//
// An error is returned when the config file cannot be read, when its content
// cannot be unmarshalled into Config, or when logging cannot be configured.
func Configure(Config *tei2zenodo.Config) error {
	// err := viper.BindPFlags(cmd.Flags())
	// if err != nil {
	// return err
	// }
	viper.SetEnvPrefix("T2Z")
	viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
	viper.AutomaticEnv()

	// if configFile, _ := cmd.Flags().GetString("config"); configFile != "" {
	// viper.SetConfigFile(configFile)
	// } else {
	viper.SetConfigName("config")
	viper.AddConfigPath("./")
	viper.AddConfigPath("./configs/")
	viper.AddConfigPath("$HOME/.t2z")
	// }

	if err := viper.ReadInConfig(); err != nil {
		log.Printf("conf/Configure - Cannot read config in %s: %+v", viper.ConfigFileUsed(), err)
		return err
	}

	// Unmarshal first: Config.Log is only filled in by this call, so logging
	// must not be configured before it.
	if err := viper.Unmarshal(Config); err != nil {
		log.Printf("conf/Configure - Cannot unmarshal config from %s: %+v", viper.ConfigFileUsed(), err)
		return err
	}

	if _, err := logger.ConfigureLogging(&Config.Log); err != nil {
		log.Printf("conf/Configure - Cannot configure logging: %+v", err)
		return err
	}
	return nil
}
package logger
import (
"bufio"
"os"
"strings"
"github.com/sirupsen/logrus"
"gitlab.gwdg.de/rg-mpg-de/policey"
)
// ConfigureLogging applies the logging configuration to the logrus standard
// logger and returns a log entry that carries the local hostname as a field.
//
// When config.File is set, log output is appended to that file; the file is
// created if it does not exist yet. When config.Level is set, it is parsed
// with logrus.ParseLevel and installed as the log level. A text formatter with
// full timestamps is always installed.
//
// An error is returned when the hostname cannot be determined, when the log
// file cannot be opened, or when the level string cannot be parsed.
func ConfigureLogging(config *policey.LoggingConfig) (*logrus.Entry, error) {
	hostname, err := os.Hostname()
	if err != nil {
		return nil, err
	}

	// use a file if you want
	if config.File != "" {
		// O_CREATE makes the 0660 mode meaningful: without it a missing log
		// file is an error and the mode argument is never used.
		f, errOpen := os.OpenFile(config.File, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0660)
		if errOpen != nil {
			return nil, errOpen
		}
		// Flush after every write; a bare bufio.Writer would hold entries back
		// until its buffer fills and drop whatever is pending at exit.
		logrus.SetOutput(flushingWriter{w: bufio.NewWriter(f)})
	}

	if config.Level != "" {
		level, err := logrus.ParseLevel(strings.ToUpper(config.Level))
		if err != nil {
			return nil, err
		}
		logrus.SetLevel(level)
	}

	// always use the fulltimestamp
	logrus.SetFormatter(&logrus.TextFormatter{
		FullTimestamp:    true,
		DisableTimestamp: false,
	})

	return logrus.StandardLogger().WithField("hostname", hostname), nil
}

// flushingWriter writes through a buffered writer and flushes it after every
// Write, so that each log entry reaches the underlying file immediately.
type flushingWriter struct {
	w *bufio.Writer
}

// Write passes p to the buffered writer and flushes it. It reports the number
// of bytes accepted and the first error encountered.
func (fw flushingWriter) Write(p []byte) (int, error) {
	n, err := fw.w.Write(p)
	if err != nil {
		return n, err
	}
	return n, fw.w.Flush()
}
package routing
import (
"bytes"
"log"
"net/http"
"strconv"
"time"
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"github.com/stephenmuss/ginerus"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/xml"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/zenodo"
)
// SetupRoutes establishes the various API endpoints.
func SetupRoutes() *gin.Engine {
// Create a gin router with logrus router and stock recovery
// logger.Infof("Starting routing...")
router := gin.New()
router.Use(ginerus.Ginerus(), gin.Recovery(), cors.Default())
// Routes - one for the html/webapp, one group per API version...
router.GET("/tei2zenodo.html", func(c *gin.Context) {
page := c.DefaultQuery("p", "index")
c.String(http.StatusOK, "Service homepage with page "+page+".")
})
APIv1 := router.Group("/api/v1")
{
APIv1.POST("/file", func(c *gin.Context) {
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
// log.Printf("Here is the file: %s", buf)
// Parse TEI file
var md zenodo2tei.ZMetadata
doi, err := xml.ParseTEI(%md, buf)
if err != nil {
log.Printf("Error (%s) parsing TEI file: %v", err, buf)
AbortMsg(500, err, c)
}
switch doi {
case "":
// prereserve doi
err := zenodo.GetDOI(buf, %md, c.Writer)
// If we have any, stream laws
timestamp := time.Now().UTC().Format("(2006-01-02_15-04-05)")
downloadName := "policey_ordinances_" + rangeString +
timestamp + "." + format
c.Writer.Header().Set("Content-Disposition", "attachment; filename="+downloadName)
c.Writer.Header().Set("Content-Description", "File Transfer")
c.Writer.Header().Set("Access-Control-Allow-Origin", "*")
c.Writer.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
c.Writer.Header().Set("X-Accel-Buffering", "no")
c.Writer.Header().Set("Content-type", "text/xml")
err := zenodo.PostFile(buf, %md, c.Writer)
if err != nil {
log.Printf("Error posting file: %v", err)
AbortMsg(500, err, c)
}
err := zenodo.PutMetadata(buf, %md, c.Writer)
err := zenodo.Publish(buf, %md, c.Writer)
case else:
}
})
APIv1.GET("/webhook", func(c *gin.Context) {
})
}
return router
}
// AbortMsg answers the request with the given status code and a short plain
// text message, records err (if any) on the gin context and aborts the
// handler chain.
func AbortMsg(code int, err error, c *gin.Context) {
	// Keep the error on the context when there is one.
	if err != nil {
		c.Error(err)
	}

	// A custom error page with HTML templates can be shown by c.HTML()
	// A JSON object would be given like c.JSON(404, gin.H{"code": "PAGE_NOT_FOUND", "message": "Seite nicht gefunden"})
	c.String(code, "Something has gone wrong, causing a %v error.\n", code)

	c.Abort()
}
package xml
import (
"bytes"
"encoding/xml"
"io"
"strings"
"github.com/antchfx/xmlquery"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo/internal/pkg/zenodo"
)
// ParsesTEI reads a TEI file and parses its metadata into a ZMetadata variable. Returns a doi (maybe empty) and an error value.
func ParseTEI(md *tei2zenodo.ZMetadata, buf *bytes.Buffer) string, error {
doc, err := xmlquery.Parse(buf)
// this is with standard xml module, decoding everything...
/*
decoder := xml.NewDecoder(strings.NewReader(string(buf)))
for {
// err is ignore here. IF you are reading from a XML file
// do not ignore err and also check for io.EOF
token, _ := decoder.Token()
if token == nil {
break
}
switch Element := token.(type) {
case xml.StartElement:
if Element.Name.Local == "teiHeader" {
fmt.Println("Element name is : ", Element.Name.Local)
err := decoder.DecodeElement(&l, &Element)
if err != nil {
fmt.Println(err)
}
fmt.Println("Element value is : ", l.Loc)
}
}
// example on handling XML attribute
switch Element := token.(type) {
case xml.StartElement:
if Element.Name.Local == "title" {
fmt.Println("Element name is : ", Element.Name.Local)
attrName := Element.Attr[0].Name.Local
attrValue := Element.Attr[0].Value
fmt.Printf("Attribute name is [%s] and value is [%s] \n", attrName, attrValue)
}
}
}
*/
}
package zenodo
import (
"bytes"
"encoding/json"
"net/http"
)
// GetDOI sends the content of buf in an HTTP POST request and returns the
// error of that request, if any. The response body is closed before returning;
// its content is not evaluated yet.
//
// NOTE(review): the target URL and the content type are placeholders. The
// workflow notes in this package describe an empty JSON POST to the zenodo
// depositions endpoint with an access token - confirm and replace.
func GetDOI(buf *bytes.Buffer) error {
	resp, err := http.Post("http://example.com/upload", "image/jpeg", buf)
	if err != nil {
		// resp is nil when the request fails, so return before touching it.
		return err
	}
	defer resp.Body.Close()

	/* This call should return something like this, from which we want to retrieve the id:
	{
	    "created": "2016-06-15T16:10:03.319363+00:00",
	    "files": [],
	    "id": 1234,
	    "links": {
	        "discard": "https://zenodo.org/api/deposit/depositions/1234/actions/discard",
	        "edit": "https://zenodo.org/api/deposit/depositions/1234/actions/edit",
	        "files": "https://zenodo.org/api/deposit/depositions/1234/files",
	        "publish": "https://zenodo.org/api/deposit/depositions/1234/actions/publish",
	        "newversion": "https://zenodo.org/api/deposit/depositions/1234/actions/newversion",
	        "self": "https://zenodo.org/api/deposit/depositions/1234"
	    },
	    "metadata": {
	        "prereserve_doi": {
	            "doi": "10.5072/zenodo.1234",
	            "recid": 1234
	        }
	    },
	    "modified": "2016-06-15T16:10:03.319371+00:00",
	    "owner": 1,
	    "record_id": 1234,
	    "state": "unsubmitted",
	    "submitted": false,
	    "title": ""
	}
	*/
	return nil
}
// PostFile is declared without parameter list or body and is not implemented
// yet.
// NOTE(review): presumably step II (upload file(s)) of the workflow described
// in this package; the routing handler calls it with a buffer, a metadata
// pointer and a writer and expects an error - confirm the signature.
func PostFile
// PutMetadata is declared without parameter list or body and is not
// implemented yet.
// NOTE(review): presumably step III (upload metadata) - confirm the signature.
func PutMetadata
// Publish is declared without parameter list or body and is not implemented
// yet.
// NOTE(review): presumably step IV (publish the deposit) - confirm the
// signature.
func Publish
// The order is this:
// I. Get id
// I.1. Either from the TEI file
// I.2. Or from uploading an empty deposit:
// headers = {"Content-Type": "application/json"}
// requests.post('https://zenodo.org/api/deposit/depositions',
// params={'access_token': ACCESS_TOKEN},
// json={},
// headers=headers)
// II. Upload file(s)
// deposition_id = r.json()['id']
// data = {'name': 'myfirstfile.csv'}
// files = {'file': open('/path/to/myfirstfile.csv', 'rb')}
// requests.post('https://zenodo.org/api/deposit/depositions/%s/files' % deposition_id,
// params={'access_token': ACCESS_TOKEN},
// data=data,
// files=files)
// III. Upload metadata
// requests.put('https://zenodo.org/api/deposit/depositions/%s' % deposition_id,
// params={'access_token': ACCESS_TOKEN},
// data=json.dumps(metadata),
// headers=headers)
// IV. Publish
// requests.post('https://zenodo.org/api/deposit/depositions/%s/actions/publish' % deposition_id,
// params={'access_token': ACCESS_TOKEN} )
// Package tei2zenodo parses TEI files for metadata and publishes them on Zenodo.
// Files can be fed via REST POST or by calling a webhook that will retrieve the file(s).
package tei2zenodo
// Config is the struct of the application's general configuration.
//
// The mapstructure tags name the configuration keys that contain an
// underscore; those keys do not match the Go field names on their own.
type Config struct {
	// Domain is the value of the "domain" configuration key.
	Domain string
	// ListenSpec is the port number the HTTP server listens on.
	ListenSpec int64
	// APIURI is the value of the "apiuri" configuration key.
	APIURI string
	// Repo holds the parameters of the zenodo repository connection.
	Repo RepoConfig
	// AllowedGit lists the git repositories from which webhooks are accepted.
	AllowedGit []AllowedGit `mapstructure:"allowed_git"`
	// MetadataFields lists metadata fields together with the xpaths used to
	// retrieve their values.
	MetadataFields []MetadataConfig `mapstructure:"metadata_fields"`
	// Log holds the logging parameters.
	Log LoggingConfig
}
// LoggingConfig bundles the parameters that control logging: the log level
// and the file that log output goes to.
type LoggingConfig struct {
	Level, File string
}
// RepoConfig describes how to reach the zenodo repository.
type RepoConfig struct {
	// Host is the repository host.
	Host string
	// Port is the repository port.
	Port int64
	// Token is the token used for the repository connection.
	Token string
}
// AllowedGit identifies one git repository from which webhooks are accepted,
// by host, user and repository name.
type AllowedGit struct {
	Host, User, Repo string
}
// MetadataConfig names a metadata field and the xpath used to retrieve its
// value, for cases where the xpath needs to differ from the default.
type MetadataConfig struct {
	Field, XPath string
}
// Service is the interface for the main application.
//
// NOTE(review): Territory is not declared in the visible part of this package;
// confirm the intended result type of PostFile.
type Service interface {
// PostFile takes an id and returns a *Territory and an error.
PostFile(id string) (*Territory, error)
}
// Link is a HATEOAS link between resources. It is serialised with the JSON
// keys "rel", "type" and "href".
type Link struct {
	// Rel is the link relation.
	Rel string `json:"rel"`
	// Typ is serialised under the key "type".
	Typ string `json:"type"`
	// Href is the link target.
	Href string `json:"href"`
}
// ZMetadata holds metadata that zenodo uses (based on DataCite's metadata scheme).
//
// Fields that are always mandatory are always serialised. Optional and
// conditionally mandatory fields carry "omitempty", so that unset values are
// left out of the JSON document and documented defaults (see License) can
// apply instead of being overridden by an empty value.
type ZMetadata struct {
	// mandatory
	// values: publication, poster, presentation, dataset, image, video, software,
	//         lesson, other
	UploadType string `json:"upload_type"`
	// mandatory if upload_type=publication
	// values: annotationcollection, book, section, conferencepaper, datamanagementplan,
	//         article, patent, preprint, deliverable, milestone, proposal, report,
	//         softwaredocumentation, taxonomictreatment, technicalnote, thesis,
	//         workingpaper, other
	PublicationType string `json:"publication_type,omitempty"`
	// mandatory if upload_type=image
	// values: figure, plot, drawing, diagram, photo, other
	ImageType       string     `json:"image_type,omitempty"`
	PublicationDate string     `json:"publication_date"` // mandatory, in ISO8601 format (YYYY-MM-DD)
	Title           string     `json:"title"`            // mandatory
	Creators        []ZCreator `json:"creators"`         // mandatory
	Description     string     `json:"description"`      // mandatory, can contain html
	// mandatory
	// values: open, embargoed, restricted, closed
	AccessRight string `json:"access_right"`
	// mandatory if access_right=(open|embargoed)
	// values: See Open Definition Licenses Service.
	//         Defaults to cc-by for non-datasets and cc-zero for datasets
	License               string         `json:"license,omitempty"`
	EmbargoDate           string         `json:"embargo_date,omitempty"`            // mandatory if access_right=embargoed, in ISO8601 format (YYYY-MM-DD)
	AccessConditions      string         `json:"access_conditions,omitempty"`       // mandatory if access_right=restricted, can contain html
	Contributors          []ZContributor `json:"contributors,omitempty"`            // optional
	DOI                   string         `json:"doi,omitempty"`                     // optional
	PrereserveDOI         bool           `json:"prereserve_doi,omitempty"`          // optional, use this to reserve and get a doi to include in your upload
	Keywords              []string       `json:"keywords,omitempty"`                // optional, free form
	Notes                 string         `json:"notes,omitempty"`                   // optional, can contain html
	RelatedIdentifiers    []ZIdentifier  `json:"related_identifiers,omitempty"`     // optional
	References            []string       `json:"references,omitempty"`              // optional
	Communities           []ZCommunity   `json:"communities,omitempty"`             // optional
	Grants                []ZGrant       `json:"grants,omitempty"`                  // optional
	JournalTitle          string         `json:"journal_title,omitempty"`           // optional
	JournalVolume         string         `json:"journal_volume,omitempty"`          // optional
	JournalIssue          string         `json:"journal_issue,omitempty"`           // optional
	JournalPages          string         `json:"journal_pages,omitempty"`           // optional
	ConferenceTitle       string         `json:"conference_title,omitempty"`        // optional
	ConferenceAcronym     string         `json:"conference_acronym,omitempty"`      // optional
	ConferenceDates       string         `json:"conference_dates,omitempty"`        // optional
	ConferencePlace       string         `json:"conference_place,omitempty"`        // optional
	ConferenceURL         string         `json:"conference_url,omitempty"`          // optional
	ConferenceSession     string         `json:"conference_session,omitempty"`      // optional
	ConferenceSessionPart string         `json:"conference_session_part,omitempty"` // optional
	ImprintPublisher      string         `json:"imprint_publisher,omitempty"`       // optional
	ImprintISBN           string         `json:"imprint_isbn,omitempty"`            // optional
	ImprintPlace          string         `json:"imprint_place,omitempty"`           // optional
	PartofTitle           string         `json:"partof_title,omitempty"`            // optional
	PartofPages           string         `json:"partof_pages,omitempty"`            // optional
	ThesisSupervisors     []ZCreator     `json:"thesis_supervisors,omitempty"`      // optional
	ThesisUniversity      string         `json:"thesis_university,omitempty"`       // optional
	Subjects              []ZSubject     `json:"subjects,omitempty"`                // optional
	Version               string         `json:"version,omitempty"`                 // optional
	Language              string         `json:"language,omitempty"`                // optional, ISO 639-2 or 639-3 code
	Locations             []ZLocation    `json:"locations,omitempty"`               // optional
	Dates                 []ZDate        `json:"dates,omitempty"`                   // optional
	Method                string         `json:"method,omitempty"`                  // optional, can contain html
}
// ZCreator describes an agent who has created a deposit.
type ZCreator struct {
	// Name is mandatory; family name, given names.
	Name string `json:"name"`
	// Affiliation is optional.
	Affiliation string `json:"affiliation"`
	// ORCID is optional.
	ORCID string `json:"orcid"`
	// GND is optional.
	GND string `json:"gnd"`
}
// ZContributor describes an agent who has contributed to creating a deposit.
type ZContributor struct {
	// Name is mandatory; family name, given names.
	Name string `json:"name"`
	// Type is mandatory.
	// values: ContactPerson, DataCollector, DataCurator, DataManager,
	//         Distributor, Editor, Funder, HostingInstitution,
	//         Producer, ProjectLeader, ProjectManager, ProjectMember,
	//         RegistrationAgency, RegistrationAuthority, RelatedPerson,
	//         Researcher, ResearchGroup, RightsHolder, Supervisor,
	//         Sponsor, WorkPackageLeader, Other
	Type string `json:"type"`
	// Affiliation is optional.
	Affiliation string `json:"affiliation"`
	// ORCID is optional.
	ORCID string `json:"orcid"`
	// GND is optional.
	GND string `json:"gnd"`
}
// ZCommunity references one of zenodo's communities of interest by its
// identifier.
type ZCommunity struct {
	// Identifier is serialised under the key "identifier".
	Identifier string `json:"identifier"`
}
// ZGrant references a grant by its id.
type ZGrant struct {
	// ID holds a funder DOI-prefixed grant id.
	ID string `json:"id"`
}
// ZIdentifier describes the identifier of a resource related to the deposit,
// together with the kind of relation.
type ZIdentifier struct {
	// Relation is mandatory.
	// values: isCitedBy, cites, isSupplementTo, isSupplementedBy,
	//         isNewVersionOf, isPreviousVersionOf, isPartOf, hasPart,
	//         compiles, isCompiledBy, isIdenticalTo, isAlternateIdentifier
	Relation string `json:"relation"`
	// Idenfitier is mandatory and is serialised under the key "identifier".
	// NOTE(review): the Go field name is misspelled; renaming it would change
	// the exported interface, so it is left as is here.
	Idenfitier string `json:"identifier"`
}
// ZSubject states what the deposit is about, as a term that is controlled in
// an external scheme.
type ZSubject struct {
	// Term is serialised under the key "term".
	Term string `json:"term"`
	// Identifier is serialised under the key "identifier".
	Identifier string `json:"identifier"`
	// Scheme is serialised under the key "scheme".
	Scheme string `json:"scheme"`
}
// ZLocation describes a place that is relevant to the deposit.
type ZLocation struct {
	// Place is mandatory.
	Place string `json:"place"`
	// Description is optional.
	Description string `json:"description"`
	// Lat is serialised under the key "lat".
	Lat float64 `json:"lat"`
	// Lon is serialised under the key "lon".
	Lon float64 `json:"lon"`
}
// ZDate describes a date or date range that is relevant to the deposit.
type ZDate struct {
	// Start is in ISO8601 format (YYYY-MM-DD); one of start or end must be present.
	Start string `json:"start"`
	// End is in ISO8601 format (YYYY-MM-DD); one of start or end must be present.
	End string `json:"end"`
	// Type is mandatory, values: Collected, Valid, Withdrawn
	Type string `json:"type"`
	// Description is optional.
	Description string `json:"description"`
}
// TFile holds a TEI file. It has no fields yet.
type TFile struct{}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment