Commit 7cd68d6a authored by Andreas Wagner's avatar Andreas Wagner
Browse files

Uploading works.

parent 0e8e3cd1
......@@ -3,6 +3,7 @@ package routing
import (
"bytes"
"fmt"
"io"
"log"
"net/http"
"strings"
......@@ -32,11 +33,14 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
{
APIv1.POST("/file", func(c *gin.Context) {
var myResult tei2zenodo.Result
var r io.ReadSeeker
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
file := buf.String()
r := strings.NewReader(file)
r = strings.NewReader(file)
// Get filename for upload
filename := c.Request.FormValue(`filename`)
......@@ -45,70 +49,84 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
r.Seek(0, 0)
}
log.Printf("Beginning of submitted file %s: %s ...", filename, file[:100])
// log.Printf("Beginning of submitted file %s: %s ...", filename, file[:100])
// Parse TEI file
log.Printf("=== Parse TEI file ===")
mc := conf.Metadata
var md tei2zenodo.ZMetadata
// var doc *t2zxml.TEIDoc
// doc, doi, err := t2zxml.ParseTEI(r, &md, &mc)
_, doi, err := t2zxml.ParseTEI(r, &md, &mc)
doi, err := t2zxml.ParseTEI(r, &md, &mc)
if err != nil {
log.Printf("Error (%s) parsing TEI file: %80s", err, file)
AbortMsg(500, err, c)
return
}
r.Seek(0, 0)
log.Printf("--- TEI successfully parsed ---")
log.Printf("Title: %v", md.Title)
log.Printf("Creator 1: %v", md.Creators[0])
log.Printf("Contributor 1: %v", md.Contributors[0])
if doi != "" {
log.Printf("DOI present: %v", doi)
log.Printf("=== DOI %s present, retrieving zenodo deposit ===", doi)
depositURI, err := zenodo.ResolveDOI(doi, &md, &conf.Repo)
if err != nil || depositURI == "" {
log.Printf("Error retrieving zenodo deposit for doi %s: %v", doi, err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
return
}
myResult.DepositURI = depositURI
myResult.DepositDOI = doi
myResult.FileContent = file
md.DOI = doi
} else {
log.Printf("=== Fetching DOI from zenodo ===")
doi, err := zenodo.GetDOI(&md, &conf.Repo)
log.Printf("=== Fetching new DOI from zenodo ===")
doi, depositURI, err := zenodo.GetDOI(&conf.Repo)
if err != nil || doi == "" {
log.Printf("Error creating DOI reservation deposit: %v", err)
AbortMsg(500, fmt.Errorf("problem reported by zenodo: %s", err), c)
} else {
log.Printf("New DOI minted: %s", doi)
return
}
myResult.DepositURI = depositURI
myResult.DepositDOI = doi
md.DOI = doi
/*
log.Printf("=== Adding new DOI to document ===")
err = t2zxml.MixinDOI(&doc, doi)
if err != nil {
log.Printf("Error mixing new DOI into document: %v", err)
AbortMsg(500, fmt.Errorf("problem mixing new DOI into document: %s", err), c)
}
*/
log.Printf("=== Adding new DOI to document ===")
newfile, err := t2zxml.MixinDOI(r, doi)
if err != nil {
log.Printf("Error mixing new DOI into document: %v", err)
AbortMsg(500, fmt.Errorf("problem mixing new DOI into document: %s", err), c)
return
}
// log.Printf("Here is the new file:\n%s", newfile[:10000])
myResult.FileContent = newfile
r = strings.NewReader(newfile)
}
r.Seek(0, 0)
log.Printf("=== Sending to zenodo ===")
err = zenodo.PostFile(r, filename, &md, &conf.Repo)
log.Printf("=== Uploading to zenodo ===")
url, err := zenodo.PostFile(r, filename, &md, &conf.Repo)
if err != nil {
log.Printf("Error sending POST request to zenodo: %v", err)
AbortMsg(500, err, c)
return
}
myResult.FileURI = url
/*
log.Printf("=== Adding metadata to zenodo ===")
err = zenodo.PutMetadata(&md, &conf.Repo)
if err != nil {
log.Printf("Error putting metadata: %v", err)
AbortMsg(500, err, c)
return
}
err := zenodo.PutMetadata(buf, %md)
if err != nil {
log.Printf("Error posting file: %v", err)
AbortMsg(500, err, c)
}
log.Printf("=== Publish at zenodo ===")
err = zenodo.Publish(&md, &conf.Repo)
if err != nil {
log.Printf("Error publishing deposit: %v", err)
AbortMsg(500, err, c)
return
}
err := zenodo.Publish(buf, %md)
if err != nil {
log.Printf("Error posting file: %v", err)
AbortMsg(500, err, c)
}
*/
log.Printf("=== All done ===")
c.JSON(200, myResult)
})
APIv1.GET("/webhook", func(c *gin.Context) {
})
......
......@@ -9,13 +9,12 @@ import (
"github.com/antchfx/xmlquery"
"github.com/antchfx/xpath"
"github.com/beevik/etree"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
)
// TEIDoc represents a parsed TEI XML file
type TEIDoc xmlquery.Node
// GetFilename extracts what can serve as filename. Like /TEI/@xml:id
// it returns a string to serve as filename
func GetFilename(r io.Reader) string {
// Parse document (in r) wih antchfx/xmlquery...
doc, err := xmlquery.Parse(r)
......@@ -27,22 +26,20 @@ func GetFilename(r io.Reader) string {
if n := t; t != nil {
u := n.InnerText()
return u + ".xml"
} else {
return ""
}
return ""
}
// ParseTEI reads a TEI file and parses its metadata into a ZMetadata variable.
// Returns a doi (maybe empty) and an error value.
func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataConfig) (*xmlquery.Node, string, error) {
func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataConfig) (string, error) {
var doc *xmlquery.Node
// Parse document (in r) wih antchfx/xmlquery...
// Parse document r wih antchfx/xmlquery...
doc, err := xmlquery.Parse(r)
if err != nil {
log.Printf("Could not parse xml.\n")
return doc, "", err
return "", err
}
re := regexp.MustCompile(`\s+`)
......@@ -108,7 +105,7 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
xexpr, err := xpath.Compile(confSubfields[m].XExpression)
if err != nil {
log.Printf("Erroneous XPath expression: %s ...", confSubfields[m].XExpression)
return doc, "", fmt.Errorf("unknown (hardcoded?) metadata type: %s.%s", structFieldtype, zStructFieldtype)
return "", fmt.Errorf("erroneous XPath expression: %s", confSubfields[m].XExpression)
}
switch zStructFieldtype.String() {
case "string":
......@@ -133,12 +130,12 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
default:
log.Printf("Unknown (hardcoded?) metadata type: %s.%s ...", structFieldtype, zStructFieldtype)
return doc, "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s.%s", structFieldtype, zStructFieldtype)
return "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s.%s", structFieldtype, zStructFieldtype)
}
} else if confSubfields[m].Field == "name" || confSubfields[m].Field == "type" {
log.Printf("Problem with config: XPath or XExpression missing in %v ...", conf.Fields[j])
return doc, "", fmt.Errorf("xml: malformed config (xpath/xexpression missing): %v", conf.Fields[j])
return "", fmt.Errorf("xml: malformed config (xpath/xexpression missing): %v", conf.Fields[j])
}
}
}
......@@ -170,12 +167,12 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
f.Set(newSlice)
default:
log.Printf("Problem with type conversion of %s (%s)", structFieldname, varType.Name())
return doc, "", fmt.Errorf("xml: malformed config (type problem in %s [%s])", structFieldname, varType.Name())
return "", fmt.Errorf("xml: malformed config (type problem in %s [%s])", structFieldname, varType.Name())
}
}
} else {
log.Printf("Problem with config: XPath missing in %v ...", conf.Fields[j])
return doc, "", fmt.Errorf("xml: malformed config (xpath missing): %v", conf.Fields[j])
return "", fmt.Errorf("xml: malformed config (xpath missing): %v", conf.Fields[j])
}
} else if conf.Fields[j].XPath != "" {
......@@ -202,13 +199,13 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
default:
log.Printf("Unknown (hardcoded?) metadata type: %s ...", structFieldtype)
return doc, "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s", structFieldtype)
return "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s", structFieldtype)
}
} else if conf.Fields[j].XExpression != "" {
xexpr, err := xpath.Compile(conf.Fields[j].XExpression)
if err != nil {
log.Printf("Erroneous XPath expression: %s ...", conf.Fields[j].XExpression)
return doc, "", fmt.Errorf("unknown (hardcoded?) metadata type: %s", structFieldtype)
return "", fmt.Errorf("erroneous XPath expression: %s", conf.Fields[j].XExpression)
}
switch structFieldtype.String() {
case "string":
......@@ -233,21 +230,78 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
default:
log.Printf("Unknown (hardcoded?) metadata type: %s ...", structFieldtype)
return doc, "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s", structFieldtype)
return "", fmt.Errorf("xml: unknown (hardcoded?) metadata type: %s", structFieldtype)
}
} else {
log.Printf("Malformed config entry: %v ...", conf.Fields[j])
return doc, "", fmt.Errorf("xml: malformed config entry: %v", conf.Fields[j])
return "", fmt.Errorf("xml: malformed config entry: %v", conf.Fields[j])
}
}
}
}
doi := md.DOI
return doc, doi, nil
log.Printf("Success.")
log.Printf(" Title: %v", md.Title)
if len(md.Creators) > 0 {
log.Printf(" Creator 1: %v", md.Creators[0])
}
//if len(md.Contributors) > 0 {
// log.Printf(" Contributor 1: %v", md.Contributors[0])
//}
log.Printf(" DOI: %s", doi)
return doi, nil
}
// MixinDOI adds a DOI idno element to the document
func MixinDOI(doc *xmlquery.Node, doi string) error {
return nil
// MixinDOI reads an XML document from r and adds an <idno type="DOI"> element
// containing doi to its /TEI/teiHeader/fileDesc/publicationStmt element.
//
// Where the new element goes depends on what publicationStmt already holds:
//   - no <idno> child: the new element is appended as last child of publicationStmt;
//   - a first <idno> child that itself contains <idno> children: the new element
//     is appended as last child of that first <idno>;
//   - otherwise: the new element is inserted into publicationStmt at the index
//     of the first <idno> child.
//
// It returns the serialized document, or an empty string and an error if the
// input cannot be parsed, has no publicationStmt, or serializes to nothing.
func MixinDOI(r io.Reader, doi string) (string, error) {
	// Load the document into an etree tree.
	var xmlDoc etree.Document
	if _, err := xmlDoc.ReadFrom(r); err != nil {
		log.Printf("Could not parse xml.\n")
		return "", err
	}

	// Without a publicationStmt there is no place to put the DOI.
	publicationStmt := xmlDoc.FindElement(`/TEI/teiHeader/fileDesc/publicationStmt`)
	if publicationStmt == nil {
		log.Printf("XML file had no /TEI/teiHeader/fileDesc/publicationStmt element.")
		return "", fmt.Errorf("XML file had no /TEI/teiHeader/fileDesc/publicationStmt element")
	}

	// asDOI turns a bare <idno> element into <idno type="DOI">doi</idno>.
	asDOI := func(idno *etree.Element) *etree.Element {
		idno.CreateAttr("type", "DOI")
		idno.CreateText(doi)
		return idno
	}

	firstIdno := publicationStmt.FindElement(`./idno`)
	switch {
	case firstIdno == nil:
		// No <idno> at all: append one to publicationStmt.
		log.Printf("No idno element present. Create one.")
		asDOI(publicationStmt.CreateElement("idno"))
	case firstIdno.FindElement(`./idno`) != nil:
		// The first <idno> groups nested <idno>s: append the DOI to that group.
		asDOI(firstIdno.CreateElement("idno"))
	default:
		// One or several flat <idno>s: put the DOI at the first one's position.
		publicationStmt.InsertChildAt(firstIdno.Index(), asDOI(etree.NewElement("idno")))
	}

	serialized, err := xmlDoc.WriteToString()
	if err != nil {
		log.Printf("Problem serializing xml to string.")
		return "", err
	}
	if serialized == "" {
		return "", fmt.Errorf("serialization of xml resulted in empty string")
	}

	log.Printf("Succes.")
	return serialized, nil
}
......@@ -9,19 +9,42 @@ import (
"log"
"mime/multipart"
"net/http"
"regexp"
"strconv"
"strings"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
)
// ZResponse stores a zenodo response
type ZResponse struct {
// DOIResponse stores a response from doi.org.
// ResolveDOI unmarshals the JSON body returned by
// https://doi.org/api/handles/<doi> into this type. None of the fields carries
// a json tag, so decoding relies on encoding/json's case-insensitive matching
// of JSON keys to field names.
type DOIResponse struct {
// NOTE(review): presumably the handle system's own response code rather than the HTTP status — confirm against the doi.org API docs.
ResponseCode int
// NOTE(review): presumably the handle (DOI) the response refers to — confirm.
Handle string
// Values holds the individual values of the response; ResolveDOI scans them for an entry whose Type is "URL".
Values []DOIValue
}
// DOIValue stores a single value of a doi response.
// It is the element type of DOIResponse.Values. Of its fields, ResolveDOI only
// reads Type and Data; the others are decoded but not used there.
type DOIValue struct {
Index int
// Type is the kind of value; ResolveDOI only considers values whose Type is "URL".
Type string
// NOTE(review): presumably a time-to-live in seconds — unit not established by this file, confirm.
TTL int64
// Timestamp is kept as the raw string from the response; it is not parsed into a time value here.
Timestamp string
// Data carries the payload; for "URL" values ResolveDOI reads the target address from Data.Value.
Data DOIData
}
// DOIData holds the payload of one doi.org response value (DOIValue.Data).
type DOIData struct {
	// Value is the payload itself; for values of type "URL" ResolveDOI takes
	// the target address from it. Format presumably names how Value is
	// encoded (TODO confirm against the doi.org API documentation).
	Format, Value string
}
// ZDepositResponse stores a zenodo deposit response
type ZDepositResponse struct {
Created string
Files []string
Files []ZFileResponse
ID int64
Links ZLinks
Metadata ZMetadata
Metadata ZPrereserveMetadata
Modified string
Owner int64
RecordID int64 `json:"record_id"`
......@@ -30,6 +53,15 @@ type ZResponse struct {
Title string
}
// ZFileResponse stores a zenodo file response.
// It is also the element type of ZDepositResponse.Files.
type ZFileResponse struct {
Checksum string
// Filename is read from (and written to) the JSON key "name", not "filename".
// NOTE(review): verify that this is the key zenodo actually uses in the responses decoded here.
Filename string `json:"name"`
ID string
// NOTE(review): presumably the size in bytes; decoding fails if zenodo sends this value as a JSON string — confirm.
Filesize int64
// Links holds the REST endpoint links that belong to this file.
Links ZLinks
}
// ZLinks stores links to zenodo's REST endpoints
type ZLinks struct {
Discard string
......@@ -38,10 +70,16 @@ type ZLinks struct {
Publish string
NewVersion string
Self string
Download string
}
// ZPostData wraps the metadata in a "metadata" object.
// Marshalling a ZPostData yields {"metadata": ...}.
type ZPostData struct {
// Metadata is a pointer, so a nil value is marshalled as JSON null.
Metadata *tei2zenodo.ZMetadata `json:"metadata"`
}
// ZMetadata stores zenodo metadata
type ZMetadata struct {
// ZPrereserveMetadata stores DOI prereservation metadata.
// It is the type of ZDepositResponse.Metadata; GetDOI reads the reserved DOI
// from PrereserveDOI.DOI after creating an empty deposit.
type ZPrereserveMetadata struct {
// PrereserveDOI is decoded from the JSON key "prereserve_doi".
PrereserveDOI ZPrereserveDOI `json:"prereserve_doi"`
}
......@@ -51,199 +89,340 @@ type ZPrereserveDOI struct {
RecID int64
}
/* What's happening here?
- The order as described at https://developers.zenodo.org/ is this:
I. Get id
I.1. Either from the TEI file
I.2. Or from uploading an empty deposit:
headers = {"Content-Type": "application/json"}
requests.post('https://zenodo.org/api/deposit/depositions',
params={'access_token': ACCESS_TOKEN},
json={},
headers=headers)
II. Upload file(s)
deposition_id = r.json()['id']
data = {'name': 'myfirstfile.csv'}
files = {'file': open('/path/to/myfirstfile.csv', 'rb')}
requests.post('https://zenodo.org/api/deposit/depositions/%s/files' % deposition_id,
params={'access_token': ACCESS_TOKEN},
data=data,
files=files)
III. Upload metadata
requests.put('https://zenodo.org/api/deposit/depositions/%s' % deposition_id,
params={'access_token': ACCESS_TOKEN},
data=json.dumps(metadata),
headers=headers)
IV. Publish
requests.post('https://zenodo.org/api/deposit/depositions/%s/actions/publish' % deposition_id,
params={'access_token': ACCESS_TOKEN} )
*/
// GetDOI creates an empty zenodo deposit and reserves a DOI
func GetDOI(md *tei2zenodo.ZMetadata, conf *tei2zenodo.RepoConfig) (string, error) {
uri := conf.Host + ":" + strconv.Itoa(int(conf.Port)) + "/api/deposit/depositions?access_token=" + conf.Token
log.Printf("Post request to: %s", uri)
client := &http.Client{}
// it returns the DOI of the deposit, its URI and an error value
func GetDOI(conf *tei2zenodo.RepoConfig) (string, string, error) {
// Compile POST request
targetURI := conf.Host + ":" + strconv.Itoa(int(conf.Port)) + "/api/deposit/depositions" // ?access_token=" + conf.Token
log.Printf("Post request to: %s", targetURI)
v := []byte(`{}`)
buf := bytes.NewBuffer(v)
req, err := http.NewRequest("POST", uri, buf)
req, err := http.NewRequest("POST", targetURI, buf)
if err != nil {
log.Printf("Problem creating POST request: %v ...", err)
return "", fmt.Errorf("problem creating POST request: %s", err)
return "", "", fmt.Errorf("problem creating POST request: %s", err)
}
req.Header.Add("Content-Type", `application/json`)
req.Header.Add("Authorization", "Bearer "+conf.Token)
// Send POST request
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
log.Printf("Problem sending POST request: %v ...", err)
return "", fmt.Errorf("problem sending POST request: %s", err)
return "", "", fmt.Errorf("problem sending POST request: %s", err)
}
defer resp.Body.Close()
// Handle problematic responses
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Printf("Problem reading POST response: %v ...", err)
return "", fmt.Errorf("problem reading POST response: %s", err)
return "", "", fmt.Errorf("problem reading POST response: %s", err)
}
if strconv.Itoa(resp.StatusCode)[:1] != "2" {
log.Printf("Problem reported by zenodo: %d %v. %80s ...", resp.StatusCode, err, content)
return "", fmt.Errorf("problem reported by zenodo: %d %v. %80s", resp.StatusCode, err, content)
return "", "", fmt.Errorf("problem reported by zenodo: %d %v. %80s", resp.StatusCode, err, content)
}
var parsedContent ZResponse
// Parse response
var parsedContent ZDepositResponse
err = json.Unmarshal(content, &parsedContent)
if err != nil {
log.Printf("Problem parsing zenodo's response: %v ...", err)
return "", fmt.Errorf("problem parsing zenodo's response: %s", err)
return "", "", fmt.Errorf("problem parsing zenodo's response: %s", err)
}
doi := parsedContent.Metadata.PrereserveDOI.DOI
return doi, nil
depositURI := parsedContent.Links.Self
log.Printf("Success (code %d (expected 201)). DOI %s is created and %s.", resp.StatusCode, doi, parsedContent.State)
return doi, depositURI, nil
}
// zenodoRecordPrefix matches the landing-page prefix of a zenodo record URL.
// It is compiled once instead of on every ResolveDOI call.
var zenodoRecordPrefix = regexp.MustCompile(`^https://zenodo\.org/record/`)

// ResolveDOI retrieves the zenodo deposit that belongs to a given DOI.
//
// It asks https://doi.org/api/handles/<doi> for the handle record, takes the
// first value of type "URL" that has a non-empty target, and rewrites a leading
// "https://zenodo.org/record/" to the deposit endpoint of the configured
// repository (conf.Host:conf.Port/api/deposit/depositions/), e.g.
// "https://zenodo.org/record/1186520" ->
// "https://sandbox.zenodo.org/api/deposit/depositions/1186520".
// A target that does not start with that prefix is returned unchanged.
//
// md is not used at the moment; the parameter is kept so callers stay valid.
//
// It returns the deposit API URI. An empty string and an error are returned if
// the request cannot be sent, the body cannot be read, doi.org answers with a
// non-2xx status, the response is not valid JSON, or it contains no usable
// URL value.
func ResolveDOI(doi string, md *tei2zenodo.ZMetadata, conf *tei2zenodo.RepoConfig) (string, error) {
	// Compile GET request
	targetURI := "https://doi.org/api/handles/" + doi
	log.Printf("Get request from: %s", targetURI)

	// Send GET request
	resp, err := http.Get(targetURI)
	if err != nil {
		log.Printf("Problem sending GET request: %v ...", err)
		return "", fmt.Errorf("problem sending GET request: %s", err)
	}
	defer resp.Body.Close()

	// Handle problematic responses
	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("Problem reading GET response: %v ...", err)
		return "", fmt.Errorf("problem reading GET response: %s", err)
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		// err is nil here, so only the status and the start of the body are
		// reported; %.80s truncates the body to 80 bytes.
		log.Printf("Problem reported by doi.org: %d. %.80s ...", resp.StatusCode, content)
		return "", fmt.Errorf("problem reported by doi.org: %d. %.80s", resp.StatusCode, content)
	}

	// Parse response
	var parsedContent DOIResponse
	if err := json.Unmarshal(content, &parsedContent); err != nil {
		log.Printf("Problem parsing doi.org's response: %v ...", err)
		return "", fmt.Errorf("problem parsing doi.org's response: %s", err)
	}

	for _, v := range parsedContent.Values {
		if v.Type != "URL" || v.Data.Value == "" {
			continue
		}
		depositBase := conf.Host + ":" + strconv.Itoa(int(conf.Port)) + "/api/deposit/depositions/"
		// Literal replacement: a "$" in the configured host must not be
		// interpreted as a regexp group reference.
		apiURI := zenodoRecordPrefix.ReplaceAllLiteralString(v.Data.Value, depositBase)
		log.Printf("Retrieved zenodo deposit for %s: %s.", doi, apiURI)
		return apiURI, nil
	}
	return "", fmt.Errorf("no valid deposit URI found in doi.org response")
}
// PostFile posts a file to zenodo, taking the id from the md.DOI field
func PostFile(r io.Reader, filename string, md *tei2zenodo.ZMetadata, conf *tei2zenodo.RepoConfig) error {
// it returns an URI for the upload and an error value
func PostFile(r io.Reader, filename string, md *tei2zenodo.ZMetadata, conf *tei2zenodo.RepoConfig) (string, error) {
if md.DOI[:15] != "10.5072/zenodo." { // not a zenodo doi
log.Printf("Problem: DOI %s is not a zenodo DOI.", md.DOI)
return fmt.Errorf("invalid DOI value")
} else {
id := md.DOI[15:]
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(r)
file := buf.String()
reader := strings.NewReader(file)
uri := conf.Host + ":" + strconv.Itoa(int(conf.Port)) + "/api/deposit/depositions/" + id + "/files?access_token=" + conf.Token
// uri := "https://postman-echo.com/post"
log.Printf("Post request to: %s", uri)
log.Printf("Upload filename: %s", filename)
log.Printf("Upload file: %s", file[:80])
body := new(bytes.Buffer)
writer := multipart.NewWriter(body)
label, err := writer.CreateFormField("name")
if err != nil {
log.Printf("Problem creating POST request body: %v ...", err)
return fmt.Errorf("problem creating POST request body: %s", err)
}
label.Write([]byte(filename))
return "", fmt.Errorf("invalid DOI value")
}
id := md.DOI[15:]
part, err := writer.CreateFormFile(`file`, filename)
if err != nil {
log.Printf("Problem creating POST request body: %v ...", err)
return fmt.Errorf("problem creating POST request body: %s", err)
}
io.Copy(part, reader)
writer.Close()
// Read file from Request body
buf := new(bytes.Buffer)
buf.ReadFrom(r)
file := buf.String()
reader := strings.NewReader(file)
req, err := http.NewRequest("POST", uri, body)
if err != nil {
log.Printf("Problem creating POST request: %v ...", err)
return fmt.Errorf("problem creating POST request: %s", err)