Commit e001539c authored by Andreas Wagner's avatar Andreas Wagner
Browse files

Successful run.

parent 67eccb86
{
"ListenSpec": 8000,
"Verbose": false,
"APIRoot": "/api/v1",
"FileAPI": "/file",
"WebhookAPI": "/hooks/receivers/github/events/",
......@@ -9,13 +10,13 @@
"port": 443,
"token": "aBcDeFgHiJkLmNoPqRsTuVwXyZ"
},
"AllowedGit": [
{
"host": "https://github.com",
"user": "digicademy",
"repo": "svsal"
}
],
"AllowedGit": {
"host": "https://github.com",
"token": "aBcDeFgHiJkLmNoPqRsTuVwXyZ",
"user": "digicademy",
"repo": "digicademy/svsal",
"commit_keyword": ""
},
"metadata": {
"fields": [
{
......
......@@ -5,7 +5,12 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"strconv"
"strings"
"time"
"gitlab.gwdg.de/rg-mpg-de/tei2zenodo"
)
......@@ -44,28 +49,28 @@ type ReleaseHook struct {
// Release stores the details of a github release
type Release struct {
ID int64
NodeID string
TagName string
TargetCommitish string
NodeID string `json:"node_id"`
TagName string `json:"tag_name"`
TargetCommitish string `json:"target_commitish"`
Name string
Draft bool
Prerelease bool
Author User
CreatedAt string
PublishedAt string
CreatedAt string `json:"created_at"`
PublishedAt string `json:"published_at"`
Body string
URL string
AssetsURL string
UploadURL string
HTMLURL string
TarballURL string
ZipballURL string
Body string
AssetsURL string `json:"assets_url"`
UploadURL string `json:"upload_url"`
HTMLURL string `json:"html_url"`
TarballURL string `json:"tarball_url"`
ZipballURL string `json:"zipball_url"`
}
// Commit stores info about a single commit
type Commit struct {
SHA string
SHA string `json:"sha"`
Message string
URL string
Author User
......@@ -73,8 +78,8 @@ type Commit struct {
// CommitDetails stores more detailed information about a single commit
type CommitDetails struct {
SHA string
NodeID string
SHA string `json:"sha"`
NodeID string `json:"node_id"`
Author User
Committer User
Commit CommitMeta
......@@ -82,8 +87,8 @@ type CommitDetails struct {
Stats Stats
URL string
HTMLURL string
CommentsURL string
HTMLURL string `json:"html_url"`
CommentsURL string `json:"comments_url"`
}
// CommitMeta stores information about committer, tree etc
......@@ -92,7 +97,7 @@ type CommitMeta struct {
Author User
Committer User
Message string
CommentCount int
CommentCount int `json:"comment_count"`
Tree Commit
Parents []Commit
Verification Verification
......@@ -106,6 +111,15 @@ type Verification struct {
Payload string
}
// License describes the license attached to a repository.
// Only the keys that differ from the Go field name beyond letter case
// (spdx_id, node_id) need an explicit JSON tag.
type License struct {
	Key, Name string
	SPDXID    string `json:"spdx_id"`
	URL       string
	NodeID    string `json:"node_id"`
}
// Stats stores information about a commit's stats
type Stats struct {
Additions int
......@@ -116,14 +130,16 @@ type Stats struct {
// File stores information about a file concerned by a commit.
//
// Each field is declared exactly once. Keys that are snake_case in the JSON
// carry an explicit tag, since encoding/json cannot derive them from the Go
// field name.
type File struct {
	Filename         string
	Additions        int
	Deletions        int
	Changes          int
	Status           string
	PreviousFilename string `json:"previous_filename"`
	Patch            string
	RawURL           string `json:"raw_url"`
	BlobURL          string `json:"blob_url"`
	// The key in GitHub's commit file entries is "contents_url" (plural);
	// the Go field name is kept as ContentURL so existing callers still compile.
	ContentURL string `json:"contents_url"`
}
// User stores information about a person
......@@ -132,124 +148,132 @@ type User struct {
Login string
EMail string
ID int64
NodeID string
NodeID string `json:"node_id"`
Type string
GravatarID string
GravatarID string `json:"gravatar_id"`
Date string
SideAdmin bool
SiteAdmin bool `json:"site_admin"`
URL string
HTMLURL string
AvatarURL string
FollowersURL string
FollowingURL string
GistsURL string
StarredURL string
SubscriptionsURL string
OrganizationsURL string
ReposURL string
EventsURL string
ReceivedEventsURL string
HTMLURL string `json:"html_url"`
AvatarURL string `json:"avatar_url"`
FollowersURL string `json:"followers_url"`
FollowingURL string `json:"following_url"`
GistsURL string `json:"gists_url"`
StarredURL string `json:"starred_url"`
SubscriptionsURL string `json:"subscriptions_url"`
OrganizationsURL string `json:"organizations_url"`
ReposURL string `json:"repos_url"`
EventsURL string `json:"events_url"`
ReceivedEventsURL string `json:"received_events_url"`
}
// Org stores information about a github organization.
//
// Each field is declared exactly once, and every snake_case JSON key is mapped
// through an explicit tag.
type Org struct {
	Login       string
	ID          int64
	NodeID      string `json:"node_id"`
	Description string
	URL         string `json:"url"`
	ReposURL    string `json:"repos_url"`
	EventsURL   string `json:"events_url"`
	HooksURL    string `json:"hooks_url"`
	IssuesURL   string `json:"issues_url"`
	// The key is "members_url"; a misspelt tag would leave this field empty.
	MembersURL       string `json:"members_url"`
	PublicMembersURL string `json:"public_members_url"`
	AvatarURL        string `json:"avatar_url"`
}
// Repo stores information about a github repository
type Repo struct {
ID int64
NodeID string
Name string
FullName string
Description string
License string
Owner User
Language string
DefaultBranch string
MasterBranch string
CreatedAt string
UpdatedAt string
PushedAt string
ID int64
NodeID string `json:"node_id"`
Name string
FullName string `json:"full_name"`
Description string
License License
Owner User
Language string
DefaultBranch string `json:"default_branch"`
MasterBranch string `json:"master_branch"`
// CreatedAt int64 `json:"created_at"`
// UpdatedAt int64 `json:"updated_at"`
// PushedAt string `json:"pushed_at"`
Size int
ForksCount int
ForksCount int `json:"forks_count"`
Forks int
OpenIssuesCount int
OpenIssues int
WatchersCount int
OpenIssuesCount int `json:"open_issues_count"`
OpenIssues int `json:"open_issues"`
WatchersCount int `json:"watchers_count"`
Watchers int
StargazersCount int
StargazersCount int `json:"stargazers_count"`
Stargazers int
Private bool
Form bool
Archived bool
Disabled bool
HasIssues bool
HasProjects bool
HasDownloads bool
HasWiki bool
HasPages bool
HasIssues bool `json:"has_issues"`
HasProjects bool `json:"has_projects"`
HasDownloads bool `json:"has_downloads"`
HasWiki bool `json:"has_wiki"`
HasPages bool `json:"has_pages"`
Homepage string
URL string
HTMLURL string
ForksURL string
KeysURL string
CollaboratorsURL string
TeamsURL string
HooksURL string
IssueEventsURL string
EventsURL string
AssigneesURL string
BranchesURL string
TagsURL string
BlobsURL string
GitTagsURL string
GitRegsURL string
TreesURL string
StatusesURL string
LanguagesURL string
StargazersURL string
ContributorsURL string
SubscribersURL string
SubscriptionURL string
CommitsURL string
GitCommitsURL string
CommentsURL string
IssueCommentsURL string
ContentsURL string
CompareURL string
MergesURL string
ArchiveURL string
DownloadsURL string
IssuesURL string
PullsURL string
MilestonesURL string
NotificationsURL string
LabelsURL string
ReleasesURL string
DeploymentsURL string
GitURL string
SSHURL string
CloneURL string
SVNURL string
MirrorURL string
HTMLURL string `json:"html_url"`
ForksURL string `json:"forks_url"`
KeysURL string `json:"keys_url"`
CollaboratorsURL string `json:"collaborators_url"`
TeamsURL string `json:"teams_url"`
HooksURL string `json:"hooks_url"`
IssueEventsURL string `json:"issue_events_url"`
EventsURL string `json:"events_url"`
AssigneesURL string `json:"assignees_url"`
BranchesURL string `json:"branches_url"`
TagsURL string `json:"tags_url"`
BlobsURL string `json:"blobl_url"`
GitTagsURL string `json:"git_tags_url"`
GitRefsURL string `json:"git_refs_url"`
TreesURL string `json:"trees_url"`
StatusesURL string `json:"statuses_url"`
LanguagesURL string `json:"languages_url"`
StargazersURL string `json:"stargazers_url"`
ContributorsURL string `json:"contributors_url"`
SubscribersURL string `json:"subscribers_url"`
SubscriptionURL string `json:"subscription_url"`
CommitsURL string `json:"commits_url"`
GitCommitsURL string `json:"git_commits_url"`
CommentsURL string `json:"comments_url"`
IssueCommentsURL string `json:"issue_comments_url"`
ContentsURL string `json:"contents_url"`
CompareURL string `json:"compare_url"`
MergesURL string `json:"merges_url"`
ArchiveURL string `json:"archive_url"`
DownloadsURL string `json:"downloads_url"`
IssuesURL string `json:"ussues_url"`
PullsURL string `json:"pulls_url"`
MilestonesURL string `json:"milestones_url"`
NotificationsURL string `json:"notifications_url"`
LabelsURL string `json:"labels_url"`
ReleasesURL string `json:"releases_url"`
DeploymentsURL string `json:"deployments_url"`
GitURL string `json:"git_url"`
SSHURL string `json:"ssh_url"`
CloneURL string `json:"clone_url"`
SVNURL string `json:"svn_url"`
MirrorURL string `json:"mirror_url"`
}
// FileInfo is the value type of the filename-keyed maps built while processing
// a hook: it records where a file can be fetched, how it was modified, and when.
type FileInfo struct {
	URL, Change string    // file URL and modification type
	Date        time.Time // modification timestamp
}
// ProcessHook processes a Webhook's payload
// it returns a (boolean) doPublish value, a slice of readers with all concerned files, and an error value
func ProcessHook(hookType string, r io.Reader, conf *tei2zenodo.Config) (bool, []io.ReadSeeker, error) {
// it returns a (boolean) doPublish value, a map of filenames/urls with all concerned files, and an error value
func ProcessHook(hookType string, r io.ReadSeeker, conf *tei2zenodo.Config) (bool, map[string]FileInfo, error) {
doPublish := false
......@@ -257,13 +281,61 @@ func ProcessHook(hookType string, r io.Reader, conf *tei2zenodo.Config) (bool, [
case "push":
{
var payload PushHook
// Parse payload
err := json.NewDecoder(r).Decode(&payload)
if err != nil {
log.Printf("Error processing push hook: %+v", err)
return false, nil, fmt.Errorf("error processing push hook: %s", err)
}
log.Printf("Parsed hook: %+v", payload)
log.Printf("%+d Commits: %+v", len(payload.Commits), payload.Commits)
if payload.Repository.FullName != conf.AllowedGit.Repo && conf.AllowedGit.Repo != "" {
log.Printf("Repo not allowed: %s", payload.Repository.FullName)
return false, nil, fmt.Errorf("repo not allowed: %s", payload.Repository.FullName)
}
if payload.Pusher.Name != conf.AllowedGit.User && conf.AllowedGit.User != "" {
log.Printf("Git user not allowed as pusher: %s", payload.Pusher.Name)
return false, nil, fmt.Errorf("git user not allowed as pusher: %s", payload.Pusher.Name)
}
if conf.Verbose {
log.Printf("Push payload: %+v", payload)
}
commitsURL := payload.Repository.CommitsURL
// Get all commits and their associated files
files := make(map[string]FileInfo)
for _, c := range payload.Commits {
sha := c.URL[strings.LastIndex(c.URL, "/")+1:]
log.Printf(" commit %s ...", sha)
if !(strings.Contains(c.Message, conf.AllowedGit.Keyword)) && conf.AllowedGit.Keyword != "" {
log.Printf("Required keyword not contained in commit message for commit %s", sha)
continue
}
f, err := retrieveFiles(commitsURL, sha, conf)
if err != nil {
log.Printf("Error retrieving files from commit %s: %+v", sha, err)
return false, nil, fmt.Errorf("error retrieving files from commit %s: %s", sha, err)
}
for k, v := range f {
if old, ok := files[k]; ok { // file is already present
if v.Date.After(old.Date) { // but the new entry is newer
files[k] = v
} // else (file is already present and newer): no nothing
} else {
files[k] = v
}
}
}
// log.Printf("Parsed hook: %+v", payload)
if conf.Verbose {
log.Printf("Push hook with %d commits and %d files: %+v", len(payload.Commits), len(files), files)
} else {
fls := ""
for s := range files {
fls = fls + " " + s
}
log.Printf("Push hook with %d commits and %d files: %s", len(payload.Commits), len(files), fls)
}
return doPublish, files, nil
}
case "ping":
{
......@@ -275,6 +347,65 @@ func ProcessHook(hookType string, r io.Reader, conf *tei2zenodo.Config) (bool, [
return false, nil, fmt.Errorf("unknown hook type %s", hookType)
}
}
}
// retrieveFiles fetches the details of a single commit and returns the files
// touched by it, keyed by filename.
//
// commitsURL is a URL template whose "{/sha}" placeholder is replaced by
// "/"+sha. If conf.AllowedGit.Token is set, it is sent as a bearer token.
// Files whose status is "deleted" or "renamed" are left out of the result.
// Every returned FileInfo carries the file's raw URL, its modification type
// and the committer date of the commit.
//
// On failure (request cannot be built or sent, body cannot be read, non-2xx
// status, unparsable JSON) a nil map and a non-nil error are returned.
func retrieveFiles(commitsURL string, sha string, conf *tei2zenodo.Config) (map[string]FileInfo, error) {
	// Compile GET request to retrieve commit information
	targetURI := strings.Replace(commitsURL, "{/sha}", "/"+sha, -1)
	req, err := http.NewRequest("GET", targetURI, nil)
	if err != nil {
		// req is nil in this case and must not be touched.
		log.Printf("Problem creating GET request: %v ...", err)
		return nil, fmt.Errorf("problem creating GET request: %s", err)
	}
	// An empty token would only produce a malformed "Bearer " header.
	if conf.AllowedGit.Token != "" {
		req.Header.Add("Authorization", "Bearer "+conf.AllowedGit.Token)
	}

	// Send GET request
	if conf.Verbose {
		log.Printf(" Get files at %s", targetURI)
	} else {
		log.Printf(" Get files")
	}
	// The zero-value client never times out; bound the call so that a hanging
	// connection cannot block hook processing forever.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		log.Printf("Problem sending GET request: %v ...", err)
		return nil, fmt.Errorf("problem sending GET request: %s", err)
	}
	defer resp.Body.Close()

	// Handle problematic responses
	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("Problem reading GET response: %v ...", err)
		return nil, fmt.Errorf("problem reading GET response: %s", err)
	}
	// Anything outside the 2xx range counts as a failure.
	if strconv.Itoa(resp.StatusCode)[:1] != "2" {
		// err is nil at this point, so report the status line and the body text.
		log.Printf("Problem reported by github: %s. %s ...", resp.Status, content)
		return nil, fmt.Errorf("problem reported by github: %s", resp.Status)
	}

	// Parse response
	var commit CommitDetails
	err = json.Unmarshal(content, &commit)
	if err != nil {
		log.Printf("Problem parsing github's response: %v ...", err)
		return nil, fmt.Errorf("problem parsing github's response: %s", err)
	}

	// The committer date is what callers use to decide which version of a file
	// is the newest one when several commits touch it.
	str := commit.Commit.Committer.Date
	commitDate, err := time.Parse(time.RFC3339, str)
	if err != nil {
		// Best effort: continue with the zero time, but leave a trace.
		log.Printf("Problem parsing commit date %q of commit %s: %v ...", str, sha, err)
	}

	files := make(map[string]FileInfo, len(commit.Files))
	for i, f := range commit.Files {
		filename := f.Filename
		url := f.RawURL
		modType := f.Status
		if conf.Verbose {
			log.Printf(" %d. %s %s: %s", i+1, modType, filename, url)
		} else {
			log.Printf(" %d. %s %s", i+1, modType, filename)
		}
		// Deleted and renamed files are not passed on for processing.
		if modType != "deleted" && modType != "renamed" {
			files[filename] = FileInfo{URL: url, Change: modType, Date: commitDate}
		}
	}
	return files, nil
}
......@@ -92,16 +92,15 @@ func SetupRoutes(conf tei2zenodo.Config) *gin.Engine {
log.Printf("Error parsing webhook event: %v", err)
AbortMsg(500, fmt.Errorf("problem parsing webhook event: %s", err), c)
}
if len(files) == 0 {
log.Printf("No files returned")
}
log.Printf("%d files returned", len(files))
log.Printf("doPublish: %s", strconv.FormatBool(doPublish))
// Send each file to processing
for i, f := range files {
for f := range files {
var myDeposit tei2zenodo.Deposit
log.Printf("Processing file %d", i)
err := zenodo.ProcessFile(&conf, &myDeposit, f, doPublish)
log.Printf("Processing file %s", f)
myDeposit.Filename = strings.Replace(f, "/", "_", -1)
err := zenodo.ProcessDownloadFile(&conf, &myDeposit, files[f].URL, doPublish)
if err != nil {
AbortMsg(500, err, c)
return
......
......@@ -33,7 +33,9 @@ func GetFilename(r io.Reader) string {
// ParseTEI reads a TEI file and parses its metadata into a ZMetadata variable.
// Returns a doi (maybe empty) and an error value.
func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataConfig) (string, error) {
func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, c *tei2zenodo.Config) (string, error) {
conf := c.Metadata
var doc *xmlquery.Node
// Parse document r with antchfx/xmlquery...
......@@ -249,21 +251,23 @@ func ParseTEI(r io.Reader, md *tei2zenodo.ZMetadata, conf *tei2zenodo.MetadataCo
doi := md.DOI
log.Printf("Success.")
log.Printf(" Title: %v", md.Title)
if len(md.Creators) > 0 {
log.Printf(" Creator 1: %v", md.Creators[0])
if c.Verbose {
log.Printf(" Title: %v", md.Title)
if len(md.Creators) > 0 {
log.Printf(" Creator 1: %v", md.Creators[0])
}
if len(md.Contributors) > 0 {
log.Printf(" Contributor 1: %v", md.Contributors[0])
}
log.Printf(" DOI: %s", doi)
}
//if len(md.Contributors) > 0 {
// log.Printf(" Contributor 1: %v", md.Contributors[0])
//}
log.Printf(" DOI: %s", doi)
return doi, nil
}
// MixinDOI adds a DOI idno element to the document
// it returns the string serialization of the new document and an error value
func MixinDOI(r io.Reader, doi string) (string, error) {
func MixinDOI(r io.Reader, doi string, c *tei2zenodo.Config) (string, error) {
// Parse document (in r)...
var doc etree.Document
......@@ -281,7 +285,9 @@ func MixinDOI(r io.Reader, doi string) (string, error) {
topLevelIdno := pStmt.FindElement(`./idno`)
if topLevelIdno == nil { // publicationStmt does not contain any <idno> element -> add one as last child of pStmt
log.Printf("No idno element present. Create one.")
if c.Verbose {
log.Printf("No idno element present. Create one.")
}
targetIdno := pStmt.CreateElement("idno")
targetIdno.CreateAttr("type", "DOI")
targetIdno.CreateText(doi)
......
......@@ -43,49 +43,47 @@ type ZPostData struct {
Metadata *tei2zenodo.ZMetadata `json:"metadata"`
}
/* What's happening here?
- The order as described at https://developers.zenodo.org/ is this:
I. Get id
I.1. Either from the TEI file
I.2. Or from uploading an empty deposit:
headers = {"Content-Type": "application/json"}
requests.post('https://zenodo.org/api/deposit/depositions',
params={'access_token': ACCESS_TOKEN},
json={},
headers=headers)
II. Upload file(s)
deposition_id = r.json()['id']
data = {'name': 'myfirstfile.csv'}
files = {'file': open('/path/to/myfirstfile.csv', 'rb')}
requests.post('https://zenodo.org/api/deposit/depositions/%s/files' % deposition_id,
params={'access_token': ACCESS_TOKEN},
data=data,
files=files)
III. Upload metadata
requests.put('https://zenodo.org/api/deposit/depositions/%s' % deposition_id,
params={'access_token': ACCESS_TOKEN},
data=json.dumps(metadata),
headers=headers)
IV. Publish
requests.post('https://zenodo.org/api/deposit/depositions/%s/actions/publish' % deposition_id,
params={'access_token': ACCESS_TOKEN} )
*/
// ProcessDownloadFile takes a URL, downloads the file and continues processing
func ProcessDownloadFile(conf *tei2zenodo.Config, myDeposit *tei2zenodo.Deposit, url string, doPublish bool) error {
// Compile GET request
targetURI := url