Commit fc9afdc0 authored by Joerg-Holger Panzer

Update the indexer

add infojson check/create
parent 44005705
Pipeline #300498 failed with stages in 7 minutes and 34 seconds
......@@ -19,7 +19,7 @@ EXPORT_KEY_PATTERN_ENDNOTE="citation/%s/%s_%s.enw"
EXPORT_KEY_PATTERN_IIIF="iiif/%s.json"
EXPORT_KEY_PATTERN_IIIF_RANGE="iiif/%s/range/%s.json"
EXPORT_KEY_PATTERN_INFO_JSON="info/%s/%s.json"
EXPORT_KEY_PATTERN_IMAGE="orig/%s/%s."
EXPORT_KEY_PATTERN_IMAGE="orig/%s/%s"
# orig/<work_id>/<page>.{jpg | tif | gif | ...}
S3_IMAGE_KEY_PATTERN=orig/%s/%s.%s
......
......@@ -10,6 +10,7 @@ require (
github.com/go-redis/redis v6.15.9+incompatible
github.com/sirupsen/logrus v1.8.1
github.com/spf13/viper v1.11.0
gopkg.in/gographics/imagick.v3 v3.4.0
)
require (
......
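The new gopkg.in/gographics/imagick.v3 dependency wraps the ImageMagick C library, which has to be initialized once per process before any MagickWand is created and released again on shutdown. A minimal sketch of that setup, assuming it lives in the indexer's main (the entry point is not part of this diff):
package main

import "gopkg.in/gographics/imagick.v3/imagick"

func main() {
	// set up the ImageMagick environment once for the whole process
	imagick.Initialize()
	// release it when the indexer shuts down
	defer imagick.Terminate()

	// ... run the indexer as before ...
}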
......@@ -18,7 +18,11 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/go-redis/redis"
"gitlab.gwdg.de/subugoe/metsimporter/indexer/templating"
"gitlab.gwdg.de/subugoe/metsimporter/indexer/types"
//"gopkg.in/gographics/imagick.v2/imagick"
"gopkg.in/gographics/imagick.v3/imagick"
)
var (
......@@ -193,7 +197,144 @@ func GetXMLFrom(bucket string, key string, context string, doctype string) (stri
return "", nil
}
func GetImageDimensionFromS3(bucket string, key string) (*types.ImageInfo, error) {
func GetXMLFromOLAHDS(key string, context string) (string, error) {
if context != "olahds" {
return "", nil
}
url := config.OlahdsServiceEndpointBasepath + key
str, err := DownloadFile(url)
if err != nil {
return "", err
}
return str, nil
}
// GetImageDimensionFromOLAHDS is still a stub: fetching the image via
// config.OlahdsServiceEndpointBasepath + key and reading its dimensions is not
// implemented yet, so an empty ImageInfo is returned for now.
func GetImageDimensionFromOLAHDS(bucket string, key string) (*types.ImageInfo, error) {
return new(types.ImageInfo), nil
}
func CreateImageInfoInS3(bucket string, infojsonkey string, imagekey string, document string, product string, page string) (*types.ImageInfo, error) {
b := bytes.Buffer{}
var result *s3.GetObjectOutput
var err error
attempts := 0
for {
attempts++
result, err = s3Client.GetObject(&s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(imagekey),
//Key: aws.String(infoJsonJob.Key),
})
if err != nil {
if strings.Contains(err.Error(), "NoSuchKey") {
// the source image itself is missing, so there is nothing to build the info.json from
return nil, fmt.Errorf("NoSuchKey %s/%s", bucket, imagekey)
}
if attempts > 5 {
return nil, fmt.Errorf("unable to load %s/%s, %s", bucket, imagekey, err)
}
// back off before the next attempt
time.Sleep(time.Duration(6*attempts) * time.Second)
continue
}
break
}
defer result.Body.Close()
if _, err := io.Copy(&b, result.Body); err != nil {
return nil, fmt.Errorf("failed to copy response body for %s/%s, %s", bucket, imagekey, err.Error())
}
mw := imagick.NewMagickWand()
defer mw.Destroy()
err = mw.ReadImageBlob(b.Bytes())
if err != nil {
return nil, fmt.Errorf("Failed to create image object %s/%s, due to %s\n", bucket, imagekey, err.Error())
}
var x, y float64
depth := mw.GetImageDepth()
height := mw.GetImageHeight()
width := mw.GetImageWidth()
size, _ := mw.GetImageLength()
x, y, _ = mw.GetImageResolution()
imageInfo := templating.ImageInfo{
Depth: depth,
Height: int32(height),
Width: int32(width),
Size: int32(size),
X: x,
Y: y,
Document: document,
//Context: context,
Product: product,
Page: page,
//InfoJsonKey: infojsonkey,
}
infoJsonString := templating.Create(imageInfo)
err = UploadTo(product, infojsonkey, []byte(infoJsonString))
if err != nil {
log.Errorf("Could not upload infoJson for %s/%s to S3, due to %s", bucket, infojsonkey, err.Error())
}
return &types.ImageInfo{
Width: int32(width),
Height: int32(height),
}, nil
}
func ExistImageInS3(bucket string, key string) bool {
s3Client := GetS3Client()
query := &s3.HeadObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
}
// HeadObject fails for a missing key as well as for access or network problems;
// in either case the info.json has to be (re)created, so any error counts as "not present".
if _, err := s3Client.HeadObject(query); err != nil {
return false
}
return true
}
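One caveat with the helper above: s3.HeadObject reports a missing object as a 404 "NotFound" request failure rather than a "NoSuchKey" error, which is why any error is simply treated as "not present". If a caller ever needs to distinguish a missing info.json from an access or network problem, a sketch using the aws-sdk-go awserr types could look like this (objectExists is a hypothetical helper, not part of this commit):
// import "github.com/aws/aws-sdk-go/aws/awserr"
func objectExists(bucket string, key string) (bool, error) {
	_, err := GetS3Client().HeadObject(&s3.HeadObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
	})
	if err == nil {
		return true, nil
	}
	// HeadObject surfaces a missing key as an HTTP 404 request failure
	if reqErr, ok := err.(awserr.RequestFailure); ok && reqErr.StatusCode() == 404 {
		return false, nil
	}
	// anything else (permissions, network) is a real error the caller may want to retry
	return false, err
}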
func GetImageInfoFromInfoJsonInS3(bucket string, infojsonkey string) (*types.ImageInfo, error) {
svc := GetS3Client()
......@@ -205,16 +346,16 @@ func GetImageDimensionFromS3(bucket string, key string) (*types.ImageInfo, error
result, err = svc.GetObject(&s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
Key: aws.String(infojsonkey),
})
if err != nil {
if strings.Contains(err.Error(), "NoSuchKey") {
return nil, fmt.Errorf("NoSuchKey %s", key)
return nil, fmt.Errorf("NoSuchKey %s", infojsonkey)
}
if attempts > 5 {
time.Sleep(time.Duration(6*attempts) * time.Second)
return nil, fmt.Errorf("unable to load %s, %s", key, err)
return nil, fmt.Errorf("unable to load %s, %s", infojsonkey, err)
}
continue
}
......@@ -224,12 +365,12 @@ func GetImageDimensionFromS3(bucket string, key string) (*types.ImageInfo, error
defer result.Body.Close()
body, err := ioutil.ReadAll(result.Body)
if err != nil {
return nil, fmt.Errorf("could not read S3 response for object s3://%s/%s, due to %s", bucket, key, err.Error())
return nil, fmt.Errorf("could not read S3 response for object s3://%s/%s, due to %s", bucket, infojsonkey, err.Error())
}
var imageInfo *types.ImageInfo = new(types.ImageInfo)
if err := json.Unmarshal(body, &imageInfo); err != nil {
return nil, fmt.Errorf("could not unmarshal object s3://%s/%s , due to %s", bucket, key, err.Error())
return nil, fmt.Errorf("could not unmarshal object s3://%s/%s , due to %s", bucket, infojsonkey, err.Error())
}
return imageInfo, nil
......
......@@ -226,13 +226,33 @@ func getPhysStructure(physMeta types.PhysicalAttributes,
esPhys.PageHRef = fileAttr.Href
if ccontext != "olahds" {
key := fmt.Sprintf(config.ExportKeyPatternInfoJson, workID, fileAttr.Page)
pageDimension, err := helper.GetImageDimensionFromS3(product, key)
if err == nil {
esPhys.PageHeight = pageDimension.Height
esPhys.PageWidth = pageDimension.Width
infojsonkey := fmt.Sprintf(config.ExportKeyPatternInfoJson, workID, fileAttr.Page)
imagekey := fmt.Sprintf(config.ExportKeyPatternImage, workID, fileAttr.Filename)
exists := helper.ExistImageInS3(product, infojsonkey)
var imageInfo *types.ImageInfo
var err error
if !exists {
// derive the dimensions from the image and write a new info.json to S3
imageInfo, err = helper.CreateImageInfoInS3(product, infojsonkey, imagekey, workID, product, fileAttr.Page)
} else {
// read the dimensions from the existing info.json
imageInfo, err = helper.GetImageInfoFromInfoJsonInS3(product, infojsonkey)
}
// set dimensions
if err == nil {
esPhys.PageHeight = imageInfo.Height
esPhys.PageWidth = imageInfo.Width
} else {
// dimensions could not be determined, fall back to a 300x300 default
esPhys.PageHeight = 300
esPhys.PageWidth = 300
}
}
esPhys.Log = llog
......
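For illustration, with the key patterns from the configuration above (EXPORT_KEY_PATTERN_INFO_JSON and the updated EXPORT_KEY_PATTERN_IMAGE) and a hypothetical work "PPN123" whose page file is "00000001.tif", the two keys expand as follows:
// hypothetical values, only to show how the two patterns expand
infojsonkey := fmt.Sprintf("info/%s/%s.json", "PPN123", "00000001") // "info/PPN123/00000001.json"
imagekey := fmt.Sprintf("orig/%s/%s", "PPN123", "00000001.tif")     // "orig/PPN123/00000001.tif"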
......@@ -201,7 +201,8 @@ func getMapFromFileSec(fileSec types.Filesec, context string, isExternal bool) m
} else {
if (strings.ToLower(fileGrp.Use) == "default") || (strings.ToLower(fileGrp.Use) == "presentation") || (strings.ToLower(fileGrp.Use) == "thumbs") {
reString = "^\\S*/(\\S*)$"
//reString = "^\\S*/(\\S*)$"
reString = "^\\S*/((\\S*).(tif|TIF|gif|GIF|jpg|JPG))$"
re = regexp.MustCompile(reString)
} else {
continue
......@@ -212,7 +213,7 @@ func getMapFromFileSec(fileSec types.Filesec, context string, isExternal bool) m
if isExternal {
var arr []string = re.FindStringSubmatch(strings.Replace(file.FLocat.Href, " ", "", -1))
if len(arr) != 2 {
if len(arr) != 4 {
log.Errorf("URL %s doesn't match pattern %s", file.FLocat.Href, reString)
} else {
fileIDToAttrMap[file.ID] = types.FileSecAttributes{
......@@ -221,9 +222,9 @@ func getMapFromFileSec(fileSec types.Filesec, context string, isExternal bool) m
Mimetype: file.Mimetype,
Loctype: file.FLocat.Loctype,
Href: file.FLocat.Href,
Page: arr[1],
Page: arr[2],
Filename: arr[1],
Format: "",
Format: arr[3],
}
}
} else {
......
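With the extended pattern, FindStringSubmatch returns four elements instead of two: the full match, the file name including its extension, the base name, and the extension itself, which is why the length check changes from 2 to 4. A quick sketch with a hypothetical Href:
re := regexp.MustCompile(`^\S*/((\S*)\.(tif|TIF|gif|GIF|jpg|JPG))$`)
arr := re.FindStringSubmatch("https://example.org/orig/PPN123/00000001.tif")
// arr[0] = "https://example.org/orig/PPN123/00000001.tif" (full match)
// arr[1] = "00000001.tif" -> Filename
// arr[2] = "00000001"     -> Page
// arr[3] = "tif"          -> Format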
......@@ -3,6 +3,7 @@ package templating
import (
"bytes"
"embed"
"fmt"
"text/template"
)
......@@ -11,6 +12,65 @@ var (
f embed.FS
)
type Data struct {
WorkID string
PageId string
Key string
Width1 float32
Width2 float32
Width3 float32
Width4 float32
Width5 float32
Height1 float32
Height2 float32
Height3 float32
Height4 float32
Height5 float32
}
type ImageInfo struct {
Depth uint `json:"depth"`
Height int32 `json:"height"`
Width int32 `json:"width"`
Size int32 `json:"size"`
X float64 `json:"x"`
Y float64 `json:"y"`
Document string `json:"document"`
Context string `json:"context"`
Product string `json:"product"`
Page string `json:"page"`
InfoJsonKey string `json:"infojsonkey"`
}
// Create renders templates/info.tmpl for one page and returns the resulting info.json string.
func Create(imageInfo ImageInfo) string {
key := fmt.Sprintf("%s:%s:%s", imageInfo.Product, imageInfo.Document, imageInfo.Page)
w := float32(imageInfo.Width)
h := float32(imageInfo.Height)
data := Data{
WorkID: imageInfo.Document,
PageId: imageInfo.Page,
Key: key,
Width1: w / 1.0,
Width2: w / 2.0,
Width3: w / 4.0,
Width4: w / 8.0,
Width5: w / 16.0,
Height1: h / 1.0,
Height2: h / 2.0,
Height3: h / 4.0,
Height4: h / 8.0,
Height5: h / 16.0,
}
return ProcessFile("templates/info.tmpl", data)
}
// process applies the data structure 'vars' onto an already
// parsed template 't', and returns the resulting string.
func process(t *template.Template, vars interface{}) string {
......
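For reference, Create fills the info.json template with a five-step size pyramid (full size down to one sixteenth in both dimensions). A sketch with hypothetical dimensions and product name:
info := templating.Create(templating.ImageInfo{
	Document: "PPN123",   // hypothetical work id
	Product:  "nlh",      // hypothetical product/bucket
	Page:     "00000001",
	Width:    2048,
	Height:   1536,
})
// renders templates/info.tmpl with Key "nlh:PPN123:00000001" and the sizes
// 2048x1536, 1024x768, 512x384, 256x192 and 128x96
_ = info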
{
"@context": "http://iiif.io/api/image/2/context.json",
"@id": "{{`{{HOST_BASE_URL}}`}}/iiif/image/{{ .Key }}",
"width": {{ .Width1 }},
"height": {{ .Height1 }},
"protocol": "http://iiif.io/api/image",
"profile": "http://iiif.io/api/image/2/level0.json",
"sizes": [
{
"height": {{ .Height5 }},
"width": {{ .Width5 }}
},
{
"height": {{ .Height4 }},
"width": {{ .Width4 }}
},
{
"height": {{ .Height3 }},
"width": {{ .Width3 }}
},
{
"height": {{ .Height2 }},
"width": {{ .Width2 }}
},
{
"height": {{ .Height1 }},
"width": {{ .Width1 }}
}
],
"tiles": [
{
"width": 512,
"height": 512,
"scaleFactors": [
1,
2,
4,
8,
16
]
}
]
}
\ No newline at end of file