From 55ea758670a0cbbbd7b5bf4e900f45cc1a07c94b Mon Sep 17 00:00:00 2001 From: Stefan Hynek Date: Tue, 25 May 2021 09:52:58 +0200 Subject: [PATCH 1/4] refactor(datasets/views/prep): revert logging level in some places to 'debug' --- discuss_data/dddatasets/views/prep.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/discuss_data/dddatasets/views/prep.py b/discuss_data/dddatasets/views/prep.py index 6530810..3bfc6d2 100644 --- a/discuss_data/dddatasets/views/prep.py +++ b/discuss_data/dddatasets/views/prep.py @@ -1484,9 +1484,9 @@ def edit_publish_final_publish(request, ds_uuid): publisher = Publisher(t, ds) # get collection from session collection = request.session.get("collection", {}) - logger.error("[DHREP PUBLISHER] collection: %s", str(collection)) + logger.debug("[DHREP PUBLISHER] collection: %s", str(collection)) status = publisher.publish_status(collection) - logger.error("[DHREP PUBLISHER] status", status) + logger.debug("[DHREP PUBLISHER] status: %s", status) # an API implementation flaw makes the "processStatus" entry missing sometimes; i suppose this is in the state of QUEUING try: @@ -1523,7 +1523,7 @@ def edit_publish_final_publish(request, ds_uuid): request.session.pop("collection") publisher.update_dataset(collection, status) # also, make DD publication when finished - logger.error("[DHREP PUBLISHER] finished, starting ds.publish", status) + logger.debug("[DHREP PUBLISHER] finished, starting ds.publish: %s", status) ds.publish(request.user) messages.success( request, @@ -1534,7 +1534,7 @@ def edit_publish_final_publish(request, ds_uuid): if process_status == "FAILED": response["X-IC-CancelPolling"] = "true" - logger.error("[DHREP PUBLISHER] FAILED", status) + logger.error("[DHREP PUBLISHER] FAILED: %s", status) messages.error( request, _("An Error occured. 
The dataset has not been published.") ) @@ -1648,7 +1648,7 @@ def edit_publish_request(request, ds_uuid): if request.method == "POST": # run checks - logger.error("POST publish request") + logger.debug("POST publish request") check_error, check_list = ds.perform_checks() logger.error(check_error) if check_error: @@ -1657,9 +1657,9 @@ def edit_publish_request(request, ds_uuid): logger.error(error_string) messages.error(request, error_string) else: - logger.error("start request_publication") + logger.debug("start request_publication") publish_message = ds.request_publication(request.user) - logger.error("end request_publication") + logger.debug("end request_publication") return edit_versions(request, ds_uuid) else: publish_message = None -- GitLab From d835b50b67f804c3d8cf0491f30895949684d84d Mon Sep 17 00:00:00 2001 From: Stefan Hynek Date: Tue, 25 May 2021 09:54:30 +0200 Subject: [PATCH 2/4] fix(dhrep/services): open file on update as binary this may fix the memory leakage issue; must be tested on dev instance close #402 --- discuss_data/dhrep/services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discuss_data/dhrep/services.py b/discuss_data/dhrep/services.py index 934de76..fd396b9 100644 --- a/discuss_data/dhrep/services.py +++ b/discuss_data/dhrep/services.py @@ -35,7 +35,7 @@ def update(token: Token, storage_id: str, file, content_type=None) -> None: content = file # https://docs.djangoproject.com/en/2.2/ref/models/fields/#django.db.models.fields.files.FieldFile content_type = "text/plain; charset=utf-8" - with content.open() as stream: + with content.open(mode="rb") as stream: response = requests.put( settings.STORAGE_URL + storage_id, headers={ -- GitLab From 748851e9d120cb41ccabfdd3ccd7955f92b9a52e Mon Sep 17 00:00:00 2001 From: Stefan Hynek Date: Tue, 25 May 2021 10:00:41 +0200 Subject: [PATCH 3/4] fix(dhrep/storage): do not store `response.text` of a `GET` on storage resource into exception this may be the root cause of the memory 
leakage occurring on a big file upload close #402 --- discuss_data/dhrep/storage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/discuss_data/dhrep/storage.py b/discuss_data/dhrep/storage.py index 5ab118c..0418a6d 100644 --- a/discuss_data/dhrep/storage.py +++ b/discuss_data/dhrep/storage.py @@ -23,8 +23,8 @@ class Storage: validator = URLValidator(["https"]) try: validator(settings.STORAGE_URL) - except ValidationError: - raise ImproperlyConfigured + except ValidationError as e: + raise ImproperlyConfigured from e storage_url = settings.STORAGE_URL if not storage_url.endswith("/"): @@ -47,10 +47,10 @@ class Storage: raise StorageException( "Not yet uploaded " + storage_id - + ": " - + response.text - + " - " + + ". " + str(response.status_code) + + ": " + + response.reason ) return response @@ -86,7 +86,7 @@ class Storage: return storage_id def update(self, token: Token, storage_id: str, content) -> None: - """Update an object in dariahstorage + """Update an object in dariahstorage (DEPRECATED) :param token: authentication token :type token: Token -- GitLab From 2f8935a23e045507aaa79685870d55ae2418f1b4 Mon Sep 17 00:00:00 2001 From: Stefan Hynek Date: Tue, 25 May 2021 10:03:02 +0200 Subject: [PATCH 4/4] refactor(dhrep/publisher): revert some more log-levels to debug also, use lazy logging --- discuss_data/dhrep/publisher.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/discuss_data/dhrep/publisher.py b/discuss_data/dhrep/publisher.py index 4becda6..cac58aa 100644 --- a/discuss_data/dhrep/publisher.py +++ b/discuss_data/dhrep/publisher.py @@ -45,8 +45,8 @@ class Publisher: validator = URLValidator(["https"]) try: validator(settings.PUBLISH_URL) - except ValidationError: - raise ImproperlyConfigured + except ValidationError as e: + raise ImproperlyConfigured from e publish_url = settings.PUBLISH_URL if not publish_url.endswith("/"): @@ -143,7 +143,7 @@ class Publisher: "collection.ttl", 
{"storage_id": storage_id, "dataset": dataset, "datafiles": datafiles}, ) - logger.error("[COLLECTION_RDF]", turtle) + logger.debug("[COLLECTION_RDF]: %s", turtle) return turtle @@ -198,8 +198,8 @@ class Publisher: }, ) - logger.error("status-text: %s", response.text) - logger.error("status-status: %s", response.status_code) + logger.debug("status-text: %s", response.text) + logger.debug("status-status: %s", response.status_code) if response.status_code != 200: raise PublisherError( @@ -287,11 +287,11 @@ class Publisher: for file in files.keys(): try: dariah_storage.get(self.token, file) - # logger.debug("DARIAH UPLOAD STATUS: {} finished".format(file)) + logger.debug("DARIAH UPLOAD STATUS: %s finished", format(file)) except StorageException as e: status[file] = False # do not show complete traceback in debug logging - logger.error("[[DARIAH UPLOAD STATUS]]: {}".format(e.args[0])) + logger.debug("[[DARIAH UPLOAD STATUS]]: %s ", format(e.args[0])) else: status[file] = True -- GitLab