We are getting 503 and 500 error messages in the DLQ topic aws.structured-data.article-update-dead-letter.v1. We need to figure out how to deal with those messages to avoid data loss.
Acceptance criteria
Code handles 503 and 500 error messages in a graceful manner that avoids data loss.
ToDo
- figure out how to deal with those messages in code
- write a piece of code that will handle those messages accordingly
Test Strategy
The test strategy for this is pretty straightforward: we just need to monitor the error and dead-letter queues to make sure that those messages are properly processed.
Things to consider
- We need to make sure this aligns with other DLQ work in the pipeline; maybe there's a way to come up with an overarching solution.
Example messages
{ "name": "Projections_of_population_growth", "identifier": 33517435, "abstract": "", "date_created": null, "date_modified": null, "date_previously_modified": null, "protection": [], "version": { "wikimedia_enterprise.general.schema.Version": { "identifier": 1144838189, "comment": "", "tags": [], "is_minor_edit": false, "is_flagged_stable": false, "has_tag_needs_citation": false, "scores": { "wikimedia_enterprise.general.schema.Scores": { "damaging": { "wikimedia_enterprise.general.schema.ProbabilityScore": { "prediction": false, "probability": { "wikimedia_enterprise.general.schema.Probability": { "truthy": 0.7974707917749783, "falsy": 0.2025292082250217 } } } }, "goodfaith": { "wikimedia_enterprise.general.schema.ProbabilityScore": { "prediction": true, "probability": { "wikimedia_enterprise.general.schema.Probability": { "truthy": 0.11690136838813703, "falsy": 0.883098631611863 } } } } } }, "editor": { "wikimedia_enterprise.general.schema.Editor": { "identifier": -1, "name": "....", "edit_count": 1271, "groups": [ "extendedconfirmed", "*", "user", "autoconfirmed" ], "is_bot": false, "is_anonymous": false, "is_admin": false, "is_patroller": false, "has_advanced_rights": false, "date_started": { "long": 1460144117000000 } } }, "diff": null, "number_of_characters": 0, "sizes": null, "event": null } }, "previous_version": { "wikimedia_enterprise.general.schema.PreviousVersion": { "identifier": 1144686166, "number_of_characters": 0 } }, "version_identifier": "", "url": "", "watchers_count": 0, "namespace": { "wikimedia_enterprise.general.schema.Namespace": { "name": "", "alternate_name": "", "identifier": 0, "description": "", "event": null } }, "in_language": { "wikimedia_enterprise.general.schema.Language": { "identifier": "en", "name": "", "alternate_name": "", "direction": "", "event": null } }, "main_entity": null, "additional_entities": [], "categories": [], "templates": [], "redirects": [], "is_part_of": { "wikimedia_enterprise.general.schema.Project": { "name": "", "identifier": "enwiki", "url": "https://en.wikipedia.org", "version": "", "date_modified": null, "in_language": null, "namespace": null, "sizes": null, "additional_type": "", "event": null } }, "article_body": null, "license": [], "visibility": null, "event": { "wikimedia_enterprise.general.schema.Event": { "identifier": "f9ab2ca0-e59e-4215-b96f-8179ed7f727f", "type": "update", "date_created": { "long": 1679327991327676 }, "fail_count": 2, "fail_reason": "500 Internal Server Error:{\"error\":{\"code\":\"internalservererror\",\"message\":\"Traceback(mostrecentcalllast):\\nFile\\\"./ores/wsgi/routes/v3/util.py\\\",line107,inprocess_score_request\\nscore_response=scoring_system.score(score_request)\\nFile\\\"./ores/scoring_systems/scoring_system.py\\\",line60,inscore\\nresponse=self._score(request)\\nFile\\\"./ores/scoring_systems/celery_queue.py\\\",line194,in_score\\nself._check_queue_full()\\nFile\\\"./ores/scoring_systems/celery_queue.py\\\",line204,in_check_queue_full\\nqueue_size=self.redis.llen(DEFAULT_CELERY_QUEUE)\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/client.py\\\",line1953,inllen\\nreturnself.execute_command('LLEN',name)\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/client.py\\\",line898,inexecute_command\\nconn=self.connectionorpool.get_connection(command_name,**options)\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/connection.py\\\",line1192,inget_connection\\nconnection.connect()\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/connection.py\\\",line567,inconnect\\nself.on_connect()\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/connection.py\\\",line643,inon_connect\\nauth_response=self.read_response()\\nFile\\\"/srv/deployment/ores/deploy/venv/lib/python3.7/site-packages/redis/connection.py\\\",line756,inread_response\\nraiseresponse\\nredis.exceptions.ResponseError:WRONGPASSinvalidusername-passwordpair\\n\"}}" } } }
{ "name": "يحيى_علي_أحمد_الراعي", "identifier": 1701970, "abstract": "", "date_created": null, "date_modified": null, "date_previously_modified": null, "protection": [], "version": { "wikimedia_enterprise.general.schema.Version": { "identifier": 61660486, "comment": "", "tags": [], "is_minor_edit": false, "is_flagged_stable": false, "has_tag_needs_citation": false, "scores": { "wikimedia_enterprise.general.schema.Scores": { "damaging": { "wikimedia_enterprise.general.schema.ProbabilityScore": { "prediction": false, "probability": { "wikimedia_enterprise.general.schema.Probability": { "truthy": 0.8430879771222933, "falsy": 0.15691202287770664 } } } }, "goodfaith": { "wikimedia_enterprise.general.schema.ProbabilityScore": { "prediction": true, "probability": { "wikimedia_enterprise.general.schema.Probability": { "truthy": 0.000001066589444653765, "falsy": 0.9999989334105553 } } } } } }, "editor": { "wikimedia_enterprise.general.schema.Editor": { "identifier": -1, "name": "...", "edit_count": 2004338, "groups": [ "bot", "*", "user", "autoconfirmed" ], "is_bot": true, "is_anonymous": false, "is_admin": false, "is_patroller": false, "has_advanced_rights": false, "date_started": { "long": 1184853738000000 } } }, "diff": null, "number_of_characters": 0, "sizes": null, "event": null } }, "previous_version": { "wikimedia_enterprise.general.schema.PreviousVersion": { "identifier": 59969991, "number_of_characters": 0 } }, "version_identifier": "", "url": "", "watchers_count": 0, "namespace": { "wikimedia_enterprise.general.schema.Namespace": { "name": "", "alternate_name": "", "identifier": 0, "description": "", "event": null } }, "in_language": { "wikimedia_enterprise.general.schema.Language": { "identifier": "ar", "name": "", "alternate_name": "", "direction": "", "event": null } }, "main_entity": null, "additional_entities": [], "categories": [], "templates": [], "redirects": [], "is_part_of": { "wikimedia_enterprise.general.schema.Project": { "name": "", "identifier": 
"arwiki", "url": "https://ar.wikipedia.org", "version": "", "date_modified": null, "in_language": null, "namespace": null, "sizes": null, "additional_type": "", "event": null } }, "article_body": null, "license": [], "visibility": null, "event": { "wikimedia_enterprise.general.schema.Event": { "identifier": "bc5cab19-5e99-4f5f-b9cd-6600acea3e63", "type": "update", "date_created": { "long": 1679327746899744 }, "fail_count": 2, "fail_reason": "503 Service Unavailable:upstreamconnecterrorordisconnect/resetbeforeheaders.resetreason:connectiontermination" } } }