We are listening to mediawiki.page_change.v1 events. For commonswiki namespace 6 (File), we want to know when the content of a file itself has changed, so that we know when we need to re-download it. Our goal is to minimize API calls and download the file only when the file itself has changed.
Looking at edit events, we see three SHA-1 fields: revision.rev_sha1, content_slots.main.content_sha1, and content_slots.mediainfo.content_sha1 (example event below).
- Based on some initial discussions, none of the above tells me whether the file itself has changed. Please correct me if I am wrong.
- Will I see an edit event if an editor changes only the file itself and none of its associated metadata? For example, re-uploading a cropped image in place of the original one. Based on some initial discussions, I will not get an event for this. However, this is a case where I would need to re-download the file as well.
Pending confirmation of the two points above, would it be possible to add a field that indicates whether the file itself has changed for namespace 6? This would greatly reduce the number of calls to the imageinfo API.
Sample commonswiki namespace 6 edit event:
{
"changelog_kind": "update",
"page_change_kind": "edit",
"dt": "2024-08-27T20:28:54Z",
"wiki_id": "commonswiki",
"page": {
"page_id": 101313526,
"page_title": "File:GOC_Brookmans_Park_064_Mimmshall_Brook_(26742869716).jpg",
"namespace_id": 6,
"is_redirect": false
},
"performer": {
"user_text": "FlickypediaBackfillrBot",
"groups": [
"bot",
"*",
"user",
"autoconfirmed"
],
"is_bot": true,
"is_system": false,
"is_temp": false,
"user_id": 12576746,
"registration_dt": "2023-10-31T14:27:46Z",
"edit_count": 8245557
},
"revision": {
"rev_id": 916556508,
"rev_dt": "2024-08-27T20:28:54Z",
"is_minor_edit": false,
"rev_sha1": "i18qbfk9x9u8ixin2p7ria73f7k1lr5",
"rev_size": 9426,
"rev_parent_id": 762790573,
"comment": "/* wbeditentity-update:0| */ Update the [[Commons:Structured data|structured data]] based on metadata from Flickr",
"editor": {
"user_text": "FlickypediaBackfillrBot",
"groups": [
"bot",
"*",
"user",
"autoconfirmed"
],
"is_bot": true,
"is_system": false,
"is_temp": false,
"user_id": 12576746,
"registration_dt": "2023-10-31T14:27:46Z",
"edit_count": 8245557
},
"is_content_visible": true,
"is_editor_visible": true,
"is_comment_visible": true,
"content_slots": {
"main": {
"slot_role": "main",
"content_model": "wikitext",
"content_sha1": "5fkfnlhpexfvlulauzpt4kjbkeiryci",
"content_size": 1590,
"content_format": "text/x-wiki",
"origin_rev_id": 762790573
},
"mediainfo": {
"slot_role": "mediainfo",
"content_model": "wikibase-mediainfo",
"content_sha1": "ecq0j33bxmwstqlz9qzaedva5ky8xlo",
"content_size": 7836,
"content_format": "application/json",
"origin_rev_id": 916556508
}
}
},
"prior_state": {
"revision": {
"rev_id": 762790573,
"rev_dt": "2023-05-13T17:20:56Z",
"is_minor_edit": true,
"rev_sha1": "bj7k2m4sy320a1g5vcpyi94a286mwu8",
"rev_size": 8186,
"rev_parent_id": 713191955,
"comment": "delete [[Category:Photographs taken on 2016-01-09]] because more detail cat already available",
"editor": {
"user_text": "RudolphousBot",
"groups": [
"bot",
"*",
"user",
"autoconfirmed"
],
"is_bot": true,
"is_system": false,
"is_temp": false,
"user_id": 2523146,
"registration_dt": "2012-12-31T21:25:07Z",
"edit_count": 17036187
},
"is_content_visible": true,
"is_editor_visible": true,
"is_comment_visible": true,
"content_slots": {
"main": {
"slot_role": "main",
"content_model": "wikitext",
"content_sha1": "5fkfnlhpexfvlulauzpt4kjbkeiryci",
"content_size": 1590,
"content_format": "text/x-wiki",
"origin_rev_id": 762790573
},
"mediainfo": {
"slot_role": "mediainfo",
"content_model": "wikibase-mediainfo",
"content_sha1": "2qnqedm6j4ac8mt2pe34u16druzzw2v",
"content_size": 6596,
"content_format": "application/json",
"origin_rev_id": 658705963
}
}
}
},
"$schema": "/mediawiki/page/change/1.2.0",
"meta": {
"stream": "mediawiki.page_change.v1",
"uri": "https://commons.wikimedia.org/wiki/File:GOC_Brookmans_Park_064_Mimmshall_Brook_(26742869716).jpg",
"id": "1accc914-100b-4b7b-adda-7e91fc7195dc",
"request_id": "b232cf9d-14b3-4b21-859a-43964c0e30c2",
"domain": "commons.wikimedia.org",
"dt": "2024-08-27T20:28:54Z",
"topic": "eqiad.mediawiki.page_change.v1",
"partition": 0,
"offset": 492075370,
"key": {
"type": "Buffer",
"data": [
123,
34,
119,
105,
107,
105,
95,
105,
100,
34,
58,
34,
99,
111,
109,
109,
111,
110,
115,
119,
105,
107,
105,
34,
44,
34,
112,
97,
103,
101,
95,
105,
100,
34,
58,
49,
48,
49,
51,
49,
51,
53,
50,
54,
125
]
}
}
}