Page MenuHomePhabricator
Paste P86609

RR Threshold Analysis Query
ActivePublic

Authored by gkyziridis on Dec 15 2025, 12:37 PM.
Referenced Files
F71079030: RR Threshold Analysis Query
Dec 15 2025, 12:37 PM
Subscribers
QUERY = f"""
WITH revert_risks AS (
SELECT
rr.wiki_db AS wiki_db,
rr.rev_id AS revision_id,
rev_revert_risk AS revert_risk_score,
rr.rev_timestamp AS edit_timestamp,
mwh.event_user_text,
mwh.event_user_revision_count AS user_edit_count,
-- was it a newly created account. defined as user that created account 90 days before edit
CASE WHEN rru.user_id IS NOT NULL
AND unix_timestamp(rr.rev_timestamp) - unix_timestamp(rru.user_registration_timestamp) < 7776000
THEN TRUE
ELSE FALSE
END AS is_new_account,
-- was the user anonymous
CASE
WHEN event_user_is_anonymous THEN TRUE
ELSE FALSE
END AS is_anon,
CASE
WHEN SIZE(event_user_is_bot_by_historical) = 0 THEN FALSE
ELSE TRUE
END AS user_is_bot,
-- edit reverts another edit
revision_is_identity_revert AS is_revert,
rr.page_title,
page_namespace_is_content AS namespace_is_content,
-- was this edit reverted
CASE
WHEN rr.rev_is_identity_reverted THEN TRUE
ELSE FALSE
END AS is_reverted,
-- time to revert
rr.rev_seconds_to_identity_revert AS time_to_revert,
-- was the user extended confirmed
CASE
WHEN ARRAY_CONTAINS(event_user_groups, 'extendedconfirmed') THEN TRUE
ELSE FALSE
END AS is_extendedconfirmed,
CASE
WHEN bc.user_text IS NOT NULL THEN TRUE
ELSE FALSE
END AS is_blocked,
CASE
WHEN amr.amr_rev_parent_id IS NOT NULL THEN TRUE
ELSE FALSE
END AS was_reverted_by_automoderator
FROM
risk_observatory.revert_risk_predictions rr
JOIN
wmf.mediawiki_history mwh
ON rr.wiki_db = mwh.wiki_db
AND rr.rev_id = mwh.revision_id
LEFT JOIN wmf_product.automoderator_monitoring_snapshot_daily amr
ON rr.rev_id = amr.amr_rev_parent_id
-- was reverted by automoderator edits
AND is_amr_revert
AND amr.wiki_db = '{wiki_name}'
AND amr.snapshot_date = '2025-02-15'
LEFT JOIN event.mediawiki_user_blocks_change bc
ON rr.user_name = bc.user_text
AND rr.wiki_db = bc.`database`
AND bc.`database` = '{wiki_name}'
-- user blocked after edit
-- AND bc.meta.dt > rr.rev_timestamp
-- join to users dataset to identify new users
LEFT JOIN mneisler.canonical_revertrisk_users_2024 rru
ON mwh.event_user_id = rru.user_id
AND mwh.wiki_db = rru.wiki_db
WHERE
mwh.snapshot = '{mwh_snapshot}'
-- limit to indonesian Wikipedia
AND rr.wiki_db == '{wiki_name}'
-- limit to only revisions assigned revert risk score
AND rev_revert_risk IS NOT NULL
-- exclude adminstrators
AND
(
event_user_groups IS NULL
OR NOT ARRAY_CONTAINS(mwh.event_user_groups_historical, 'sysop')
)
AND event_timestamp > "{start_timestamp}"
AND event_timestamp < "{end_timestamp}"
),
excl_self_reverts AS (
SELECT
rd.*
FROM
revert_risks rd
JOIN
wmf.mediawiki_history mwh
ON rd.revision_id = mwh.revision_first_identity_reverting_revision_id
AND rd.wiki_db = mwh.wiki_db
WHERE
snapshot = '{mwh_snapshot}'
AND rd.is_revert
-- exclude self reverts
AND NOT rd.event_user_text = mwh.event_user_text
)
SELECT
*
FROM
revert_risks
WHERE
NOT revert_risks.is_revert
UNION ALL
SELECT
*
FROM
excl_self_reverts
"""