Page MenuHomePhabricator

Masterwork From Distant Lands

Authored By
Eevans
Mar 21 2018, 2:16 PM
Size
25 KB
Referenced Files
None
Subscribers
None

Masterwork From Distant Lands

# Service spec, anchored as `spec` so it can be aliased from the `services`
# section at the end of this config.
spec: &spec
# Default sub-request filter named `http`: the allow list holds one pattern
# for http(s) URIs, with the user-agent header flagged under forward_headers.
x-sub-request-filters:
- type: default
name: http
options:
allow:
- pattern: /^https?:\/\//
forward_headers:
user-agent: true
title: The Change Propagation root
# Route table: each /sys/* entry that follows mounts one module via x-modules.
paths:
# Mounts sys/rate_limiter.js at /sys/limit. The Redis connection settings are
# anchored as `redis_config` so other modules can alias them. The `blacklist`
# limiter defined here is the one referenced by the rules' `limiters` sections.
/sys/limit:
x-modules:
- path: sys/rate_limiter.js
options:
redis: &redis_config
host: localhost
port: 6379
limiters:
blacklist:
# First, allow no more than 100 errors per week
# The precision parameter controls the step a sliding window moves by
# (604800 and 86400 read as one week and one day, assuming seconds)
- interval: 604800
limit: 100
precision: 86400
# Secondly, to avoid bursts in case of outages, don't allow more than 10
# errors per hour
- interval: 3600
limit: 10
# Mounts sys/deduplicator.js at /sys/dedupe, reusing the Redis settings
# anchored as `redis_config` under /sys/limit.
/sys/dedupe:
x-modules:
- path: sys/deduplicator.js
options:
redis: *redis_config
# Mounts sys/purge.js at /sys/purge, pointed at 127.0.0.1:4321.
# Rules below POST lists of URIs to /sys/purge/.
/sys/purge:
x-modules:
- path: sys/purge.js
options:
host: 127.0.0.1
port: 4321
# Mounts sys/dep_updates.js at /sys/links. Rules below POST to
# /sys/links/transcludes, /sys/links/backlinks and
# /sys/links/wikidata_descriptions.
/sys/links:
x-modules:
- path: sys/dep_updates.js
options:
templates:
# Request template for the MediaWiki action API of the event's domain.
mw_api:
uri: 'https://{{message.meta.domain}}/w/api.php'
headers:
host: '{{message.meta.domain}}'
body:
formatversion: 2
# Mounts sys/kafka.js at /sys/queue against a local broker (127.0.0.1:9092).
# The consumer/producer values here are tuned for tests (see the note below).
/sys/queue:
x-modules:
- path: sys/kafka.js
options:
metadata_broker_list: 127.0.0.1:9092
dc_name: test_dc
startup_delay: 0
consumer:
# These options should not be copied to puppet config.
# We're using this config for testing, so need to configure
# for minimal latency
fetch.wait.max.ms: "1"
fetch.min.bytes: "1"
queue.buffering.max.ms: "1"
producer:
queue.buffering.max.messages: "10"
concurrency: 250
test_mode: true
# Each entry that follows is a named rule: a `topic` plus match/exec sections.
templates:
# Rule `summary_definition_rerender`: reacts to resource_change events (plain
# or change-prop.transcludes-prefixed topic) tagged `restbase` whose URI is a
# RESTBase page/html URL. Two cases:
#   1. non-wiktionary, non-wikidata domains -> re-request page/summary
#   2. en.wiktionary.org main-namespace titles -> re-request page/definition
# Both requests are sent with cache-control: no-cache.
summary_definition_rerender: &summary_definition_rerender_spec
topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
retry_limit: 2
retry_delay: 500
retry_on:
status:
- '5xx'
limiters:
blacklist: 'summary:{message.meta.uri}'
cases: # Non wiktionary domains - rerender summary
- match:
meta:
uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
tags:
- restbase
match_not:
- meta:
# The dot is escaped so it matches only a literal '.', consistent with the
# other domain patterns in this config. Single quotes keep the backslash
# literal.
domain: '/wiktionary\.org$/'
- meta:
domain: /\.wikidata\.org$/
exec:
method: get
# Don't encode title since it should be already encoded
uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
query:
redirect: false
headers:
cache-control: no-cache
- match: # Wiktionary domains - rerender definitions
meta:
# These URIs are coming from RESTBase, so we know that article titles will be normalized
# and main namespace articles will not have : (uri-encoded, so %3a or %3A)
uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>(?:(?!%3a|%3A|\/).)+)$/'
domain: '/^en\.wiktionary\.org$/'
tags:
- restbase
exec:
method: get
# Don't encode title since it should be already encoded
uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/definition/{{match.meta.uri.title}}'
query:
redirect: false
headers:
cache-control: no-cache
# Rule `mobile_rerender`: reacts to resource_change events (plain or
# change-prop.transcludes-prefixed topic) tagged `restbase` whose URI is a
# RESTBase page/html URL, excluding wikidata.org domains. It re-requests
# page/mobile-sections with cache-control: no-cache, then posts the page/media
# URI to /sys/purge/.
mobile_rerender: &mobile_rerender_spec
topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
retry_limit: 2
retry_delay: 500
retry_on:
status:
- '5xx'
limiters:
blacklist: 'mobile:{message.meta.uri}'
match:
meta:
uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
tags:
- restbase
match_not:
meta:
domain: /\.wikidata\.org$/
exec:
- method: get
uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
query:
redirect: false
headers:
cache-control: no-cache
# Until we start storing and actively rerendering PCS endpoints we still need to purge it from Varnish
- method: post
uri: '/sys/purge/'
body:
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{{match.meta.uri.title}}'
# RESTBase update jobs
# Rule `mw_purge`: on resource_change events tagged `purge` whose URI is a
# /wiki/<title> URL, re-requests the page/html, page/summary and
# page/mobile-sections endpoints with cache-control: no-cache.
mw_purge:
topic: resource_change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match:
meta:
uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
tags:
- purge
limiters:
blacklist: 'html:{message.meta.uri}'
exec:
- method: get
# This event comes directly from MediaWiki, so title is encoded in MW-specific way.
# Re-encode the title in standard `encodeURIComponent` encoding.
uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
if-unmodified-since: '{{date(message.meta.dt)}}'
query:
redirect: false
# The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
# content, so let's purge them as well just in case. The rate is low.
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
query:
redirect: false
# Rule `null_edit`: same request sequence as `mw_purge` (page/html, then
# page/summary and page/mobile-sections, all with cache-control: no-cache),
# but triggered by resource_change events tagged `null_edit`. Responses with
# status 403 and 412 are listed under `ignore`.
null_edit:
topic: resource_change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
ignore:
status:
- 403 # Ignoring 403 since some of the pages with high number of null_edit events are blacklisted
- 412
limiters:
blacklist: 'html:{message.meta.uri}'
match:
meta:
uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
tags:
- null_edit
exec:
- method: get
# This event comes directly from MediaWiki, so title is encoded in MW-specific way.
# Re-encode the title in standard `encodeURIComponent` encoding.
uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
if-unmodified-since: '{{date(message.meta.dt)}}'
query:
redirect: false
# The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
# content, so let's purge them as well just in case. The rate is low.
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
headers:
cache-control: no-cache
query:
redirect: false
# Rule `page_edit`: on mediawiki.revision-create events with
# rev_content_changed set, re-requests page/html for the new revision
# (passing the parent revision in x-restbase-parentrevision) and then posts the
# event to /sys/links/transcludes. wikidata.org events paired with
# page_namespace 0 or 120 are excluded, and 5xx/404 responses are retried.
page_edit:
topic: mediawiki.revision-create
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
limiters:
blacklist: 'html:{message.meta.uri}'
retry_on:
status:
- '5xx'
- 404 # Sometimes occasional 404s happen because of the mysql replication lag, so retry
match:
rev_content_changed: true
match_not:
# Test-only. We use undefined rev_parent_id to test backlinks so we
# don't want transclusions to interfere with backlinks test
- rev_parent_id: undefined
# end of test-only config
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
headers:
cache-control: no-cache
x-restbase-parentrevision: '{{message.rev_parent_id}}'
if-unmodified-since: '{{date(message.meta.dt)}}'
query:
redirect: false
- method: post
uri: '/sys/links/transcludes/{message.page_title}'
body: '{{globals.message}}'
# Rule `revision_visibility_change`: on mediawiki.revision-visibility-change
# events, re-requests page/title/<title>/<rev_id> with cache-control: no-cache.
# Status 403 and 412 are ignored; wikidata.org events paired with
# page_namespace 0 or 120 are excluded.
revision_visibility_change:
topic: mediawiki.revision-visibility-change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
ignore:
status:
- 403 # When the revision is hidden 403 will be returned by RESTBase, it's a valid situation
- 412
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}/{{message.rev_id}}'
headers:
cache-control: no-cache
query:
redirect: false
# Rule `page_delete`: on mediawiki.page-delete events, re-requests
# page/title/<title>, posts the event to /sys/links/backlinks, and posts an
# explicit list of REST endpoint URIs for the page to /sys/purge/.
# Status 404 and 412 are ignored; wikidata.org events paired with
# page_namespace 0 or 120 are excluded.
page_delete:
topic: mediawiki.page-delete
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
ignore:
status:
- 404 # 404 is a normal response for page deletion
- 412
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
# The links to the deleted page should become red again
- method: post
uri: '/sys/links/backlinks/{message.page_title}'
body: '{{globals.message}}'
# For page deletion RESTBase doesn't emit resource_change events, and to go through
# the normal purge chain (html update -> html resource_change -> summary update -> summary resource_change)
# we need to add many workarounds/shortcuts in RESTBase. So having this list here is an OK compromise.
- method: post
uri: '/sys/purge/'
body:
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/definition/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-lead/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-remaining/{message.page_title}'
- meta:
uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{message.page_title}'
# Rule `page_restore`: on mediawiki.page-undelete events, re-requests
# page/title/<title> with cache-control: no-cache and posts the event to
# /sys/links/backlinks. wikidata.org events paired with page_namespace 0 or
# 120 are excluded.
page_restore:
topic: mediawiki.page-undelete
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
# The links to the restored page should no longer be red
- method: post
uri: '/sys/links/backlinks/{message.page_title}'
body: '{{globals.message}}'
# Rule `page_move`: on mediawiki.page-move events, re-requests page/html for
# the current title and revision, then page/title for the prior title
# (message.prior_state.page_title), both with cache-control: no-cache.
# wikidata.org events paired with page_namespace 0 or 120 are excluded.
page_move:
topic: mediawiki.page-move
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
headers:
cache-control: no-cache
if-unmodified-since: '{{date(message.meta.dt)}}'
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.prior_state.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
# Rule `on_transclusion_update`: consumes
# change-prop.transcludes.resource-change. Two cases, selected by schema_uri:
#   - resource_change/1 tagged `transcludes`: re-request page/html for the
#     title captured from the /wiki/<title> URI, passing message.tags[1] in
#     the x-restbase-mode header.
#   - continue/1: post the event back to /sys/links/transcludes for the
#     original event's page title.
on_transclusion_update:
topic: change-prop.transcludes.resource-change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
limiters:
blacklist: 'html:{message.meta.uri}'
cases:
- match:
meta:
schema_uri: 'resource_change/1'
uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
tags: [ 'transcludes' ]
exec:
method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
headers:
cache-control: no-cache
if-unmodified-since: '{{date(message.root_event.dt)}}'
x-restbase-mode: '{{message.tags[1]}}'
query:
redirect: false
- match:
meta:
schema_uri: 'continue/1'
exec:
method: post
uri: '/sys/links/transcludes/{message.original_event.page_title}'
body: '{{globals.message}}'
# Rule `page_create`: on mediawiki.page-create events, posts the event to
# /sys/links/backlinks for the created title. 5xx and 404 responses are
# retried; wikidata.org events paired with page_namespace 0 or 120 are
# excluded.
page_create:
topic: mediawiki.page-create
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
retry_on:
status:
- '5xx'
- 404 # Sometimes occasional 404s happen because of the mysql replication lag, so retry
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: post
uri: '/sys/links/backlinks/{message.page_title}'
body: '{{globals.message}}'
# Rule `on_backlinks_update`: consumes change-prop.backlinks.resource-change.
# Mirrors `on_transclusion_update`, with the `backlinks` tag and the
# /sys/links/backlinks continuation endpoint:
#   - resource_change/1 tagged `backlinks`: re-request page/html for the title
#     captured from the /wiki/<title> URI.
#   - continue/1: post the event back to /sys/links/backlinks.
on_backlinks_update:
topic: change-prop.backlinks.resource-change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
limiters:
blacklist: 'html:{message.meta.uri}'
cases:
- match:
meta:
schema_uri: 'resource_change/1'
uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
tags: [ 'backlinks' ]
exec:
method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
headers:
cache-control: no-cache
if-unmodified-since: '{{date(message.root_event.dt)}}'
x-restbase-mode: '{{message.tags[1]}}'
query:
redirect: false
- match:
meta:
schema_uri: 'continue/1'
exec:
method: post
uri: '/sys/links/backlinks/{message.original_event.page_title}'
body: '{{globals.message}}'
# Rule `wikidata_description_on_edit`: on mediawiki.revision-create events
# from www.wikidata.org with page_namespace 0, a changed revision content and
# an edit comment matching wbeditentity, wbsetdescription or undo, posts the
# event to /sys/links/wikidata_descriptions.
wikidata_description_on_edit:
topic: mediawiki.revision-create
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match:
meta:
domain: www.wikidata.org
page_namespace: 0
# It's impossible to modify a comment in wikidata while editing the entity.
# TODO: This is a temp solution until we get a more general fragment support T148079
comment: '/wbeditentity|wbsetdescription|undo/'
rev_content_changed: true
exec:
method: post
uri: '/sys/links/wikidata_descriptions'
body: '{{globals.message}}'
# Rule `wikidata_description_on_undelete`: on mediawiki.page-undelete events
# from www.wikidata.org with page_namespace 0, posts the event to
# /sys/links/wikidata_descriptions.
wikidata_description_on_undelete:
topic: mediawiki.page-undelete
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match:
meta:
domain: www.wikidata.org
page_namespace: 0
exec:
method: post
uri: '/sys/links/wikidata_descriptions'
body: '{{globals.message}}'
# Rule `on_wikidata_description_change`: consumes
# change-prop.wikidata.resource-change events tagged `wikidata` and
# re-requests page/summary and page/mobile-sections for the title captured
# from the /wiki/<title> URI, both with cache-control: no-cache.
# NOTE(review): this URI pattern accepts only https, whereas the transclusion
# and backlinks rules accept https? - confirm this is intentional.
on_wikidata_description_change:
topic: change-prop.wikidata.resource-change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
match:
meta:
uri: '/https:\/\/[^\/]+\/wiki\/(?<title>.+)/'
tags: [ 'wikidata' ]
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
headers:
cache-control: no-cache
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
headers:
cache-control: no-cache
query:
redirect: false
# Rule `page_images`: on mediawiki.page-properties-change events where the
# page_image property was added or removed, re-requests page/summary and
# page/mobile-sections with cache-control: no-cache.
# NOTE(review): the two cases share the same exec but not the same match_not.
# The first excludes wikidata.org events paired with page_namespace 0 or 120;
# the second excludes every wikidata.org domain. Confirm the difference is
# intended.
page_images:
topic: mediawiki.page-properties-change
sample:
rate: 0.2
hash_template: '{{message.meta.domain}}-{{message.page_title}}'
# We don't support 'OR' in the match section, so workaround it by 2 cases with identical exec
cases:
- match:
added_properties:
page_image: '/.+/' # Regex that matches anything just to check the prop is set
match_not:
- meta:
domain: /\.wikidata\.org$/
page_namespace: 0
- meta:
domain: /\.wikidata\.org$/
page_namespace: 120
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
- match:
removed_properties:
page_image: '/.+/' # Regex that matches anything just to check the prop is set
match_not:
meta:
domain: /\.wikidata\.org$/
exec:
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
- method: get
uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
headers:
cache-control: no-cache
query:
redirect: false
# Process-level settings: one worker, info-level logging under the name
# changeprop-dev, and a single hyperswitch service on port 7272 whose spec is
# the `spec` anchor defined at the top of this config.
num_workers: 1
logging:
name: changeprop-dev
level: info
services:
- name: changeprop-dev
module: hyperswitch
conf:
port: 7272
user_agent: DevChangePropInstance
spec: *spec

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
5653441
Default Alt Text
Masterwork From Distant Lands (25 KB)

Event Timeline