Page MenuHomePhabricator
Paste P6869

Masterwork From Distant Lands
ActivePublic

Authored by Eevans on Mar 21 2018, 2:16 PM.
Tags
None
Referenced Files
F15946327: Masterwork From Distant Lands
Mar 21 2018, 2:16 PM
Subscribers
None
# Change Propagation service configuration for a dev/test instance
# (see `test_mode: true` and the `changeprop-dev` service name below).
#
# Layout:
#   spec         - hyperswitch spec: sub-request filters plus the /sys/* modules
#                  (rate limiter, deduplicator, purge, dependency updates and
#                  the Kafka queue with its rule templates).
#   num_workers, logging, services - service-runner level settings; the single
#                  service re-uses the spec through the `*spec` alias.
#
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# leading whitespace had been lost - confirm against the running config.
spec: &spec
  # Only sub-requests matching an `allow` pattern are permitted.
  x-sub-request-filters:
    - type: default
      name: http
      options:
        allow:
          - pattern: /^https?:\/\//
            forward_headers:
              user-agent: true
  title: The Change Propagation root
  paths:
    # Rate limiter module; the redis settings are anchored for re-use below.
    /sys/limit:
      x-modules:
        - path: sys/rate_limiter.js
          options:
            redis: &redis_config
              host: localhost
              port: 6379
            limiters:
              blacklist:
                # First, allow no more than 100 errors per week
                # The precision parameter controls the step a sliding window moves by
                - interval: 604800
                  limit: 100
                  precision: 86400
                # Secondly to avoid bursts in case of outages, don't allow more than 10
                # errors per hour
                - interval: 3600
                  limit: 10
    # Deduplicator module; shares the redis settings of the rate limiter.
    /sys/dedupe:
      x-modules:
        - path: sys/deduplicator.js
          options:
            redis: *redis_config
    # Purge module; rules below POST lists of URIs to /sys/purge/.
    /sys/purge:
      x-modules:
        - path: sys/purge.js
          options:
            host: 127.0.0.1
            port: 4321
    # Dependency-update module behind /sys/links/{transcludes,backlinks,...}.
    /sys/links:
      x-modules:
        - path: sys/dep_updates.js
          options:
            templates:
              mw_api:
                uri: 'https://{{message.meta.domain}}/w/api.php'
                headers:
                  host: '{{message.meta.domain}}'
                body:
                  formatversion: 2
    # Kafka queue module: broker settings plus one rule template per event type.
    /sys/queue:
      x-modules:
        - path: sys/kafka.js
          options:
            metadata_broker_list: 127.0.0.1:9092
            dc_name: test_dc
            startup_delay: 0
            consumer:
              # These options should not be copied to puppet config.
              # We're using this config for testing, so need to configure
              # for minimal latency
              fetch.wait.max.ms: "1"
              fetch.min.bytes: "1"
              queue.buffering.max.ms: "1"
            producer:
              queue.buffering.max.messages: "10"
            concurrency: 250
            test_mode: true
            templates:
              summary_definition_rerender: &summary_definition_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_limit: 2
                retry_delay: 500
                retry_on:
                  status:
                    - '5xx'
                limiters:
                  blacklist: 'summary:{message.meta.uri}'
                cases:  # Non wiktionary domains - rerender summary
                  - match:
                      meta:
                        uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
                      tags:
                        - restbase
                    match_not:
                      - meta:
                          domain: '/wiktionary.org$/'
                      - meta:
                          domain: /\.wikidata\.org$/
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
                  - match:  # Wiktionary domains - rerender definitions
                      meta:
                        # These URIs are coming from RESTBase, so we know that article titles will be normalized
                        # and main namespace articles will not have : (uri-encoded, so %3a or %3A)
                        uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>(?:(?!%3a|%3A|\/).)+)$/'
                        domain: '/^en\.wiktionary\.org$/'
                      tags:
                        - restbase
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/definition/{{match.meta.uri.title}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
              mobile_rerender: &mobile_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_limit: 2
                retry_delay: 500
                retry_on:
                  status:
                    - '5xx'
                limiters:
                  blacklist: 'mobile:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
                  tags:
                    - restbase
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
                    query:
                      redirect: false
                    headers:
                      cache-control: no-cache
                  # Until we start storing and actively rerendering PCS endpoints we still need to purge it from Varnish
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{{match.meta.uri.title}}'
              # RESTBase update jobs
              mw_purge:
                topic: resource_change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - purge
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              null_edit:
                topic: resource_change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 403  # Ignoring 403 since some of the pages with high number of null_edit events are blacklisted
                    - 412
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - null_edit
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              page_edit:
                topic: mediawiki.revision-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match:
                  rev_content_changed: true
                match_not:
                  # Test-only. We use undefined rev_parent_id to test backlinks so we
                  # don't want transclusions to interfere with backlinks test
                  - rev_parent_id: undefined
                  # end of test-only config
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      x-restbase-parentrevision: '{{message.rev_parent_id}}'
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: post
                    uri: '/sys/links/transcludes/{message.page_title}'
                    body: '{{globals.message}}'
              revision_visibility_change:
                topic: mediawiki.revision-visibility-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 403  # When the revision is hidden 403 will be returned by RESTBase, it's a valid situation
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  method: get
                  uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}/{{message.rev_id}}'
                  headers:
                    cache-control: no-cache
                  query:
                    redirect: false
              page_delete:
                topic: mediawiki.page-delete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 404  # 404 is a normal response for page deletion
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # The links to the deleted page should become red again
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
                  # For page deletion RESTBase doesn't emit resource_change events, and to go through
                  # the normal purge chain (html update -> html resource_change -> summary update -> summary resource_change)
                  # we need to add many workarounds/shortcuts in RESTBase. So having this list here is an OK compromise.
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/definition/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-lead/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-remaining/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{message.page_title}'
              page_restore:
                topic: mediawiki.page-undelete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # The links to the restored page should no longer be red,
                  # so re-render the pages that link to it
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
              page_move:
                topic: mediawiki.page-move
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.prior_state.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              on_transclusion_update:
                topic: change-prop.transcludes.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: ['transcludes']
                    exec:
                      method: get
                      uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/transcludes/{message.original_event.page_title}'
                      body: '{{globals.message}}'
              page_create:
                topic: mediawiki.page-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
              on_backlinks_update:
                topic: change-prop.backlinks.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: ['backlinks']
                    exec:
                      method: get
                      uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/backlinks/{message.original_event.page_title}'
                      body: '{{globals.message}}'
              wikidata_description_on_edit:
                topic: mediawiki.revision-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    domain: www.wikidata.org
                  page_namespace: 0
                  # It's impossible to modify a comment in wikidata while editing the entity.
                  # TODO: This is a temp solution until we get a more general fragment support T148079
                  comment: '/wbeditentity|wbsetdescription|undo/'
                  rev_content_changed: true
                exec:
                  method: post
                  uri: '/sys/links/wikidata_descriptions'
                  body: '{{globals.message}}'
              wikidata_description_on_undelete:
                topic: mediawiki.page-undelete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    domain: www.wikidata.org
                  page_namespace: 0
                exec:
                  method: post
                  uri: '/sys/links/wikidata_descriptions'
                  body: '{{globals.message}}'
              on_wikidata_description_change:
                topic: change-prop.wikidata.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    uri: '/https:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                  tags: ['wikidata']
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              page_images:
                topic: mediawiki.page-properties-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                # We don't support 'OR' in the match section, so workaround it by 2 cases with identical exec
                cases:
                  - match:
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                  - match:
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    # NOTE(review): unlike the case above, this excludes every wikidata
                    # namespace rather than only namespaces 0 and 120 - confirm whether
                    # the two cases are meant to share the same match_not.
                    match_not:
                      meta:
                        domain: /\.wikidata\.org$/
                    exec:
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false

num_workers: 1

logging:
  name: changeprop-dev
  level: info

services:
  - name: changeprop-dev
    module: hyperswitch
    conf:
      port: 7272
      user_agent: DevChangePropInstance
      spec: *spec

Event Timeline

Eevans changed the title of this paste from untitled to Masterwork From Distant Lands.
Eevans updated the paste's language from autodetect to autodetect.
Eevans updated the paste's language from autodetect to yaml. Mar 21 2018, 2:16 PM