# Shared HyperSwitch spec for the Change Propagation service. It is anchored
# as `spec` so that the service definition can reference it via `*spec`.
# NOTE(review): the nesting below was reconstructed from key semantics (the
# source had lost its indentation) - confirm against the deployed config.
spec: &spec
  # Sub-request filter: only http(s) URIs are allowed, and the user-agent
  # header is forwarded.
  x-sub-request-filters:
    - type: default
      name: http
      options:
        allow:
          - pattern: /^https?:\/\//
            forward_headers:
              user-agent: true
  title: The Change Propagation root
  paths:
    /{api:sys}/queue:
      x-modules:
        - path: src/sys/kafka.js
          options:
            metadata_broker_list: kafka2001.codfw.wmnet:9092,kafka2002.codfw.wmnet:9092,kafka2003.codfw.wmnet:9092
            dc_name: codfw
            consumer:
              # JobQueue jobs might send messages larger than 1 MB,
              # so we increase the max message size in Kafka and have to
              # adjust the consumers accordingly.
              fetch.message.max.bytes: 4194304
            concurrency: 50
            startup_delay: 60000
            # NOTE(review): test_mode is enabled here - confirm this is
            # intended for this environment.
            test_mode: true
            # One entry per rule: `topic` selects the event stream, `match` /
            # `match_not` filter events, and `exec` lists the requests to
            # issue. `cases` holds several match/exec alternatives.
            templates:
              # On RESTBase HTML resource changes, request the page summary
              # (not for wiktionary/wikidata) or the definition (en.wiktionary).
              summary_definition_rerender: &summary_definition_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                limiters:
                  blacklist: 'summary:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/([^\/]+)$/'
                      tags:
                        - restbase
                    match_not:
                      - meta:
                          domain: '/wiktionary\.org$/'
                      - meta:
                          domain: /\.wikidata\.org$/
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri[1]}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
                  - match:
                      meta:
                        # These URIs are coming from RESTBase, so we know that article titles will be normalized
                        # and main namespace articles will not have : (uri-encoded, so %3a or %3A)
                        uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/((?:(?!%3a|%3A|\/).)+)$/'
                        domain: '/^en\.wiktionary\.org$/'
                      tags:
                        - restbase
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/definition/{{match.meta.uri[1]}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
              # On RESTBase HTML resource changes for wikipedia.org domains,
              # request mobile-sections and post a purge for the media endpoint.
              mobile_rerender: &mobile_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                limiters:
                  blacklist: 'mobile:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/([^/]+)$/'
                    domain: '/\.wikipedia\.org$/'
                  tags:
                    - restbase
                exec:
                  - method: get
                    # Don't encode title since it should be already encoded
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{{match.meta.uri[1]}}'
                    query:
                      redirect: false
                    headers:
                      cache-control: no-cache
                  # Until we start storing and actively rerendering PCS endpoints we still need to purge it from Varnish
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{{match.meta.uri[1]}}'
              # RESTBase update jobs
              # On resource_change events tagged `purge` (excluding wikidata.org),
              # request HTML, summary and mobile-sections with no-cache.
              mw_purge:
                topic: resource_change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    # The named group `title` is what the exec templates
                    # reference as `match.meta.uri.title`.
                    uri: '/^https?:\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - purge
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              # Same requests as mw_purge, but for events tagged `null_edit`;
              # 403 and 412 responses are ignored.
              null_edit:
                topic: resource_change
                ignore:
                  status:
                    - 403  # Ignoring 403 since some of the pages with high number of null_edit events are blacklisted
                    - 412
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    # The named group `title` is what the exec templates
                    # reference as `match.meta.uri.title`.
                    uri: '/^https?:\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - null_edit
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              # On revision-create events whose content changed, request the
              # HTML of the new revision. Retries on 5xx and 404.
              page_edit:
                topic: mediawiki.revision-create
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match:
                  rev_content_changed: true
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                  # Block Scribunto Module dependencies as they create a lot of load
                  # with supposedly very low actual effect
                  - meta:
                      domain: '/wiktionary\.org$/'
                    page_namespace: 828
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      x-restbase-parentrevision: '{{message.rev_parent_id}}'
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
              # On revision visibility changes, request the title/revision
              # endpoint; 403 and 412 responses are ignored.
              revision_visibility_change:
                topic: mediawiki.revision-visibility-change
                ignore:
                  status:
                    - 403  # When the revision is hidden 403 will be returned by RESTBase, it's a valid situation
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  method: get
                  uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.page_title}/{{message.rev_id}}'
                  headers:
                    cache-control: no-cache
                  query:
                    redirect: false
              # On page undelete, request the title endpoint and trigger a
              # backlinks update for the restored page.
              page_restore:
                topic: mediawiki.page-undelete
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # The links to the restored page should not be red any more
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
              # On page move, request HTML for the new title/revision and the
              # title endpoint for the prior title.
              page_move:
                topic: mediawiki.page-move
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.prior_state.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
              # Handles transclusion updates: `resource_change/1` events
              # request HTML for the page; `continue/1` events post back to
              # /sys/links/transcludes. ceb.wikipedia.org is excluded.
              on_transclusion_update:
                concurrency: 200
                topic: change-prop.transcludes.resource-change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        # The named group `title` is what the exec template
                        # references as `match.meta.uri.title`.
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'transcludes' ]
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                    match_not:
                      meta:
                        domain: ceb.wikipedia.org
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    match_not:
                      - meta:
                          domain: ceb.wikipedia.org
                      - original_event:
                          # Block Scribunto Module dependencies as they create a lot of load
                          # with supposedly very low actual effect
                          meta:
                            domain: '/wiktionary\.org$/'
                          page_namespace: 828
                    exec:
                      method: post
                      uri: '/sys/links/transcludes/{message.original_event.page_title}'
                      body: '{{globals.message}}'
              # On page creation, trigger a backlinks update for the new page.
              # Retries on 5xx and 404.
              page_create:
                topic: mediawiki.page-create
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
              # Handles backlink updates: `resource_change/1` events request
              # HTML for the page; `continue/1` events post back to
              # /sys/links/backlinks.
              on_backlinks_update:
                topic: change-prop.backlinks.resource-change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        # The named group `title` is what the exec template
                        # references as `match.meta.uri.title`.
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'backlinks' ]
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/backlinks/{message.original_event.page_title}'
                      body: '{{globals.message}}'
              # On events tagged `wikidata`: wikipedia.org domains get summary
              # and mobile-sections requests; other domains get summary only.
              on_wikidata_description_change:
                topic: change-prop.wikidata.resource-change
                cases:
                  - match:
                      meta:
                        # The named group `title` is what the exec templates
                        # reference as `match.meta.uri.title`.
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                        domain: '/\.wikipedia\.org$/'
                      tags: [ 'wikidata' ]
                    exec:
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                  - match:
                      meta:
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'wikidata' ]
                    exec:
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
              # Rerender summary when pageimages page property change
              page_images_summary:
                topic: mediawiki.page-properties-change
                # We don't support 'OR' in the match section, so work around it with 2 cases with identical exec
                cases:
                  - match:
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
                  - match:
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
              # Request mobile-sections for wikipedia.org pages when the
              # page_image property is added or removed.
              page_images_mobile:
                topic: mediawiki.page-properties-change
                # We don't support 'OR' in the match section, so work around it with 2 cases with identical exec
                cases:
                  - match:
                      meta:
                        domain: '/\.wikipedia\.org$/'
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
                  - match:
                      meta:
                        domain: '/\.wikipedia\.org$/'
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
# Number of worker processes to spawn.
# Set to 0 to run everything in a single process without clustering.
# Use 'ncpu' to run as many workers as there are CPU units.
num_workers: ncpu
# Log error messages and gracefully restart a worker if v8 reports that it
# uses more heap (note: not RSS) than this many MB.
worker_heap_limit_mb: 750
# The maximum interval in ms that can pass between two heartbeat messages
# sent by each worker to the master before the worker is killed.
worker_heartbeat_timeout: 30000
# Logger info
# Default level is `warn`. Two streams are configured: a GELF stream to
# logstash, and a local file stream that logs at `info` level.
logging:
  level: warn
  name: changeprop-dev
  streams:
    - host: logstash.svc.eqiad.wmnet
      port: 12201
      type: gelf
    - level: info
      path: /srv/log/changeprop/main.log
      type: file
  # Per-level sampling: `trace/dedupe` is sampled at 0.01.
  sampled_levels:
    trace/dedupe: 0.01
# Statsd metrics reporter
metrics:
  name: changeprop-dev
  host: statsd.eqiad.wmnet
  port: 8125
  type: statsd
# Service definitions. The single service loads the `hyperswitch` module and
# reuses the spec anchored as `&spec` at the top of this file.
services:
  - name: changeprop-dev
    # a relative path or the name of an npm package, if different from name
    module: hyperswitch
    # per-service config
    conf:
      cors: "*"
      port: 7272
      # interface: localhost # uncomment to only listen on localhost
      # URL of the outbound proxy to use (complete with protocol)
      proxy: http://url-downloader.codfw.wikimedia.org:8080
      # the list of domains for which not to use the proxy defined above
      # no_proxy_list:
      #   - domain1.com
      #   - domain2.org
      # the list of incoming request headers that can be logged; if left empty,
      # the following headers are allowed: cache-control, content-length,
      # content-type, if-match, user-agent, x-request-id
      # log_header_whitelist:
      #   - cache-control
      #   - content-length
      #   - content-type
      #   - if-match
      #   - user-agent
      #   - x-request-id
      user_agent: ChangePropagationDev/WMF
      spec: *spec