---
# Change Propagation service configuration (development / test instance).
#
# Layout:
#   spec      - hyperswitch spec (anchored as `&spec`) declaring the internal
#               /sys/* modules: rate limiter, deduplicator, purge, dependency
#               updates and the Kafka queue with all propagation templates.
#   services  - a single hyperswitch service that re-uses the spec via `*spec`.
#
# Template strings: `{{expr}}` and `{expr}` are both used inside URIs and are
# intentionally different; keep each occurrence exactly as written.
spec: &spec
  x-sub-request-filters:
    - type: default
      name: http
      options:
        allow:
          - pattern: /^https?:\/\//
            forward_headers:
              user-agent: true
  title: The Change Propagation root
  paths:
    /sys/limit:
      x-modules:
        - path: sys/rate_limiter.js
          options:
            # Shared Redis connection settings, re-used by /sys/dedupe below.
            redis: &redis_config
              host: localhost
              port: 6379
            limiters:
              blacklist:
                # First, allow no more than 100 errors per week.
                # The precision parameter controls the step a sliding window moves by.
                - interval: 604800
                  limit: 100
                  precision: 86400
                # Secondly, to avoid bursts in case of outages, don't allow
                # more than 10 errors per hour.
                - interval: 3600
                  limit: 10
    /sys/dedupe:
      x-modules:
        - path: sys/deduplicator.js
          options:
            redis: *redis_config
    /sys/purge:
      x-modules:
        - path: sys/purge.js
          options:
            host: 127.0.0.1
            port: 4321
    /sys/links:
      x-modules:
        - path: sys/dep_updates.js
          options:
            templates:
              mw_api:
                uri: 'https://{{message.meta.domain}}/w/api.php'
                headers:
                  host: '{{message.meta.domain}}'
                body:
                  formatversion: 2
    /sys/queue:
      x-modules:
        - path: sys/kafka.js
          options:
            metadata_broker_list: 127.0.0.1:9092
            dc_name: test_dc
            startup_delay: 0
            consumer:
              # These options should not be copied to puppet config.
              # We're using this config for testing, so need to configure
              # for minimal latency.
              fetch.wait.max.ms: "1"
              fetch.min.bytes: "1"
              queue.buffering.max.ms: "1"
            producer:
              queue.buffering.max.messages: "10"
            concurrency: 250
            test_mode: true
            templates:
              summary_definition_rerender: &summary_definition_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_limit: 2
                retry_delay: 500
                retry_on:
                  status:
                    - '5xx'
                limiters:
                  blacklist: 'summary:{message.meta.uri}'
                cases:
                  # Non wiktionary domains - rerender summary
                  - match:
                      meta:
                        # The named groups `proto` and `title` are read by the
                        # exec URI below as match.meta.uri.proto / .title.
                        uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
                      tags:
                        - restbase
                    match_not:
                      - meta:
                          domain: '/wiktionary.org$/'
                      - meta:
                          domain: /\.wikidata\.org$/
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
                  # Wiktionary domains - rerender definitions
                  - match:
                      meta:
                        # These URIs are coming from RESTBase, so we know that article titles will be normalized
                        # and main namespace articles will not have : (uri-encoded, so %3a or %3A)
                        uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>(?:(?!%3a|%3A|\/).)+)$/'
                        domain: '/^en\.wiktionary\.org$/'
                      tags:
                        - restbase
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/definition/{{match.meta.uri.title}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache

              mobile_rerender: &mobile_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_limit: 2
                retry_delay: 500
                retry_on:
                  status:
                    - '5xx'
                limiters:
                  blacklist: 'mobile:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/api\/rest_v1\/page\/html\/(?<title>[^/]+)$/'
                  tags:
                    - restbase
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
                    query:
                      redirect: false
                    headers:
                      cache-control: no-cache
                  # Until we start storing and actively rerendering PCS endpoints
                  # we still need to purge it from Varnish
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{{match.meta.uri.title}}'

              # RESTBase update jobs
              mw_purge:
                topic: resource_change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - purge
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              null_edit:
                topic: resource_change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 403  # Ignoring 403 since some of the pages with high number of null_edit events are blacklisted
                    - 412
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^(?<proto>https?):\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - null_edit
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: '{{match.meta.uri.proto}}://{{message.meta.domain}}/api/rest_v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              page_edit:
                topic: mediawiki.revision-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match:
                  rev_content_changed: true
                match_not:
                  # Test-only. We use undefined rev_parent_id to test backlinks so we
                  # don't want transclusions to interfere with backlinks test
                  - rev_parent_id: undefined
                  # end of test-only config
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      x-restbase-parentrevision: '{{message.rev_parent_id}}'
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: post
                    uri: '/sys/links/transcludes/{message.page_title}'
                    body: '{{globals.message}}'

              revision_visibility_change:
                topic: mediawiki.revision-visibility-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 403  # When the revision is hidden 403 will be returned by RESTBase, it's a valid situation
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  method: get
                  uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}/{{message.rev_id}}'
                  headers:
                    cache-control: no-cache
                  query:
                    redirect: false

              page_delete:
                topic: mediawiki.page-delete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                ignore:
                  status:
                    - 404  # 404 is a normal response for page deletion
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # The links to the deleted page should become red again
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'
                  # For page deletion RESTBase doesn't emit resource_change events, and to go through
                  # the normal purge chain (html update -> html resource_change -> summary update -> summary resource_change)
                  # we need to add many workarounds/shortcuts in RESTBase. So having this list here is an OK compromise.
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/definition/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-lead/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/mobile-sections-remaining/{message.page_title}'
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{message.page_title}'

              page_restore:
                topic: mediawiki.page-undelete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # Trigger a backlinks update for the restored page.
                  # NOTE(review): presumably so links to it stop rendering as
                  # red links - confirm.
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'

              page_move:
                topic: mediawiki.page-move
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/title/{message.prior_state.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              on_transclusion_update:
                topic: change-prop.transcludes.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'transcludes' ]
                    exec:
                      method: get
                      uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/transcludes/{message.original_event.page_title}'
                      body: '{{globals.message}}'

              page_create:
                topic: mediawiki.page-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                retry_on:
                  status:
                    - '5xx'
                    - 404  # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'

              on_backlinks_update:
                topic: change-prop.backlinks.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'backlinks' ]
                    exec:
                      method: get
                      uri: 'https://{{message.meta.domain}}/api/rest_v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/backlinks/{message.original_event.page_title}'
                      body: '{{globals.message}}'

              wikidata_description_on_edit:
                topic: mediawiki.revision-create
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    domain: www.wikidata.org
                  page_namespace: 0
                  # It's impossible to modify a comment in wikidata while editing the entity.
                  # TODO: This is a temp solution until we get a more general fragment support T148079
                  comment: '/wbeditentity|wbsetdescription|undo/'
                  rev_content_changed: true
                exec:
                  method: post
                  uri: '/sys/links/wikidata_descriptions'
                  body: '{{globals.message}}'

              wikidata_description_on_undelete:
                topic: mediawiki.page-undelete
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    domain: www.wikidata.org
                  page_namespace: 0
                exec:
                  method: post
                  uri: '/sys/links/wikidata_descriptions'
                  body: '{{globals.message}}'

              on_wikidata_description_change:
                topic: change-prop.wikidata.resource-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                match:
                  meta:
                    uri: '/https:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                  tags: [ 'wikidata' ]
                exec:
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{{match.meta.uri.title}}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{{match.meta.uri.title}}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              page_images:
                topic: mediawiki.page-properties-change
                sample:
                  rate: 0.2
                  hash_template: '{{message.meta.domain}}-{{message.page_title}}'
                # We don't support 'OR' in the match section, so workaround it by 2 cases with identical exec
                # NOTE(review): the two cases use different match_not clauses
                # (namespace-scoped list vs. whole wikidata domain) - confirm
                # whether that difference is intended.
                cases:
                  - match:
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                  - match:
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      meta:
                        domain: /\.wikidata\.org$/
                    exec:
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/summary/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'https://{{message.meta.domain}}/api/rest_v1/page/mobile-sections/{message.page_title}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false

num_workers: 1

logging:
  name: changeprop-dev
  level: info

services:
  - name: changeprop-dev
    module: hyperswitch
    conf:
      port: 7272
      user_agent: DevChangePropInstance
      spec: *spec