# Change Propagation configuration.
# `spec` (anchored as &spec) declares the Kafka-backed queue module together
# with the rule templates it executes; process-level settings (workers,
# logging, metrics, services) follow and the service re-uses the spec through
# the `*spec` alias at the bottom of the file.
spec: &spec
  x-sub-request-filters:
    - type: default
      name: http
      options:
        allow:
          - pattern: /^https?:\/\//
            forward_headers:
              user-agent: true
  title: The Change Propagation root
  paths:
    /{api:sys}/queue:
      x-modules:
        - path: src/sys/kafka.js
          options:
            metadata_broker_list: kafka2001.codfw.wmnet:9092,kafka2002.codfw.wmnet:9092,kafka2003.codfw.wmnet:9092
            dc_name: codfw
            consumer:
              # JobQueue jobs might send messages larger than 1 MB,
              # so we increase the max message size in kafka and have to
              # adjust the consumers accordingly.
              fetch.message.max.bytes: 4194304
            # NOTE(review): nesting was reconstructed from a whitespace-collapsed
            # source; confirm the next three keys belong to `options` and not
            # to `consumer`.
            concurrency: 50
            startup_delay: 60000
            test_mode: true
            templates:
              # Each template names a topic to consume, optional match /
              # match_not filters on the message, and the request(s) to
              # execute for matching messages.
              summary_definition_rerender: &summary_definition_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                limiters:
                  blacklist: 'summary:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/([^\/]+)$/'
                      tags:
                        - restbase
                    match_not:
                      - meta:
                          domain: '/wiktionary\.org$/'
                      - meta:
                          domain: /\.wikidata\.org$/
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri[1]}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache
                  - match:
                      meta:
                        # These URIs are coming from RESTBase, so we know that article titles will be normalized
                        # and main namespace articles will not have : (uri-encoded, so %3a or %3A)
                        uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/((?:(?!%3a|%3A|\/).)+)$/'
                        domain: '/^en\.wiktionary\.org$/'
                      tags:
                        - restbase
                    exec:
                      method: get
                      # Don't encode title since it should be already encoded
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/definition/{{match.meta.uri[1]}}'
                      query:
                        redirect: false
                      headers:
                        cache-control: no-cache

              mobile_rerender: &mobile_rerender_spec
                topic: '/^(?:change-prop\.transcludes\.)?resource[-_]change$/'
                limiters:
                  blacklist: 'mobile:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^https?:\/\/[^\/]+\/api\/rest_v1\/page\/html\/([^/]+)$/'
                    domain: '/\.wikipedia\.org$/'
                  tags:
                    - restbase
                exec:
                  - method: get
                    # Don't encode title since it should be already encoded
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{{match.meta.uri[1]}}'
                    query:
                      redirect: false
                    headers:
                      cache-control: no-cache
                  # Until we start storing and actively rerendering PCS endpoints
                  # we still need to purge them from Varnish
                  - method: post
                    uri: '/sys/purge/'
                    body:
                      - meta:
                          uri: '//{{message.meta.domain}}/api/rest_v1/page/media/{{match.meta.uri[1]}}'

              # RESTBase update jobs
              mw_purge:
                topic: resource_change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    # The named group `title` is what the exec URIs below read
                    # as match.meta.uri.title.
                    uri: '/^https?:\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - purge
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              null_edit:
                topic: resource_change
                ignore:
                  status:
                    # Ignoring 403 since some of the pages with high number of null_edit events are blacklisted
                    - 403
                    - 412
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                match:
                  meta:
                    uri: '/^https?:\/\/[^\/]+\/wiki\/(?<title>.+)$/'
                  tags:
                    - null_edit
                match_not:
                  meta:
                    domain: /\.wikidata\.org$/
                exec:
                  - method: get
                    # This event comes directly from MediaWiki, so title is encoded in MW-specific way.
                    # Re-encode the title in standard `encodeURIComponent` encoding.
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  # The HTML might not change but sometimes editors use a purge to drop incorrectly rendered summary/MCS
                  # content, so let's purge them as well just in case. The rate is low.
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{decode(match.meta.uri.title)}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              page_edit:
                topic: mediawiki.revision-create
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                retry_on:
                  status:
                    - '5xx'
                    # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                    - 404
                match:
                  rev_content_changed: true
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                  # Block Scribunto Module dependencies as they create a lot of load
                  # with supposedly very low actual effect
                  - meta:
                      domain: '/wiktionary\.org$/'
                    page_namespace: 828
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      x-restbase-parentrevision: '{{message.rev_parent_id}}'
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false

              revision_visibility_change:
                topic: mediawiki.revision-visibility-change
                ignore:
                  status:
                    # When the revision is hidden 403 will be returned by RESTBase, it's a valid situation
                    - 403
                    - 412
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  method: get
                  uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.page_title}/{{message.rev_id}}'
                  headers:
                    cache-control: no-cache
                  query:
                    redirect: false

              page_restore:
                topic: mediawiki.page-undelete
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false
                  # The links to the restored page should not be red any more
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'

              page_move:
                topic: mediawiki.page-move
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{message.page_title}/{{message.rev_id}}'
                    headers:
                      cache-control: no-cache
                      if-unmodified-since: '{{date(message.meta.dt)}}'
                    query:
                      redirect: false
                  - method: get
                    uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/title/{message.prior_state.page_title}'
                    headers:
                      cache-control: no-cache
                    query:
                      redirect: false

              on_transclusion_update:
                concurrency: 200
                topic: change-prop.transcludes.resource-change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'transcludes' ]
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                    match_not:
                      meta:
                        domain: ceb.wikipedia.org
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    match_not:
                      - meta:
                          domain: ceb.wikipedia.org
                      - original_event:
                          # Block Scribunto Module dependencies as they create a lot of load
                          # with supposedly very low actual effect
                          meta:
                            domain: '/wiktionary\.org$/'
                          page_namespace: 828
                    exec:
                      method: post
                      uri: '/sys/links/transcludes/{message.original_event.page_title}'
                      body: '{{globals.message}}'

              page_create:
                topic: mediawiki.page-create
                retry_on:
                  status:
                    - '5xx'
                    # Sometimes occasional 404s happen because of the mysql replication lag, so retry
                    - 404
                match_not:
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 0
                  - meta:
                      domain: /\.wikidata\.org$/
                    page_namespace: 120
                exec:
                  - method: post
                    uri: '/sys/links/backlinks/{message.page_title}'
                    body: '{{globals.message}}'

              on_backlinks_update:
                topic: change-prop.backlinks.resource-change
                limiters:
                  blacklist: 'html:{message.meta.uri}'
                cases:
                  - match:
                      meta:
                        schema_uri: 'resource_change/1'
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'backlinks' ]
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/html/{{match.meta.uri.title}}'
                      headers:
                        cache-control: no-cache
                        if-unmodified-since: '{{date(message.root_event.dt)}}'
                        x-restbase-mode: '{{message.tags[1]}}'
                      query:
                        redirect: false
                  - match:
                      meta:
                        schema_uri: 'continue/1'
                    exec:
                      method: post
                      uri: '/sys/links/backlinks/{message.original_event.page_title}'
                      body: '{{globals.message}}'

              on_wikidata_description_change:
                topic: change-prop.wikidata.resource-change
                cases:
                  - match:
                      meta:
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                        domain: '/\.wikipedia\.org$/'
                      tags: [ 'wikidata' ]
                    exec:
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false
                  - match:
                      meta:
                        uri: '/https?:\/\/[^\/]+\/wiki\/(?<title>.+)/'
                      tags: [ 'wikidata' ]
                    exec:
                      - method: get
                        uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{{match.meta.uri.title}}'
                        headers:
                          cache-control: no-cache
                        query:
                          redirect: false

              # Rerender summary when pageimages page property change
              page_images_summary:
                topic: mediawiki.page-properties-change
                # We don't support 'OR' in the match section, so workaround it by 2 cases with identical exec
                cases:
                  - match:
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
                  - match:
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    match_not:
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 0
                      - meta:
                          domain: /\.wikidata\.org$/
                        page_namespace: 120
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/summary/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false

              page_images_mobile:
                topic: mediawiki.page-properties-change
                # We don't support 'OR' in the match section, so workaround it by 2 cases with identical exec
                cases:
                  - match:
                      meta:
                        domain: '/\.wikipedia\.org$/'
                      added_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false
                  - match:
                      meta:
                        domain: '/\.wikipedia\.org$/'
                      removed_properties:
                        page_image: '/.+/'  # Regex that matches anything just to check the prop is set
                    exec:
                      method: get
                      uri: 'http://restbase-async.discovery.wmnet:7231/{{message.meta.domain}}/v1/page/mobile-sections/{message.page_title}'
                      headers:
                        cache-control: no-cache
                      query:
                        redirect: false

# Number of worker processes to spawn.
# Set to 0 to run everything in a single process without clustering.
# Use 'ncpu' to run as many workers as there are CPU units
num_workers: ncpu

# Log error messages and gracefully restart a worker if v8 reports that it
# uses more heap (note: not RSS) than this many mb.
worker_heap_limit_mb: 750

# The maximum interval in ms that can pass between two beat messages
# sent by each worker to the master before it is killed
worker_heartbeat_timeout: 30000

# Logger info
logging:
  level: warn
  name: changeprop-dev
  streams:
    - host: logstash.svc.eqiad.wmnet
      port: 12201
      type: gelf
    - level: info
      path: /srv/log/changeprop/main.log
      type: file
  sampled_levels:
    trace/dedupe: 0.01

# Statsd metrics reporter
metrics:
  name: changeprop-dev
  host: statsd.eqiad.wmnet
  port: 8125
  type: statsd

services:
  - name: changeprop-dev
    # a relative path or the name of an npm package, if different from name
    module: hyperswitch
    # per-service config
    conf:
      cors: "*"
      port: 7272
      # interface: localhost # uncomment to only listen on localhost
      # URL of the outbound proxy to use (complete with protocol)
      proxy: http://url-downloader.codfw.wikimedia.org:8080
      # the list of domains for which not to use the proxy defined above
      # no_proxy_list:
      #   - domain1.com
      #   - domain2.org
      # the list of incoming request headers that can be logged; if left empty,
      # the following headers are allowed: cache-control, content-length,
      # content-type, if-match, user-agent, x-request-id
      # log_header_whitelist:
      #   - cache-control
      #   - content-length
      #   - content-type
      #   - if-match
      #   - user-agent
      #   - x-request-id
      user_agent: ChangePropagationDev/WMF
      spec: *spec