Change Details

+------------+-------+ | namespace | n | +------------+-------+ | main | 5995 | | category | 42 | | wikipedia | 21 | | template | 14 | | help | 8 | | file | 5 | | talk | 1 | | book | 1 | | portal | 1 | | file_talk | 1 | +------------+-------+ --- !set outputformat table # Notes: # - /api/rest_v1/page/pdf/ is 22 characters, which is where the magic 23 comes from. # - Lowercase the extracted namespace in the namespaces CTE as we can't use column aliases in # certain clauses (see https://issues.apache.org/jira/browse/HIVE-1683). # - The list of accepted namespaces was created by running the following while browsing # https://en.wikipedia.org: # # Object.keys( mw.config.get( 'wgNamespaceIds' ) ) # .map( name => `"${name}"` ) # .join( ', ' ) with namespaces as ( select lower(case when instr(uri_path, "%3A") > 0 then substring(uri_path, 23, instr(uri_path, "%3A") - 23) else "main" end) as namespace from wmf.webrequest where year = 2018 and month = 11 and day = 25 and hour >= 0 and hour < 6 and normalized_host.project_family = "wikipedia" and normalized_host.project = "en" and uri_path like "/api/rest_v1/page/pdf/%" ) select namespace, count(*) as n from namespaces where namespace in ( "media", "special", "main", "talk", "user", "user_talk", "wikipedia", "wikipedia_talk", "file", "file_talk", "mediawiki", "mediawiki_talk", "template", "template_talk", "help", "help_talk", "category", "category_talk", "portal", "portal_talk", "book", "book_talk", "draft", "draft_talk", "timedtext", "timedtext_talk", "module", "module_talk", "gadget", "gadget_talk", "gadget_definition", "gadget_definition_talk", "wp", "wt", "image", "image_talk", "project", "project_talk" ) group by namespace order by n desc limit 1000 ;