Page MenuHomePhabricator
Paste P4583

T152122 Hive query for impression outage numbers, grouped by multiple factors
ActivePublic

Authored by AndyRussG on Dec 7 2016, 5:35 PM.
use ejegg;
CREATE TABLE IF NOT EXISTS pageratios (
`project` string,
`project_class` string,
`browser_family` string,
`browser_major` string,
`os_family` string,
`country_code` string,
`is_v6` int,
`pageviews` int,
`banners` int,
`ratio` float,
`pageviews_dip` int,
`banners_dip` int,
`ratio_dip` float
);
INSERT INTO pageratios
SELECT
normalized_host.project,
normalized_host.project_class,
user_agent_map['browser_family'],
user_agent_map['browser_major'],
user_agent_map['os_family'],
geocoded_data['country_code'],
CASE WHEN (client_ip LIKE '%:%') THEN 1 ELSE 0 END AS is_v6,
SUM(CASE WHEN (hour = 9 and is_pageview) THEN 1 ELSE 0 END) as pageviews,
SUM(CASE WHEN (hour = 9 and NOT is_pageview ) THEN 1 ELSE 0 END) as banners,
SUM(CASE WHEN (hour = 9 and NOT is_pageview ) THEN 1 ELSE 0 END) / SUM(CASE WHEN (hour = 9 and is_pageview) THEN 1 ELSE 0 END) AS ratio,
sum(case when (hour = 8 and is_pageview) then 1 else 0 end) as pageviews_dip,
sum(case when (hour = 8 and NOT is_pageview ) then 1 else 0 end) as banners_dip,
SUM(CASE WHEN (hour = 8 and NOT is_pageview ) THEN 1 ELSE 0 END) / SUM(CASE WHEN (hour = 8 and is_pageview) THEN 1 ELSE 0 END) as ratio_dip
FROM wmf.webrequest
WHERE
YEAR = 2016
AND month = 12
AND day = 2
AND hour IN(8, 9)
AND agent_type = 'user'
AND (
( ts > '2016-12-02 08:10' AND ts < '2016-12-02 08:55' ) OR
( ts > '2016-12-02 09:10' AND ts < '2016-12-02 09:55' )
)
AND (
is_pageview OR uri_path = '/beacon/impression'
)
GROUP BY
normalized_host.project,
normalized_host.project_class,
user_agent_map['browser_family'],
user_agent_map['browser_major'],
user_agent_map['os_family'],
geocoded_data['country_code'],
CASE WHEN (client_ip LIKE '%:%') THEN 1 ELSE 0 END
HAVING banners > 100 AND pageviews > 100 AND pageviews_dip > 100;