Paste P7948

Shell script to launch Hive/Beeline query and send an email with the result
ActivePublic

Authored by Tbayer on Wed, Jan 2, 12:41 PM.
Tags
None
Subscribers
None
Tokens
"Barnstar" token, awarded by MusikAnimal.
#! /bin/bash
DESCRIPTION="top 100+ viewed pages per wiki, all wikis, 2018"
QUERY="
SET mapred.job.queue.name=nice;
WITH counted AS ( -- adapted from https://phabricator.wikimedia.org/T183903#4824489 :
SELECT project,
page_title,
SUM(view_count) as views, -- Some mainspace views are wrongly logged without namespace_id (NULL)
SUM(IF(namespace_id = 0,view_count,0)) AS ns0views,
SUM(IF(access_method != 'desktop', view_count, 0))/SUM(view_count) AS mobile_ratio
FROM wmf.pageview_hourly
WHERE year=2018
AND agent_type = 'user'
AND page_title != '-'
GROUP BY project, page_title
HAVING ns0views >= 100 -- Some small projects may have very low traffic pages in the top X
),
ns0ranked AS (
SELECT project,
page_title,
views,
ns0views,
mobile_ratio,
rank() OVER (PARTITION BY project ORDER BY ns0views DESC) as ranking
FROM counted
)
SELECT project,
REGEXP_REPLACE(page_title,'_',' ') AS page,
CONCAT('https://', project, '.org/wiki/', page_title) AS desktopurl,
views,
ROUND(100 * mobile_ratio, 2) AS mobile_percentage
FROM ns0ranked
WHERE ranking <= 150 -- In case ranking by all views differs a bit (cf. above)
ORDER BY project ASC, views DESC LIMIT 1000000
"
THISSCRIPTFILE=`basename "$0"`
RESULTSFILE=~/${THISSCRIPTFILE%.*}_result.txt
{ date ; echo = ; TZ='America/Los_Angeles' date ; echo generated by $THISSCRIPTFILE on $HOSTNAME ; beeline --verbose=true -e "$QUERY" ; } &> $RESULTSFILE
cat $RESULTSFILE | heirloom-mailx -s "$DESCRIPTION (Hive result from $HOSTNAME)" [your email address]