Page MenuHomePhabricator
Paste P5464

search syntax usage may 10-20
ActivePublic

Authored by EBernhardson on May 20 2017, 11:17 AM.
Tags
None
Referenced Files
F8131409: search syntax usage may 10-20
May 20 2017, 11:17 AM
Subscribers
add jar hdfs://analytics-hadoop/user/ebernhardson/refinery-hive-0.0.39-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION get_main_search_request AS 'org.wikimedia.analytics.refinery.hive.GetMainSearchRequestUDF';
with syntax as (
select source, split(get_main_search_request(wikiid, requests).payload['syntax'], ',') as pieces
from cirrussearchrequestset
where year=2017 and month=5 and day > 10
and get_main_search_request(wikiid, requests) is not null
and array_contains(map_keys(get_main_search_request(wikiid, requests).payload), 'syntax')
)
select source, foo, count(1) as count
from syntax
lateral view explode(pieces) p as foo
group by source, foo
order by count desc
limit 100;
api,full_text,214791490
api,full_text_simple_match,184495805
web,full_text,64491539
web,full_text_simple_match,54183753
api,full_text_querystring,27505665
web,full_text_querystring,10283407
api,query_string,7350951
web,query_string,5772853
api,nearcoord,2766423
api,intitle,664505
api,prefix,255808
web,intitle,75325
api,incategory,52298
web,prefix,39818
web,incategory,24705
api,insource,20092
web,insource,2586
api,linksto,1914
web,hastemplate,853
api,hastemplate,710
api,filemime,682
api,filesize,682
web,filetype,194
web,linksto,154
web,prefer-recent,112
api,filetype,95
api,contentmodel,76
web,fileres,58
api,prefer-recent,27
web,nearcoord,27
api,local,16
web,filemime,11
web,filebits,4
web,local,4
web,inlanguage,2
web,filesize,1
api,fileres,1
web,boost-templates,1