Page MenuHomePhabricator
Paste P4095

BM25 A/B Test - Joining Cirrus Results & Event Logs
ActivePublic

Authored by mpopov on Sep 22 2016, 7:04 PM.
ADD JAR hdfs:///wmf/refinery/current/artifacts/refinery-hive.jar;
CREATE TEMPORARY FUNCTION array_sum AS 'org.wikimedia.analytics.refinery.hive.ArraySumUDF';
SELECT
event_logs.page_id AS page_id,
event_logs.event_id AS event_id,
search_results.page_id AS result_pids
FROM (
SELECT
get_json_object(json_string, '$.event.searchToken') AS cirrus_id,
get_json_object(json_string, '$.event.subTest') AS group_id,
get_json_object(json_string, '$.event.pageViewId') AS page_id,
get_json_object(json_string, '$.event.uniqueId') AS event_id
FROM mikhail.TestSearchSatisfaction2
WHERE year = 2016 AND month = 9
AND get_json_object(json_string, '$.event.source') = 'fulltext'
AND get_json_object(json_string, '$.event.action') = 'searchResultPage'
AND INSTR(get_json_object(json_string, '$.event.subTest'), 'bm25') > 0
) AS event_logs
LEFT JOIN (
SELECT
id,
CASE
WHEN backendusertests[0] = 'bm25:bm25_allfield' THEN 'bm25:allfield'
WHEN backendusertests[0] = 'bm25:bm25_inclinks' THEN 'bm25:inclinks'
WHEN backendusertests[0] = 'bm25:bm25_inclinks_pv' THEN 'bm25:inclinks_pv'
WHEN backendusertests[0] = 'bm25:bm25_inclinks_pv_rev' THEN 'bm25:inclinks_pv_rev'
ELSE 'bm25:control'
END AS test_group,
CASE
WHEN array_sum(requests.hitstotal, -1) = 0 THEN NULL
ELSE hits.pageid
END AS page_id
FROM wmf_raw.CirrusSearchRequestSet
WHERE year = 2016 AND month = 9
AND source = 'web' AND wikiid = 'enwiki'
AND ARRAY_CONTAINS(requests.querytype, 'full_text')
AND SIZE(backendusertests) = 1
AND INSTR(backendusertests[0], 'bm25') > 0
AND NOT ARRAY_CONTAINS(requests.hitstotal, -1)
) AS search_results
ON (event_logs.cirrus_id = search_results.id AND event_logs.group_id = search_results.test_group);