-- Phabricator Paste P43846
--
-- Caffeine patch crashing UDF example
-- Status: Active, Public
--
-- Authored by Antoine_Quhen on Feb 8 2023, 4:59 PM.
--
-- spark3-sql \
-- --master yarn \
-- --executor-memory 24G \
-- --executor-cores 4 \
-- --driver-memory 4G \
-- --driver-cores 1 \
-- --conf spark.dynamicAllocation.maxExecutors=64 \
-- --name test-udfs-caffeine
-- This jar contains the Guava->Caffeine patch:
-- https://gerrit.wikimedia.org/r/c/analytics/refinery/source/+/883118
-- plus the modification to pageview_definition (imported with a merge):
-- https://gerrit.wikimedia.org/r/c/analytics/refinery/source/+/886800/
-- It is an attempt to fix the problem, but it makes no difference to the result.
-- Make the locally built, shaded refinery-hive jar available to the session.
ADD JAR /home/aqu/analytics_refinery_source/refinery-hive/target/refinery-hive-0.2.11-SNAPSHOT-shaded.jar;

-- Expose GetPageviewInfoUDF under the SQL name get_pageview_info.
CREATE TEMPORARY FUNCTION get_pageview_info
    AS 'org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF';

SET spark.sql.shuffle.partitions = 256;

-- Reproduction query: call the UDF on rows flagged as pageviews (NULL for
-- every other row), over the 'text' webrequest data for 2023-02-08, hour 0.
SELECT
    CASE
        WHEN is_pageview
            THEN get_pageview_info(uri_host, uri_path, uri_query)
        ELSE NULL
    END AS pageview_info
FROM wmf.webrequest
WHERE webrequest_source = 'text'
    AND year = 2023
    AND month = 2
    AND day = 8
    AND hour = 0
LIMIT 100;
-- => Quickly fails with the following error:
--
-- Serialization trace:
-- mappingFunction (com.github.benmanes.caffeine.cache.BoundedLocalCache$BoundedLocalLoadingCache)
-- pageviewHostnameCache (org.wikimedia.analytics.refinery.core.PageviewDefinition)
-- pageviewDefinition (org.wikimedia.analytics.refinery.hive.GetPageviewInfoUDF)
-- at com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:160)
-- ........
-- at java.lang.Thread.run(Thread.java:750)
-- Caused by: java.lang.ClassNotFoundException: com.github.benmanes.caffeine.cache.LocalLoadingCache$$Lambda$1983/1345962703
--