We have third-party "filtering" in place in the refine code, but third-party data is still getting refined:
nuria@an-coord1001:~$ more /etc/refinery/refine/refine_eventlogging_analytics.properties
database = event
hive_server_url = an-coord1001.eqiad.wmnet:10000
input_path = /wmf/data/raw/eventlogging
input_path_regex = eventlogging_(.+)/hourly/(\d+)/(\d+)/(\d+)/(\d+)
input_path_regex_capture_groups = table,year,month,day,hour
output_path = /wmf/data/event
schema_base_uri = eventlogging
should_email_report = true
since = 26
table_blacklist_regex = ^Edit|ChangesListHighlights|InputDeviceDynamics|PageIssues$
to_emails = analytics-alerts@wikimedia.org
transform_functions = org.wikimedia.analytics.refinery.job.refine.deduplicate_eventlogging,org.wikimedia.analytics.refinery.job.refine.geocode_ip,org.wikimedia.analytics.refinery.job.refine.eventlogging_filter_is_allowed_hostname
until = 2
select webhost, count(*) from virtualpageview where year=2019 and month=07 and day=19 and webhost not like "%wiki%" group by webhost;

0s.oj2q.o5uww2lqmvsgsyjon5zgo.cmle.ru    9
dakaita.com    9
zhwp.iotac.xyz    2
1937-engara.tryitforfree.at-wt.com    1
wb.v2dd.com    14
en.w.meaqua.org    1
z5h64q92x9.net    211
speechpanel.readspeaker.com    1
web.archive.org    7
w.upupming.site    337
0s.mvxa.o5uww2lqmvsgsyjon5zgo.cmle.ru    25
0s.pjua.o5uww2lqmvsgsyjon5zgo.dresk.ru    3
zh.100ke.info    2