It looks like this Hive query is failing because of an errant automatic local join optimization. We believe this because uncommenting the first line lets the task run:
--set hive.auto.convert.join = false;
select ar_id, ar_namespace, ar_title, NULL as ar_text, NULL as ar_comment, NULL as ar_comment_id,
case when ar_deleted&4 != 0 then null when ar_actor = 0
then ar_user else COALESCE( actor_user, 0 ) END AS ar_user,
case when ar_deleted&4 != 0 then null when ar_actor = 0
then ar_user_text else actor_name END AS ar_user_text,
if(ar_deleted&4 <> 0,0,ar_actor) as ar_actor, ar_timestamp, ar_minor_edit, NULL as ar_flags, ar_rev_id,
case when ar_deleted&1 != 0 then null when content_id is NULL then ar_text_id
else content_id end as ar_text_id,
ar_deleted, if(ar_deleted&1 <> 0,null,ar_len) as ar_len,
ar_page_id, ar_parent_id, if(ar_deleted&1 <> 0,null,ar_sha1) as ar_sha1,
case when ar_deleted&1 != 0 then null when model_name is NULL then ar_content_model
else model_name end as ar_content_model,
case when ar_deleted&1 != 0 then null when model_name IS NULL then ar_content_format
else NULL end as ar_content_format
from archive
left join
(select slot_revision_id,
content_id,
model_name
from slots
join
slot_roles on slot_role_id = role_id
and role_name = 'main'
join
content on slot_content_id = content_id
join
content_models on content_model = model_id
) slot_with_content_meta on ar_rev_id = slot_revision_id
left join
actor on ar_actor = actor_id
limit 100
;