When using pyspark3 --master yarn (or Jupyter, or spark3-submit) on stat1008 or stat1010 and running a simple query that accesses Hadoop:
import wmfdata as wmf

countries = wmf.spark.run("""
    SELECT *
    FROM canonical_data.countries
""")
It crashes with:
---------------------------------------------------------------------------
AnalysisException                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 countries = wmf.spark.run("""
      2     SELECT *
      3     FROM canonical_data.countries
      4 """)

File /opt/conda-analytics/lib/python3.10/site-packages/wmfdata/spark.py:282, in run(commands)
    279 overall_result = None
    281 for cmd in commands:
--> 282     cmd_result = session.sql(cmd)
    283     # If the result has columns, the command was a query and therefore
    284     # results-producing. If not, it was a DDL or DML command and not
    285     # results-producing.
    286     if len(cmd_result.columns) > 0:

File /opt/conda-analytics/lib/python3.10/site-packages/pyspark/sql/session.py:723, in SparkSession.sql(self, sqlQuery)
    707 def sql(self, sqlQuery):
    708     """Returns a :class:`DataFrame` representing the result of the given query.
    709
    710     .. versionadded:: 2.0.0
   (...)
    721     [Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]
    722     """
--> 723     return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)

File /opt/conda-analytics/lib/python3.10/site-packages/pyspark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py:1304, in JavaMember.__call__(self, *args)
   1298 command = proto.CALL_COMMAND_NAME +\
   1299     self.command_header +\
   1300     args_command +\
   1301     proto.END_COMMAND_PART
   1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
   1305     answer, self.gateway_client, self.target_id, self.name)
   1307 for temp_arg in temp_args:
   1308     temp_arg._detach()

File /opt/conda-analytics/lib/python3.10/site-packages/pyspark/sql/utils.py:117, in capture_sql_exception.<locals>.deco(*a, **kw)
    113 converted = convert_exception(e.java_exception)
    114 if not isinstance(converted, UnknownException):
    115     # Hide where the exception came from that shows a non-Pythonic
    116     # JVM exception message.
--> 117     raise converted from None
    118 else:
    119     raise

AnalysisException: java.lang.RuntimeException: java.io.IOException: Permission denied
Checking my user groups shows:
tchin@stat1008:~$ groups
wikidev render analytics-privatedata-users analytics-admins
The query succeeds on stat1009.
@gmodena has tried this and it works for him on stat1010 but not on stat1008.
