Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Paste
P10460
PySpark crashed SparkContext error
Active
Public
Actions
Authored by
nshahquinn-wmf
on Feb 19 2020, 7:27 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Referenced Files
F31623090: raw.txt
Feb 19 2020, 7:27 PM
2020-02-19 19:27:11 (UTC+0)
Subscribers
None
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-15-9f19ca5594a3> in <module>
     19     isp_data["isp"],
     20     CONCAT_WS("-", year, LPAD(month, 2, "0"), LPAD(day, 2, "0"))
---> 21     """, spark_config=SPARK_CONFIG)
/srv/home/neilpquinn-wmf/pkg/wmfdata/wmfdata/hive.py in run(cmds, fmt, spark_master, app_name, spark_config)
29 result_DF = cmd_result
30 if fmt == 'pandas':
---> 31 result = result_DF.toPandas()
32 else:
33 result = result_DF.collect()
/usr/lib/spark2/python/pyspark/sql/dataframe.py in toPandas(self)
2141
2142 # Below is toPandas without Arrow optimization.
-> 2143 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
2144
2145 dtype = {}
/usr/lib/spark2/python/pyspark/sql/dataframe.py in collect(self)
    532         """
    533         with SCCallSiteSync(self._sc) as css:
--> 534             sock_info = self._jdf.collectToPython()
    535         return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
    536
/usr/lib/spark2/python/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258
   1259         for temp_arg in temp_args:
/usr/lib/spark2/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()
/usr/lib/spark2/python/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(
Py4JJavaError: An error occurred while calling o275.collectToPython.
: org.apache.spark.SparkException: Job 3 cancelled because SparkContext was shut down
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:932)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:930)
    at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
    at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:930)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:2128)
    at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84)
    at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2041)
    at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1949)
    at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1340)
    at org.apache.spark.SparkContext.stop(SparkContext.scala:1948)
    at org.apache.spark.SparkContext$$anon$3.run(SparkContext.scala:1903)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:944)
    at org.apache.spark.sql.execution.datasources.InMemoryFileIndex$.bulkListLeafFiles(InMemoryFileIndex.scala:237)
    at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.listLeafFiles(InMemoryFileIndex.scala:126)
    at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.refresh0(InMemoryFileIndex.scala:91)
    at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.<init>(InMemoryFileIndex.scala:67)
    at org.apache.spark.sql.execution.datasources.PrunedInMemoryFileIndex.<init>(CatalogFileIndex.scala:118)
    at org.apache.spark.sql.execution.datasources.CatalogFileIndex.filterPartitions(CatalogFileIndex.scala:84)
    at org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(PruneFileSourcePartitions.scala:63)
    at org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(PruneFileSourcePartitions.scala:27)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:259)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:259)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:258)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:264)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:264)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:329)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:327)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:264)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$.apply(PruneFileSourcePartitions.scala:27)
    at org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$.apply(PruneFileSourcePartitions.scala:26)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
    at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
    at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
    at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3365)
    at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
Event Timeline
nshahquinn-wmf
created this paste.
Feb 19 2020, 7:27 PM
2020-02-19 19:27:11 (UTC+0)
Log In to Comment