Paste P6243

https://gerrit.wikimedia.org/r/#/c/387658/5 mjolnir / py.test timeout?
Active · Public

Authored by hashar on Nov 1 2017, 10:14 PM.
# That is for https://gerrit.wikimedia.org/r/#/c/387658/5
/src$ /src/.tox/pytest/bin/pytest --pyargs mjolnir
============================= test session starts ==============================
platform linux2 -- Python 2.7.9, pytest-3.2.3, py-1.4.34, pluggy-0.4.0
rootdir: /src, inifile:
collected 44 items
mjolnir/test/test_dbn.py E
mjolnir/test/test_features.py EE
mjolnir/test/test_metrics.py EE
mjolnir/test/test_norm_query.py E........
mjolnir/test/test_sampling.py EE
mjolnir/test/test_spark.py E
mjolnir/test/training/test_hyperopt.py EE
mjolnir/test/training/test_tuning.py EE
mjolnir/test/training/test_xgboost.py EEEE
mjolnir/test/utilities/test_spark.py ...................
==================================== ERRORS ====================================
_______________________ ERROR at setup of test_dbn_train _______________________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
---------------------------- Captured stderr setup -----------------------------
Ivy Default Cache set to: /cache/ivy2/cache
The jars for the packages stored in: /cache/ivy2/jars
https://archiva.wikimedia.org/repository/releases added as a remote repository with the name: repo-1
https://archiva.wikimedia.org/repository/snapshots added as a remote repository with the name: repo-2
https://archiva.wikimedia.org/repository/mirrored added as a remote repository with the name: repo-3
:: loading settings :: url = jar:file:/opt/spark-2.1.0-bin-hadoop2.6/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
ml.dmlc#xgboost4j-spark added as a dependency
org.wikimedia.search#mjolnir added as a dependency
org.apache.spark#spark-streaming-kafka-0-8_2.11 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0
confs: [default]
found ml.dmlc#xgboost4j-spark;0.7-wmf-1 in repo-1
found ml.dmlc#xgboost4j;0.7-wmf-1 in repo-1
found com.typesafe.akka#akka-actor_2.11;2.3.11 in repo-3
found com.typesafe#config;1.2.1 in repo-3
found com.esotericsoftware.kryo#kryo;2.21 in repo-3
found com.esotericsoftware.reflectasm#reflectasm;1.07 in repo-3
found org.ow2.asm#asm;4.0 in repo-3
found com.esotericsoftware.minlog#minlog;1.2 in repo-3
found org.objenesis#objenesis;1.2 in repo-3
found org.scala-lang#scala-compiler;2.11.8 in repo-3
found org.scala-lang#scala-reflect;2.11.8 in repo-3
found org.scala-lang.modules#scala-xml_2.11;1.0.4 in repo-3
found org.scala-lang.modules#scala-parser-combinators_2.11;1.0.4 in repo-3
found commons-logging#commons-logging;1.2 in repo-3
found org.wikimedia.search#mjolnir;0.2 in repo-1
found org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0 in repo-3
found org.apache.kafka#kafka_2.11;0.8.2.1 in repo-3
found com.yammer.metrics#metrics-core;2.2.0 in repo-3
found org.slf4j#slf4j-api;1.7.16 in repo-3
found com.101tec#zkclient;0.3 in repo-3
found log4j#log4j;1.2.17 in repo-3
found org.apache.kafka#kafka-clients;0.8.2.1 in repo-3
found net.jpountz.lz4#lz4;1.3.0 in repo-3
found org.xerial.snappy#snappy-java;1.1.2.6 in repo-3
found org.apache.spark#spark-tags_2.11;2.1.0 in repo-3
found org.scalatest#scalatest_2.11;2.2.6 in repo-3
found org.spark-project.spark#unused;1.0.0 in repo-3
downloading https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j-spark/0.7-wmf-1/xgboost4j-spark-0.7-wmf-1.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/org/apache/spark/spark-streaming-kafka-0-8_2.11/2.1.0/spark-streaming-kafka-0-8_2.11-2.1.0.jar ...
downloading https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j/0.7-wmf-1/xgboost4j-0.7-wmf-1.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/org/scala-lang/scala-compiler/2.11.8/scala-compiler-2.11.8.jar ...
[SUCCESSFUL ] org.scala-lang#scala-compiler;2.11.8!scala-compiler.jar (113418ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/scala-lang/scala-reflect/2.11.8/scala-reflect-2.11.8.jar ...
[SUCCESSFUL ] org.scala-lang#scala-reflect;2.11.8!scala-reflect.jar (29154ms)
downloading https://archiva.wikimedia.org/repository/mirrored/commons-logging/commons-logging/1.2/commons-logging-1.2.jar ...
[SUCCESSFUL ] commons-logging#commons-logging;1.2!commons-logging.jar (1342ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/typesafe/akka/akka-actor_2.11/2.3.11/akka-actor_2.11-2.3.11.jar ...
[SUCCESSFUL ] com.typesafe.akka#akka-actor_2.11;2.3.11!akka-actor_2.11.jar (20202ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/typesafe/config/1.2.1/config-1.2.1.jar ...
[SUCCESSFUL ] com.typesafe#config;1.2.1!config.jar(bundle) (1845ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/reflectasm/reflectasm/1.07/reflectasm-1.07-shaded.jar ...
[SUCCESSFUL ] com.esotericsoftware.reflectasm#reflectasm;1.07!reflectasm.jar (845ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/minlog/minlog/1.2/minlog-1.2.jar ...
[SUCCESSFUL ] com.esotericsoftware.minlog#minlog;1.2!minlog.jar (254ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/objenesis/objenesis/1.2/objenesis-1.2.jar ...
[SUCCESSFUL ] org.objenesis#objenesis;1.2!objenesis.jar (440ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/ow2/asm/asm/4.0/asm-4.0.jar ...
[SUCCESSFUL ] org.ow2.asm#asm;4.0!asm.jar (692ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/scala-lang/modules/scala-xml_2.11/1.0.4/scala-xml_2.11-1.0.4.jar ...
[SUCCESSFUL ] org.scala-lang.modules#scala-xml_2.11;1.0.4!scala-xml_2.11.jar(bundle) (4809ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.4/scala-parser-combinators_2.11-1.0.4.jar ...
[SUCCESSFUL ] org.scala-lang.modules#scala-parser-combinators_2.11;1.0.4!scala-parser-combinators_2.11.jar(bundle) (4319ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/apache/kafka/kafka_2.11/0.8.2.1/kafka_2.11-0.8.2.1.jar ...
[SUCCESSFUL ] org.apache.kafka#kafka_2.11;0.8.2.1!kafka_2.11.jar (30520ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/apache/spark/spark-tags_2.11/2.1.0/spark-tags_2.11-2.1.0.jar ...
[SUCCESSFUL ] org.apache.spark#spark-tags_2.11;2.1.0!spark-tags_2.11.jar (592ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar ...
[SUCCESSFUL ] org.spark-project.spark#unused;1.0.0!unused.jar (226ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/yammer/metrics/metrics-core/2.2.0/metrics-core-2.2.0.jar ...
[SUCCESSFUL ] com.yammer.metrics#metrics-core;2.2.0!metrics-core.jar (1106ms)
downloading https://archiva.wikimedia.org/repository/mirrored/com/101tec/zkclient/0.3/zkclient-0.3.jar ...
[SUCCESSFUL ] com.101tec#zkclient;0.3!zkclient.jar (1005ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/apache/kafka/kafka-clients/0.8.2.1/kafka-clients-0.8.2.1.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/log4j/log4j/1.2.17/log4j-1.2.17.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar ...
downloading https://archiva.wikimedia.org/repository/mirrored/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar ...
[SUCCESSFUL ] org.xerial.snappy#snappy-java;1.1.2.6!snappy-java.jar(bundle) (7525ms)
downloading https://archiva.wikimedia.org/repository/mirrored/org/scalatest/scalatest_2.11/2.2.6/scalatest_2.11-2.2.6.jar ...
[SUCCESSFUL ] org.scalatest#scalatest_2.11;2.2.6!scalatest_2.11.jar(bundle) (52928ms)
:: resolution report :: resolve 67660ms :: artifacts dl 299119ms
:: modules in use:
com.101tec#zkclient;0.3 from repo-3 in [default]
com.esotericsoftware.kryo#kryo;2.21 from repo-3 in [default]
com.esotericsoftware.minlog#minlog;1.2 from repo-3 in [default]
com.esotericsoftware.reflectasm#reflectasm;1.07 from repo-3 in [default]
com.typesafe#config;1.2.1 from repo-3 in [default]
com.typesafe.akka#akka-actor_2.11;2.3.11 from repo-3 in [default]
com.yammer.metrics#metrics-core;2.2.0 from repo-3 in [default]
commons-logging#commons-logging;1.2 from repo-3 in [default]
log4j#log4j;1.2.17 from repo-3 in [default]
ml.dmlc#xgboost4j;0.7-wmf-1 from repo-1 in [default]
ml.dmlc#xgboost4j-spark;0.7-wmf-1 from repo-1 in [default]
net.jpountz.lz4#lz4;1.3.0 from repo-3 in [default]
org.apache.kafka#kafka-clients;0.8.2.1 from repo-3 in [default]
org.apache.kafka#kafka_2.11;0.8.2.1 from repo-3 in [default]
org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0 from repo-3 in [default]
org.apache.spark#spark-tags_2.11;2.1.0 from repo-3 in [default]
org.objenesis#objenesis;1.2 from repo-3 in [default]
org.ow2.asm#asm;4.0 from repo-3 in [default]
org.scala-lang#scala-compiler;2.11.8 from repo-3 in [default]
org.scala-lang#scala-reflect;2.11.8 from repo-3 in [default]
org.scala-lang.modules#scala-parser-combinators_2.11;1.0.4 from repo-3 in [default]
org.scala-lang.modules#scala-xml_2.11;1.0.4 from repo-3 in [default]
org.scalatest#scalatest_2.11;2.2.6 from repo-3 in [default]
org.slf4j#slf4j-api;1.7.16 from repo-3 in [default]
org.spark-project.spark#unused;1.0.0 from repo-3 in [default]
org.wikimedia.search#mjolnir;0.2 from repo-1 in [default]
org.xerial.snappy#snappy-java;1.1.2.6 from repo-3 in [default]
:: evicted modules:
org.scala-lang.modules#scala-xml_2.11;1.0.2 by [org.scala-lang.modules#scala-xml_2.11;1.0.4] in [default]
org.scala-lang.modules#scala-parser-combinators_2.11;1.0.2 by [org.scala-lang.modules#scala-parser-combinators_2.11;1.0.4] in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   29  |   20  |   20  |   2   ||   27  |   18  |
	---------------------------------------------------------------------
:: problems summary ::
:::: WARNINGS
impossible to put metadata file in cache: https://archiva.wikimedia.org/repository/mirrored/org/slf4j/slf4j-parent/1.7.16/slf4j-parent-1.7.16.pom (1.7.16) (java.io.FileNotFoundException: /cache/ivy2/cache/org.slf4j/slf4j-parent/ivy-1.7.16.xml.original (No such file or directory))
[FAILED ] ml.dmlc#xgboost4j-spark;0.7-wmf-1!xgboost4j-spark.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j-spark/0.7-wmf-1/xgboost4j-spark-0.7-wmf-1.jar. Please retry. (2176ms)
[FAILED ] ml.dmlc#xgboost4j-spark;0.7-wmf-1!xgboost4j-spark.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j-spark/0.7-wmf-1/xgboost4j-spark-0.7-wmf-1.jar. Please retry. (2176ms)
==== repo-1: tried
https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j-spark/0.7-wmf-1/xgboost4j-spark-0.7-wmf-1.jar
[FAILED ] org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0!spark-streaming-kafka-0-8_2.11.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/org/apache/spark/spark-streaming-kafka-0-8_2.11/2.1.0/spark-streaming-kafka-0-8_2.11-2.1.0.jar. Please retry. (3326ms)
[FAILED ] org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0!spark-streaming-kafka-0-8_2.11.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/org/apache/spark/spark-streaming-kafka-0-8_2.11/2.1.0/spark-streaming-kafka-0-8_2.11-2.1.0.jar. Please retry. (3326ms)
==== repo-3: tried
https://archiva.wikimedia.org/repository/mirrored/org/apache/spark/spark-streaming-kafka-0-8_2.11/2.1.0/spark-streaming-kafka-0-8_2.11-2.1.0.jar
[FAILED ] ml.dmlc#xgboost4j;0.7-wmf-1!xgboost4j.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j/0.7-wmf-1/xgboost4j-0.7-wmf-1.jar. Please retry. (11433ms)
[FAILED ] ml.dmlc#xgboost4j;0.7-wmf-1!xgboost4j.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j/0.7-wmf-1/xgboost4j-0.7-wmf-1.jar. Please retry. (11433ms)
==== repo-1: tried
https://archiva.wikimedia.org/repository/releases/ml/dmlc/xgboost4j/0.7-wmf-1/xgboost4j-0.7-wmf-1.jar
[FAILED ] com.esotericsoftware.kryo#kryo;2.21!kryo.jar(bundle): Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar. Please retry. (2176ms)
[FAILED ] com.esotericsoftware.kryo#kryo;2.21!kryo.jar(bundle): Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar. Please retry. (2176ms)
==== repo-3: tried
https://archiva.wikimedia.org/repository/mirrored/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar
[FAILED ] org.apache.kafka#kafka-clients;0.8.2.1!kafka-clients.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/org/apache/kafka/kafka-clients/0.8.2.1/kafka-clients-0.8.2.1.jar. Please retry. (3190ms)
[FAILED ] org.apache.kafka#kafka-clients;0.8.2.1!kafka-clients.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/org/apache/kafka/kafka-clients/0.8.2.1/kafka-clients-0.8.2.1.jar. Please retry. (3190ms)
==== repo-3: tried
https://archiva.wikimedia.org/repository/mirrored/org/apache/kafka/kafka-clients/0.8.2.1/kafka-clients-0.8.2.1.jar
[FAILED ] log4j#log4j;1.2.17!log4j.jar(bundle): Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/log4j/log4j/1.2.17/log4j-1.2.17.jar. Please retry. (3431ms)
[FAILED ] log4j#log4j;1.2.17!log4j.jar(bundle): Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/log4j/log4j/1.2.17/log4j-1.2.17.jar. Please retry. (3431ms)
==== repo-3: tried
https://archiva.wikimedia.org/repository/mirrored/log4j/log4j/1.2.17/log4j-1.2.17.jar
[FAILED ] net.jpountz.lz4#lz4;1.3.0!lz4.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar. Please retry. (2112ms)
[FAILED ] net.jpountz.lz4#lz4;1.3.0!lz4.jar: Downloaded file size doesn't match expected Content Length for https://archiva.wikimedia.org/repository/mirrored/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar. Please retry. (2112ms)
==== repo-3: tried
https://archiva.wikimedia.org/repository/mirrored/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar
::::::::::::::::::::::::::::::::::::::::::::::
:: FAILED DOWNLOADS ::
:: ^ see resolution messages for details ^ ::
::::::::::::::::::::::::::::::::::::::::::::::
:: ml.dmlc#xgboost4j-spark;0.7-wmf-1!xgboost4j-spark.jar
:: ml.dmlc#xgboost4j;0.7-wmf-1!xgboost4j.jar
:: com.esotericsoftware.kryo#kryo;2.21!kryo.jar(bundle)
:: org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0!spark-streaming-kafka-0-8_2.11.jar
:: log4j#log4j;1.2.17!log4j.jar(bundle)
:: org.apache.kafka#kafka-clients;0.8.2.1!kafka-clients.jar
:: net.jpountz.lz4#lz4;1.3.0!lz4.jar
::::::::::::::::::::::::::::::::::::::::::::::
:: USE VERBOSE OR DEBUG MESSAGE LEVEL FOR MORE DETAILS
Exception in thread "main" java.lang.RuntimeException: [download failed: ml.dmlc#xgboost4j-spark;0.7-wmf-1!xgboost4j-spark.jar, download failed: ml.dmlc#xgboost4j;0.7-wmf-1!xgboost4j.jar, download failed: com.esotericsoftware.kryo#kryo;2.21!kryo.jar(bundle), download failed: org.apache.spark#spark-streaming-kafka-0-8_2.11;2.1.0!spark-streaming-kafka-0-8_2.11.jar, download failed: log4j#log4j;1.2.17!log4j.jar(bundle), download failed: org.apache.kafka#kafka-clients;0.8.2.1!kafka-clients.jar, download failed: net.jpountz.lz4#lz4;1.3.0!lz4.jar]
at org.apache.spark.deploy.SparkSubmitUtils$.resolveMavenCoordinates(SparkSubmit.scala:1078)
at org.apache.spark.deploy.SparkSubmit$.prepareSubmitEnvironment(SparkSubmit.scala:296)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:160)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
______________________ ERROR at setup of test_collect_es _______________________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
__________________ ERROR at setup of test_collect_ltr_plugin ___________________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
______________ ERROR at setup of test_ndcg_doesnt_completely_fail ______________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
_____________ ERROR at setup of test_query_can_be_multiple_columns _____________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
______________________ ERROR at setup of test_norm_query _______________________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
__ ERROR at setup of test_sampling_selects_all_if_less_than_samples_per_wiki ___
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
_______________ ERROR at setup of test_sampling_general_approach _______________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
__________________ ERROR at setup of test_at_least_n_distinct __________________
request = <SubRequest 'spark_context' for <Function 'test_dbn_train'>>
@pytest.fixture(scope="session")
def spark_context(request):
"""Fixture for creating a spark context.
Args:
request: pytest.FixtureRequest object
Returns:
SparkContext for tests
"""
quiet_log4j()
# Pull appropriate jvm dependencies from archiva. Would be nice
# if we could provide this in SparkConf, but in 2.1.x there isn't
# a way.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--repositories %s pyspark-shell' % (
','.join(['https://archiva.wikimedia.org/repository/%s' % (repo)
for repo in ['releases', 'snapshots', 'mirrored']]))
conf = (
SparkConf()
.setMaster("local[2]")
.setAppName("pytest-pyspark-local-testing")
# Maven coordinates of jvm dependencies
.set('spark.jars.packages', ','.join([
'ml.dmlc:xgboost4j-spark:0.7-wmf-1',
'org.wikimedia.search:mjolnir:0.2',
'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.1.0']))
# By default spark will shuffle to 200 partitions, which is
# way too many for our small test cases. This cuts execution
# time of the tests in half.
.set('spark.sql.shuffle.partitions', 4))
if 'XDG_CACHE_HOME' in os.environ:
conf.set('spark.jars.ivy', os.path.join(os.environ['XDG_CACHE_HOME'], 'ivy2'))
> sc = SparkContext(conf=conf)
mjolnir/test/conftest.py:62:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:115: in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/context.py:256: in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
conf = <pyspark.conf.SparkConf object at 0x7f79821821d0>
def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args)
# Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', 0))
callback_socket.listen(1)
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)
# Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env)
gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = 1 # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[0]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
> raise Exception("Java gateway process exited before sending the driver its port number")
E Exception: Java gateway process exited before sending the driver its port number
/opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95: Exception
_______________________ ERROR at setup of test_minimize ________________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
______________________ ERROR at setup of test_gridsearch _______________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
_________________________ ERROR at setup of test_split _________________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
________________ ERROR at setup of test_cross_validate_plain_df ________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
________________ ERROR at setup of test_prep_training_no_params ________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
______________ ERROR at setup of test_prep_training_w_num_workers ______________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
_________________ ERROR at setup of test_train_minimum_params __________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
___________________ ERROR at setup of test_train_pre_prepped ___________________
(identical spark_context fixture setup failure as above: Exception: Java gateway process exited before sending the driver its port number, /opt/spark-2.1.0-bin-hadoop2.6/python/pyspark/java_gateway.py:95)
==================== 27 passed, 17 error in 370.24 seconds =====================
ERROR: InvocationError: '/src/.tox/pytest/bin/pytest --pyargs mjolnir'
___________________________________ summary ____________________________________
flake8: commands succeeded
ERROR: pytest: commands failed
+ capture_logs
+ cp --recursive /src/.tox/flake8/log/flake8-0.log /src/.tox/flake8/log/flake8-1.log /src/.tox/flake8/log/flake8-2.log /src/.tox/flake8/log/flake8-3.log /src/.tox/pytest/log/pytest-0.log /src/.tox/pytest/log/pytest-1.log /src/.tox/pytest/log/pytest-2.log /src/.tox/pytest/log/pytest-3.log /log
+ cp --recursive /src/.tox/log /log
Traceback (most recent call last):
File "./build.py", line 268, in <module>
if builder.run() is False:
File "./build.py", line 65, in run
if not all(map(self.build, dockerfiles)):
File "./build.py", line 250, in build
self.run_cmd(['bash', 'example-run.sh'], cwd=image_dir)
File "./build.py", line 263, in run_cmd
subprocess.check_call(args, **kwargs)
File "/usr/lib/python3.5/subprocess.py", line 271, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['bash', 'example-run.sh']' returned non-zero exit status 1
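# Closing note (not part of the original output): given the "py.test timeout?" question in the
# paste title, one plausible reading is that pytest never hung at all. The run took 370.24
# seconds with 17 identical setup errors, which is consistent with each session-scoped
# SparkContext attempt spending a while on (and then failing) dependency resolution of the
# spark.jars.packages coordinates against archiva before spark-submit exited. A quick,
# hypothetical check from inside the CI container is sketched below; the hostname and port are
# assumptions based on the https://archiva.wikimedia.org/repository/... URLs in the fixture.
import socket

for host, port in [('archiva.wikimedia.org', 443)]:
    try:
        # If this fails, every --repositories resolution will fail too, and launch_gateway()
        # will surface only the generic "Java gateway process exited" error seen above.
        socket.create_connection((host, port), timeout=5).close()
        print('%s:%d reachable' % (host, port))
    except (socket.error, socket.timeout) as exc:
        print('%s:%d NOT reachable: %s' % (host, port, exc))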