PySpark in an IPython Notebook raises Py4JNetworkError

I am running PySpark from an IPython Notebook, having just added the following setup to the notebook:
import os
import sys
import pandas as pd

os.chdir('../data_files')  # subsequent relative paths (including SPARK_HOME) resolve from here

%pylab inline
from IPython.display import Image

# Point SPARK_HOME at the (relative) Spark distribution and put the
# PySpark sources plus the bundled Py4J on sys.path.
os.environ['SPARK_HOME'] = "spark-1.3.1-bin-hadoop2.6"
sys.path.append(os.path.join(os.environ['SPARK_HOME'], 'python'))
sys.path.append(os.path.join(os.environ['SPARK_HOME'], 'bin'))
sys.path.append(os.path.join(os.environ['SPARK_HOME'], 'python/lib/py4j-0.8.2.1-src.zip'))

from pyspark import SparkContext
sc = SparkContext('local')
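
With that in place, the context comes up and small jobs run. For example, this trivial smoke test (illustrative, not from my actual notebook) round-trips through the Py4J gateway and a local executor:

# Illustrative smoke test: one local job through the JVM gateway.
# Sum of 0..99, should print 4950.
print(sc.parallelize(range(100)).sum())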
This setup works fine in one project. In my second project, however, after running a few lines (not the same ones every time), I get the error below.
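The failing cell is always an ordinary RDD action; in the traceback it is words.count(). A minimal reconstruction of such a cell (how words is built here is illustrative, not my exact code):

# Illustrative: any RDD action fails the same way once the gateway
# connection is refused; only words.count() appears in the traceback.
words = sc.textFile('input.txt').flatMap(lambda line: line.split())
words.count()

The full traceback: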
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.py", line 425, in start
self.socket.connect((self.address, self.port))
File "/usr/lib/python2.7/socket.py", line 224, in meth
return getattr(self._sock,name)(*args)
error: [Errno 111] Connection refused
---------------------------------------------------------------------------
Py4JNetworkError Traceback (most recent call last)
<ipython-input-21-4626925bbe8f> in <module>()
----> 1 words.count()
/home/eee/Desktop/NLP/spark-1.3.1-bin-hadoop2.6/python/pyspark/rdd.pyc in count(self)
930 3
931 """
--> 932 return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
933
934 def stats(self):
/home/eee/Desktop/NLP/spark-1.3.1-bin-hadoop2.6/python/pyspark/rdd.pyc in sum(self)
921 6.0
922 """
--> 923 return self.mapPartitions(lambda x: [sum(x)]).reduce(operator.add)
924
925 def count(self):
/home/eee/Desktop/NLP/spark-1.3.1-bin-hadoop2.6/python/pyspark/rdd.pyc in reduce(self, f)
737 yield reduce(f, iterator, initial)
738
--> 739 vals = self.mapPartitions(func).collect()
740 if vals:
741 return reduce(f, vals)
/home/eee/Desktop/NLP/spark-1.3.1-bin-hadoop2.6/python/pyspark/rdd.pyc in collect(self)
710 Return a list that contains all of the elements in this RDD.
711 """
--> 712 with SCCallSiteSync(self.context) as css:
713 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
714 return list(_load_from_socket(port, self._jrdd_deserializer))
/home/eee/Desktop/NLP/spark-1.3.1-bin-hadoop2.6/python/pyspark/traceback_utils.pyc in __enter__(self)
70 def __enter__(self):
71 if SCCallSiteSync._spark_stack_depth == 0:
---> 72 self._context._jsc.setCallSite(self._call_site)
73 SCCallSiteSync._spark_stack_depth += 1
74
/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.pyc in __call__(self, *args)
534 END_COMMAND_PART
535
--> 536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
538 self.target_id, self.name)
/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.pyc in send_command(self, command, retry)
360 the Py4J protocol.
361 """
--> 362 connection = self._get_connection()
363 try:
364 response = connection.send_command(command)
/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.pyc in _get_connection(self)
316 connection = self.deque.pop()
317 except Exception:
--> 318 connection = self._create_connection()
319 return connection
320
/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.pyc in _create_connection(self)
323 connection = GatewayConnection(self.address, self.port,
324 self.auto_close, self.gateway_property)
--> 325 connection.start()
326 return connection
327
/usr/local/lib/python2.7/dist-packages/py4j-0.8.2.1-py2.7.egg/py4j/java_gateway.pyc in start(self)
430 'server'
431 logger.exception(msg)
--> 432 raise Py4JNetworkError(msg)
433
434 def close(self):
Py4JNetworkError: An error occurred while trying to connect to the Java server
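
For what it's worth, Errno 111 (connection refused) means nothing is listening on the Py4J gateway port any more, which looks like the JVM behind the SparkContext has died rather than a transient network glitch. A quick, PySpark-agnostic check from the notebook is whether the Spark JVM is still running:

# If the gateway JVM is gone, no SparkSubmit process will be listed.
# ([s] keeps grep from matching itself.)
!ps aux | grep -i [s]parksubmit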
Once this happens, other lines that previously worked now raise the same error. Any ideas?