Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
import os
import sys
# Locations of the Cloudera-managed Python, JDK and Spark 2 installs that
# this notebook targets.
_PYSPARK_PYTHON = "/opt/cloudera/parcels/Anaconda/bin/python"
_JAVA_HOME = "/usr/java/jdk1.8.0_232-cloudera/"
_SPARK_HOME = "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/"

# Only point the environment at the Cloudera parcel when it is really present.
# Exporting a SPARK_HOME that does not exist (for example on a laptop that
# uses a pip-installed pyspark) makes pyspark look for spark-submit inside
# that missing directory, and creating the SparkSession then fails with
# FileNotFoundError.
if os.path.isdir(_SPARK_HOME):
    os.environ["PYSPARK_PYTHON"] = _PYSPARK_PYTHON
    os.environ["JAVA_HOME"] = _JAVA_HOME
    os.environ["SPARK_HOME"] = _SPARK_HOME
    os.environ["PYLIB"] = os.environ["SPARK_HOME"] + "/python/lib"

    # The bundled py4j archive is versioned and the version differs between
    # Spark releases, so discover it instead of hard-coding one file name.
    # Fall back to the previously hard-coded name if nothing is found.
    _py4j_zips = []
    if os.path.isdir(os.environ["PYLIB"]):
        _py4j_zips = sorted(
            _name
            for _name in os.listdir(os.environ["PYLIB"])
            if _name.startswith("py4j-") and _name.endswith("-src.zip")
        )
    if not _py4j_zips:
        _py4j_zips = ["py4j-0.10.9-src.zip"]

    # Insert py4j first and pyspark.zip second so that pyspark.zip ends up at
    # the very front of sys.path, followed by py4j.
    for _zip_name in _py4j_zips:
        sys.path.insert(0, os.environ["PYLIB"] + "/" + _zip_name)
    sys.path.insert(0, os.environ["PYLIB"] + "/pyspark.zip")
# pip install pyspark  (run once, then restart the kernel so the new package is picked up)
Collecting pyspark
Downloading pyspark-3.1.2.tar.gz (212.4 MB)
Collecting py4j==0.10.9
Downloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)
Building wheels for collected packages: pyspark
Building wheel for pyspark (setup.py): started
Building wheel for pyspark (setup.py): finished with status 'done'
Created wheel for pyspark: filename=pyspark-3.1.2-py2.py3-none-any.whl size=212880773 sha256=28c93708f13256711d24ac25fd3be44516792156567421d040bc3cc6290071f2
Stored in directory: c:\users\aruna\appdata\local\pip\cache\wheels\df\88\9e\58ef1f74892fef590330ca0830b5b6d995ba29b44f977b3926
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9 pyspark-3.1.2
Note: you may need to restart the kernel to use updated packages.
from pyspark.sql import SparkSession
# Create the notebook's SparkSession, or reuse one that is already active:
# application name 'demo', master "local".
spark = (
    SparkSession.builder
    .appName('demo')
    .master("local")
    .getOrCreate()
)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-15-371cbd2bc282> in <module>
----> 1 spark = SparkSession.builder.appName('demo').master("local").getOrCreate()
~\anaconda3\lib\site-packages\pyspark\sql\session.py in getOrCreate(self)
226 sparkConf.set(key, value)
227 # This SparkContext may be an existing one.
--> 228 sc = SparkContext.getOrCreate(sparkConf)
229 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
230 # by all sessions.
~\anaconda3\lib\site-packages\pyspark\context.py in getOrCreate(cls, conf)
382 with SparkContext._lock:
383 if SparkContext._active_spark_context is None:
--> 384 SparkContext(conf=conf or SparkConf())
385 return SparkContext._active_spark_context
386
~\anaconda3\lib\site-packages\pyspark\context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
142 " is not allowed as it is a security risk.")
143
--> 144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
~\anaconda3\lib\site-packages\pyspark\context.py in _ensure_initialized(cls, instance, gateway, conf)
329 with SparkContext._lock:
330 if not SparkContext._gateway:
--> 331 SparkContext._gateway = gateway or launch_gateway(conf)
332 SparkContext._jvm = SparkContext._gateway.jvm
333
~\anaconda3\lib\site-packages\pyspark\java_gateway.py in launch_gateway(conf, popen_kwargs)
99 else:
100 # preexec_fn not supported on Windows
--> 101 proc = Popen(command, **popen_kwargs)
102
103 # Wait for the file to appear, or for the process to exit, whichever happens first.
~\anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
852 encoding=encoding, errors=errors)
853
--> 854 self._execute_child(args, executable, preexec_fn, close_fds,
855 pass_fds, cwd, env,
856 startupinfo, creationflags, shell,
~\anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)
1305 # Start the process
1306 try:
-> 1307 hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
1308 # no special security
1309 None, None,
FileNotFoundError: [WinError 2] The system cannot find the file specified
# A SparkSession exposes its underlying context through the lowercase
# `sparkContext` attribute; `spark.SparkContext` does not exist on the session.
sc = spark.sparkContext
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-e3a11d985a77> in <module>
----> 1 sc = spark.SparkContext
NameError: name 'spark' is not defined