Learn practical skills, build real-world projects, and advance your career

Installation

# Install the Python packages this notebook uses. Lines starting with "!" are
# run as shell commands by the notebook, not as Python.
!pip install pyspark
# -U upgrades PyDrive if it is already installed; -q keeps pip's output quiet.
!pip install -U -q PyDrive
# Install a headless Java 8 JDK through apt (-qq suppresses most apt output).
!apt install openjdk-8-jdk-headless -qq
import os
# Point JAVA_HOME at the Java 8 JDK directory for this process.
# NOTE(review): presumably needed so PySpark can locate the JVM installed above;
# the path assumes the amd64 Ubuntu package layout -- confirm on other images.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
Requirement already satisfied: pyspark in /usr/local/lib/python3.7/dist-packages (3.2.0) Requirement already satisfied: py4j==0.10.9.2 in /usr/local/lib/python3.7/dist-packages (from pyspark) (0.10.9.2) openjdk-8-jdk-headless is already the newest version (8u292-b10-0ubuntu1~18.04). 0 upgraded, 0 newly installed, 0 to remove and 37 not upgraded.
# Let's import the libraries we will need
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pyspark
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark import SparkContext, SparkConf
# Build the Spark configuration. The Spark UI is bound to port 4050, which is
# the port the ngrok tunnel further down exposes (port 4040 is queried there
# for a different service).
conf = SparkConf().set("spark.ui.port", "4050")

# Create the context, or reuse the one already running in this kernel.
# Calling the SparkContext constructor directly raises ValueError when this
# cell is executed a second time; getOrCreate makes the cell re-runnable.
# Note that `conf` is only applied when a new context is actually created.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

# Create the session; it attaches to the context obtained above.
spark = SparkSession.builder.getOrCreate()
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip
get_ipython().system_raw('./ngrok http 4050 &')
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
--2021-11-16 02:18:57-- https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip Resolving bin.equinox.io (bin.equinox.io)... 54.237.133.81, 18.205.222.128, 54.161.241.46, ... Connecting to bin.equinox.io (bin.equinox.io)|54.237.133.81|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 13832437 (13M) [application/octet-stream] Saving to: ‘ngrok-stable-linux-amd64.zip’ ngrok-stable-linux- 100%[===================>] 13.19M 36.0MB/s in 0.4s 2021-11-16 02:18:58 (36.0 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13832437/13832437] Archive: ngrok-stable-linux-amd64.zip inflating: ngrok Traceback (most recent call last): File "<string>", line 1, in <module> IndexError: list index out of range