Jovian
⭐️
Sign In
In [12]:
###### INPUT A SEARCH TERM for cases in SP Unknowns: https://digital.pathology.johnshopkins.edu/repos/451 #########
# Prompt for the term used to query the custom search engine further below.
txt = input("Search Term: ")


# Best-effort dependency bootstrap: install each package only if importlib
# cannot already locate it in the environment.
# NOTE(review): `!pip` runs in a subshell and may target a different Python
# than this kernel; `%pip install` is the safer magic — confirm before relying
# on this outside the original Jovian/Binder environment.
import importlib
pa = importlib.util.find_spec("pandas")
foundpa = pa is not None
if not foundpa:
    !pip install pandas
b = importlib.util.find_spec("bs4")
foundb = b is not None 
if not foundb:
    !pip install bs4
ga = importlib.util.find_spec("googleapiclient")
foundga = ga is not None 
if not foundga:
    !pip install google-api-python-client


import json
import re
import pandas as pd
import random
import urllib3
import requests
from bs4 import BeautifulSoup
from googleapiclient.discovery import build

filename = 'thisfile.csv'


def _parse_case_index(fh):
    """Parse 'Week ... Case N : diagnosis' lines into {week_label: diagnosis}.

    Only lines containing a colon are considered. The split is on the LAST
    colon (``rsplit``) so diagnoses that themselves contain colons stay
    intact. All spaces are stripped from the week label so it matches the
    space-stripped link text scraped from the all-cases page later on.

    Parameters: fh — an open text-file handle (or any iterable of lines).
    Returns: dict mapping space-stripped week label -> diagnosis text.
    """
    parsed = {}
    for line in fh:
        # Plain substring membership — the original used re.search(":", line),
        # which is an unnecessary regex for a fixed single character.
        if ":" in line:
            command, desc = line.rsplit(":", 1)
            parsed[command.replace(" ", "")] = desc.replace(",\n", "")
    return parsed


with open(filename) as fh:
    commands = _parse_case_index(fh)

def flatten(d, sep="_"):
    """Flatten a nested structure of dicts and lists into one OrderedDict.

    Keys of the result are the original key/index paths joined by ``sep``
    (list elements contribute their integer index as a path component).
    Non-dict, non-list values become the leaf values of the flat mapping.
    """
    import collections

    flat = collections.OrderedDict()

    def walk(node, path=""):
        # Lists recurse by position, dicts by key; everything else is a leaf.
        if isinstance(node, list):
            for idx, item in enumerate(node):
                walk(item, path + sep + str(idx) if path else str(idx))
        elif isinstance(node, dict):
            for key, value in node.items():
                walk(value, path + sep + key if path else key)
        else:
            flat[path] = node

    walk(d)
    return flat



import os

# Coin flip used to spread request quota across the two key/engine pairs.
ran = bool(random.getrandbits(1))

# Prefer credentials from the environment so the notebook can run without
# the hardcoded fallbacks below.
my_api_key = os.environ.get("GOOGLE_API_KEY")
my_cse_id = os.environ.get("GOOGLE_CSE_ID")
if not (my_api_key and my_cse_id):
    # SECURITY: these keys were hardcoded (and published) with this notebook;
    # treat them as compromised — rotate/revoke and move to env vars.
    if ran:
        my_api_key = "AIzaSyDSWjeCe5YDjpMyUo1ASvPs_YCiezla55U"
        my_cse_id = "012186595233664989134:ueeop7vueol"
    else:
        my_api_key = "AIzaSyB9nMMH7YuvDwhHVi4jSsPg6Q5RW7tSnTY"
        my_cse_id = "012186595233664989134:duay3pheitw"


def google_search(search_term, api_key, cse_id, **kwargs):
    """Run one Google Custom Search request and return its result items.

    Parameters:
        search_term — query string to send to the custom search engine.
        api_key     — Google API developer key.
        cse_id      — custom search engine id (``cx``).
        **kwargs    — extra ``cse().list`` parameters (e.g. ``start=11``).

    Returns the list of result dicts from the response, or None (after
    printing a notice) when the response has no 'items' key — which is how
    the API signals zero matches.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    items = res.get("items")
    if items is None:
        # The original bare `except:` also hid unrelated errors (typos,
        # quota failures); only the missing-key case is a "no results" case.
        print("No results found")
    return items
        

# Fetch two pages of search results (the API returns at most 10 per call).
results = google_search(
    txt, my_api_key, my_cse_id, start=1)

results2 = google_search(
    txt, my_api_key, my_cse_id, start=11)


# Final styled table; stays None when any step below fails. The original
# bare `except:` printed four blank lines and never defined `pp`, so the
# trailing `pp` expression raised NameError on the failure path.
pp = None

try:
    # google_search returns None on "no results"; guard so concatenation
    # cannot raise TypeError.
    pages = (results or []) + (results2 or [])

    # Flatten the nested API response and pull out every "Week ... Case N"
    # label, stripping spaces so labels match the scraped link text.
    flat_results = flatten(pages)
    values = list(flat_results.values())
    matches = [re.search("Week.+Case [0-9]+", v) for v in values]
    week_labels = [m.group(0).replace(" ", "") for m in matches if m is not None]

    # Scrape the all-cases index: each <a>'s text is a week label and its
    # href is the case URL.
    url = "http://apps.pathology.jhu.edu/sp/all-cases/"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")  # explicit parser
    pd.set_option('display.max_colwidth', -1)  # -1 = unlimited (use None on pandas >= 1.0)

    anchors = soup.find_all('a')
    hrefs = [a["href"] for a in anchors]
    labels = [a.text.replace(" ", "") for a in anchors]
    ss4 = pd.DataFrame(list(zip(labels, hrefs)), columns=["week", "url"])

    # Join the parsed diagnoses with the scraped URLs, then keep only the
    # weeks that actually matched the search term.
    csv = pd.DataFrame(list(commands.items()), columns=["week", "diagnosis"])
    ss6 = pd.merge(csv, ss4, on="week").drop_duplicates()
    seares = pd.DataFrame(week_labels, columns=["week"]).drop_duplicates()
    sear = pd.merge(seares, ss6, on="week")

    def make_clickable(val):
        """Render a cell value as an HTML anchor for the styled table."""
        return '<a href="{}">{}</a>'.format(val, val)

    pp = sear.style.format(make_clickable, subset="url")

except Exception as exc:
    # Report what went wrong instead of silently swallowing it.
    print("Search/scrape pipeline failed:", exc)

pp

Search Term: fibroadenoma Requirement already satisfied: google-api-python-client in /srv/conda/envs/notebook/lib/python3.7/site-packages (1.7.11) Requirement already satisfied: six<2dev,>=1.6.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (1.12.0) Requirement already satisfied: uritemplate<4dev,>=3.0.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (3.0.1) Requirement already satisfied: google-auth-httplib2>=0.0.3 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (0.0.3) Requirement already satisfied: httplib2<1dev,>=0.9.2 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (0.15.0) Requirement already satisfied: google-auth>=1.4.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (1.10.0) Requirement already satisfied: cachetools<5.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (4.0.0) Requirement already satisfied: pyasn1-modules>=0.2.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (0.2.7) Requirement already satisfied: setuptools>=40.3.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (41.2.0) Requirement already satisfied: rsa<4.1,>=3.1.4 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (4.0) Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.4.1->google-api-python-client) (0.4.8)
Out[12]:
In [4]:
 
Requirement already satisfied: pandas in /srv/conda/envs/notebook/lib/python3.7/site-packages (0.25.3) Requirement already satisfied: python-dateutil>=2.6.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (2.8.0) Requirement already satisfied: numpy>=1.13.3 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (1.18.0) Requirement already satisfied: pytz>=2017.2 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (2019.3) Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas) (1.12.0) Collecting bs4 Downloading https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz Collecting beautifulsoup4 (from bs4) Downloading https://files.pythonhosted.org/packages/cb/a1/c698cf319e9cfed6b17376281bd0efc6bfc8465698f54170ef60a485ab5d/beautifulsoup4-4.8.2-py3-none-any.whl (106kB) |████████████████████████████████| 112kB 5.4MB/s eta 0:00:01 Collecting soupsieve>=1.2 (from beautifulsoup4->bs4) Downloading https://files.pythonhosted.org/packages/81/94/03c0f04471fc245d08d0a99f7946ac228ca98da4fa75796c507f61e688c2/soupsieve-1.9.5-py2.py3-none-any.whl Building wheels for collected packages: bs4 Building wheel for bs4 (setup.py) ... done Created wheel for bs4: filename=bs4-0.0.1-cp37-none-any.whl size=1273 sha256=26c4de05716da549b731d10880bd9cf39499badf119d76724ae6b050b395a6a1 Stored in directory: /home/jovyan/.cache/pip/wheels/a0/b0/b2/4f80b9456b87abedbc0bf2d52235414c3467d8889be38dd472 Successfully built bs4 Installing collected packages: soupsieve, beautifulsoup4, bs4 Successfully installed beautifulsoup4-4.8.2 bs4-0.0.1 soupsieve-1.9.5