Learn practical skills, build real-world projects, and advance your career
Created 4 years ago
###### Input a search term for cases in SP unknowns: https://digital.pathology.johnshopkins.edu/repos/451 #########
# Prompt the user for the term to look up among the surgical-pathology cases.
txt = input("Search Term: ")
import importlib
# NOTE(review): "import importlib" alone does not guarantee importlib.util is
# loaded; "import importlib.util" would be the safe spelling — confirm.
# Install any missing third-party dependencies at runtime. The "!pip ..." lines
# are IPython/Jupyter shell syntax — this file is a notebook export and will
# not run under a plain Python interpreter.
pa = importlib.util.find_spec("pandas")
foundpa = pa is not None
if not foundpa:
    !pip install pandas
b = importlib.util.find_spec("bs4")
foundb = b is not None
if not foundb:
    !pip install bs4
ga = importlib.util.find_spec("googleapiclient")
foundga = ga is not None
if not foundga:
    !pip install google-api-python-client
import json
import re
import pandas as pd
import random
import urllib3
import requests
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
# Build a week -> diagnosis lookup from the local CSV. Each usable line has
# the form "<week label> : <diagnosis>,"; the week label keeps no spaces and
# the diagnosis drops the trailing ",\n".
filename = 'thisfile.csv'
commands = {}
with open(filename) as fh:
    for raw_line in fh:
        if ":" not in raw_line:
            continue
        week_label, diagnosis = raw_line.rsplit(':', 1)
        commands[week_label.replace(" ", "")] = diagnosis.replace(",\n", "")
def flatten(d, sep="_"):
    """Flatten an arbitrarily nested structure of dicts and lists into a
    single OrderedDict.

    Keys are the path segments (dict keys and list indices) joined by
    *sep*; a bare scalar input ends up under the empty-string key.
    Insertion order follows a depth-first walk of the input.
    """
    from collections import OrderedDict
    flat = OrderedDict()

    def walk(node, path=""):
        if isinstance(node, list):
            for idx, item in enumerate(node):
                walk(item, sep.join((path, str(idx))) if path else str(idx))
        elif isinstance(node, dict):
            for key, value in node.items():
                walk(value, sep.join((path, key)) if path else key)
        else:
            flat[path] = node

    walk(d)
    return flat
# Pick one of two (API key, custom-search-engine id) pairs at random,
# presumably to spread quota across the two keys — TODO confirm intent.
# NOTE(review): live API keys are committed in source here; they should be
# rotated and read from environment variables or secret storage instead.
ran = bool(random.getrandbits(1))
_credentials = (
    ("AIzaSyDSWjeCe5YDjpMyUo1ASvPs_YCiezla55U", "012186595233664989134:ueeop7vueol")
    if ran
    else ("AIzaSyB9nMMH7YuvDwhHVi4jSsPg6Q5RW7tSnTY", "012186595233664989134:duay3pheitw")
)
my_api_key, my_cse_id = _credentials
def google_search(search_term, api_key, cse_id, **kwargs):
    """Run one page of a Google Custom Search query.

    Parameters: the query string, the API key, the custom-search-engine
    id, and any extra CSE list() arguments (e.g. start=11 for page 2).

    Returns the list of result items, or None (after printing a notice)
    when the response carries no 'items' key — matching the original
    behavior callers rely on.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    # A missing 'items' key is an expected "no results" response, not an
    # exceptional condition — use .get() instead of the original bare
    # except:, which also swallowed KeyboardInterrupt/SystemExit and any
    # genuine API error.
    items = res.get('items')
    if items is None:
        print("No results found")
    return items
# Fetch two pages of search hits (results 1-10 and 11-20) for the user's term.
# NOTE(review): these two calls sit outside the try below, so a network/API
# failure here raises uncaught.
results = google_search(
    txt, my_api_key, my_cse_id,start=1)
results2 = google_search(
    txt, my_api_key, my_cse_id,start=11)
try:
    # Flatten both result pages into one path->value mapping, then keep the
    # string values that look like week/case labels (e.g. "Week ... Case 3"),
    # spaces stripped so they match the "week" keys built from thisfile.csv.
    rr=flatten(results+results2)
    r2=[i for i in rr.values()]
    r3=[re.search("Week.+Case [0-9]+",i) for i in r2]
    r4=[j for j in r3 if j is not None]
    # j[0] is the full matched text of the regex match object.
    r5=[j[0].replace(" ","") for j in r4]
    # Scrape the all-cases index page for every link: text (spaces stripped)
    # becomes the week label, href the case URL.
    url="http://apps.pathology.jhu.edu/sp/all-cases/"
    #page = urllib3.urllib3(page)
    response = requests.get( url)
    # NOTE(review): no parser argument passed to BeautifulSoup — bs4 guesses
    # and warns; "html.parser" would pin the behavior down.
    soup = BeautifulSoup(response.content)
    # -1 means "never truncate column text"; deprecated in newer pandas
    # (use None) — confirm against the pinned pandas version.
    pd.set_option('display.max_colwidth', -1)
    ss=soup.find_all('a')
    ss3=[j["href"] for j in ss]
    ss2=[j.text.replace(" ","") for j in ss]
    ss4=pd.DataFrame(list(zip(ss2,ss3)))
    ss4.columns=["week","url"]
    # Join the CSV-derived diagnoses to the scraped URLs on the week label,
    # then restrict to the weeks that actually appeared in the search hits.
    csv=pd.DataFrame.from_dict(list(commands.items()))
    csv.columns=["week","diagnosis"]
    ss5=pd.merge(csv,ss4,on="week")
    ss6=ss5.drop_duplicates()
    seares=pd.DataFrame(r5,columns=["week"]).drop_duplicates()
    sear=pd.merge(seares,ss6,on="week")
    # Bare expression — only displays in a notebook cell, and not here since
    # it is not the last statement; effectively a no-op.
    sear
    #df = pd.DataFrame(['http://google.com', 'http://duckduckgo.com'])
    def make_clickable(val):
        # Render a URL cell as a clickable HTML link in notebook output.
        return '<a href="{}">{}</a>'.format(val,val)
    pp=pd.DataFrame(sear).style.format(make_clickable,subset="url")
except:
    # NOTE(review): bare except silently swallows every failure above
    # (including simple typos) and prints blank lines; when it fires, "pp"
    # was never assigned and the final expression below raises NameError.
    print("")
    print("")
    print("")
    print("")
# Display the styled table (notebook cell output).
pp
Search Term: fibroadenoma
Requirement already satisfied: google-api-python-client in /srv/conda/envs/notebook/lib/python3.7/site-packages (1.7.11)
Requirement already satisfied: six<2dev,>=1.6.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (1.12.0)
Requirement already satisfied: uritemplate<4dev,>=3.0.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (3.0.1)
Requirement already satisfied: google-auth-httplib2>=0.0.3 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (0.0.3)
Requirement already satisfied: httplib2<1dev,>=0.9.2 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (0.15.0)
Requirement already satisfied: google-auth>=1.4.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-api-python-client) (1.10.0)
Requirement already satisfied: cachetools<5.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (4.0.0)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (0.2.7)
Requirement already satisfied: setuptools>=40.3.0 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (41.2.0)
Requirement already satisfied: rsa<4.1,>=3.1.4 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from google-auth>=1.4.1->google-api-python-client) (4.0)
Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.4.1->google-api-python-client) (0.4.8)
Requirement already satisfied: pandas in /srv/conda/envs/notebook/lib/python3.7/site-packages (0.25.3)
Requirement already satisfied: python-dateutil>=2.6.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (2.8.0)
Requirement already satisfied: numpy>=1.13.3 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (1.18.0)
Requirement already satisfied: pytz>=2017.2 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from pandas) (2019.3)
Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)
Collecting bs4
Downloading https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz
Collecting beautifulsoup4 (from bs4)
Downloading https://files.pythonhosted.org/packages/cb/a1/c698cf319e9cfed6b17376281bd0efc6bfc8465698f54170ef60a485ab5d/beautifulsoup4-4.8.2-py3-none-any.whl (106kB)
|████████████████████████████████| 112kB 5.4MB/s eta 0:00:01
Collecting soupsieve>=1.2 (from beautifulsoup4->bs4)
Downloading https://files.pythonhosted.org/packages/81/94/03c0f04471fc245d08d0a99f7946ac228ca98da4fa75796c507f61e688c2/soupsieve-1.9.5-py2.py3-none-any.whl
Building wheels for collected packages: bs4
Building wheel for bs4 (setup.py) ... done
Created wheel for bs4: filename=bs4-0.0.1-cp37-none-any.whl size=1273 sha256=26c4de05716da549b731d10880bd9cf39499badf119d76724ae6b050b395a6a1
Stored in directory: /home/jovyan/.cache/pip/wheels/a0/b0/b2/4f80b9456b87abedbc0bf2d52235414c3467d8889be38dd472
Successfully built bs4
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.8.2 bs4-0.0.1 soupsieve-1.9.5