import re
from collections import Counter

import matplotlib.pyplot as plt
import nltk as nl
import seaborn as sns
from nltk.corpus import stopwords  # stopwords.words("english")
from sklearn.feature_extraction import stop_words  # stop_words.ENGLISH_STOP_WORDS
from spacy.lang.en.stop_words import STOP_WORDS  # list of stopwords
# For FreqDistribution: nl.FreqDist(<list of words>)
# Sample text (a passage about Isaac Newton) used by the cleaning and
# word-frequency steps.
a = """Sir Isaac Newton PRS (25 December 1642 – 20 March 1726/27[a]) was an English mathematician, physicist, astronomer, theologian, and author (described in his own day as a "natural philosopher") who is widely recognised as one of the most influential scientists of all time, and a key figure in the scientific revolution.
His book Philosophiæ Naturalis Principia Mathematica (Mathematical Principles of Natural Philosophy), first published in 1687, laid the foundations of classical mechanics. Newton also made seminal contributions to optics, and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus.
In Principia, sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity.
Newton used his mathematical description of gravity to prove Kepler's laws of planetary motion, account for tides, the trajectories of comets, the precession of the equinoxes and other phenomena, eradicating doubt about the Solar System's heliocentricity.
He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles. Newton's inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis, La Condamine, and others, convincing most European scientists of the superiority of Newtonian mechanics over earlier systems."""
print(a)
Sir Isaac Newton PRS (25 December 1642 – 20 March 1726/27[a]) was an English mathematician, physicist, astronomer, theologian, and author (described in his own day as a "natural philosopher") who is widely recognised as one of the most influential scientists of all time, and a key figure in the scientific revolution.
His book Philosophiæ Naturalis Principia Mathematica (Mathematical Principles of Natural Philosophy), first published in 1687, laid the foundations of classical mechanics. Newton also made seminal contributions to optics, and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus.
In Principia, sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity.
Newton used his mathematical description of gravity to prove Kepler's laws of planetary motion, account for tides, the trajectories of comets, the precession of the equinoxes and other phenomena, eradicating doubt about the Solar System's heliocentricity.
He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles. Newton's inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis, La Condamine, and others, convincing most European scientists of the superiority of Newtonian mechanics over earlier systems.
# Replace every non-word character with a space (\W is the same as [^\w]).
# Alternatives: re.sub(r"[^\w]", " ", a) is equivalent; an ASCII letters/digits
# only variant is re.sub(r"[^A-Za-z0-9]", " ", a).  Note that "[^aA-zZ0-9]" is
# NOT that variant: the A-z range also covers the punctuation that sits
# between "Z" and "a".
# The pattern is a raw string because "\W" in a plain literal is an invalid
# escape sequence.
b = re.sub(r"\W", " ", a)
print(b)
Sir Isaac Newton PRS 25 December 1642 20 March 1726 27 a was an English mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution His book Philosophiæ Naturalis Principia Mathematica Mathematical Principles of Natural Philosophy first published in 1687 laid the foundations of classical mechanics Newton also made seminal contributions to optics and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus In Principia sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity Newton used his mathematical description of gravity to prove Kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the Solar System s heliocentricity He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles Newton s inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis La Condamine and others convincing most European scientists of the superiority of Newtonian mechanics over earlier systems
# Replace every run of digits (years, day numbers) with a space.
# Raw string: "\d" in a plain literal is an invalid escape sequence.
c = re.sub(r"\d+", " ", b)
print(c)
Sir Isaac Newton PRS December March a was an English mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution His book Philosophiæ Naturalis Principia Mathematica Mathematical Principles of Natural Philosophy first published in laid the foundations of classical mechanics Newton also made seminal contributions to optics and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus In Principia sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity Newton used his mathematical description of gravity to prove Kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the Solar System s heliocentricity He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles Newton s inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis La Condamine and others convincing most European scientists of the superiority of Newtonian mechanics over earlier systems
# Lower-case the digit- and punctuation-free text.
tst = c.lower()
print(tst)
sir isaac newton prs december march a was an english mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution his book philosophiæ naturalis principia mathematica mathematical principles of natural philosophy first published in laid the foundations of classical mechanics newton also made seminal contributions to optics and shares credit with gottfried wilhelm leibniz for developing the infinitesimal calculus in principia sir newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity newton used his mathematical description of gravity to prove kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the solar system s heliocentricity he demonstrated that the motion of objects on earth and celestial bodies could be accounted for by the same principles newton s inference that the earth is an oblate spheroid was later confirmed by the geodetic measurements of maupertuis la condamine and others convincing most european scientists of the superiority of newtonian mechanics over earlier systems
# Three short sentences used to demonstrate splitting on ".".
txt = "This is line1. This is Line2. This is Line3"
print(txt)
This is line1. This is Line2. This is Line3
# Naive sentence split on "."; every piece after the first keeps its leading space.
txt.split(".")
['This is line1', ' This is Line2', ' This is Line3']
# Multi-line sample text used to demonstrate splitting into lines and words.
txt2 = """It’s best to begin a paragraph with a sentence that defines the topic that will be discussed. Try to keep it fairly simple. If you include too many ideas in your opening sentence, you run the risk of getting tangled in an overly complex and incoherent paragraph. Every sentence in a paragraph should support the first, “topic sentence.” When you begin discussing a new or related topic, start a new paragraph.
The University of North Carolina helps students with paragraph construction by providing them with a 5-step process:
Express the idea that the paragraph will cover in a topic sentence.
Explain the idea.
Use an example.
Explain why the example relates to the idea.
Draw a conclusion.
Obviously, the recipe has been designed for those who want to write an informative and convincing work. And because these rules aren’t really cast in stone, you’ll find that different institutions give different recommendations."""
# Split the text into lines on the newline character.
txt2.split("\n")
['It’s best to begin a paragraph with a sentence that defines the topic that will be discussed. Try to keep it fairly simple. If you include too many ideas in your opening sentence, you run the risk of getting tangled in an overly complex and incoherent paragraph. Every sentence in a paragraph should support the first, “topic sentence.” When you begin discussing a new or related topic, start a new paragraph.',
'',
'The University of North Carolina helps students with paragraph construction by providing them with a 5-step process:',
'',
'Express the idea that the paragraph will cover in a topic sentence.',
'Explain the idea.',
'Use an example.',
'Explain why the example relates to the idea.',
'Draw a conclusion.',
'Obviously, the recipe has been designed for those who want to write an informative and convincing work. And because these rules aren’t really cast in stone, you’ll find that different institutions give different recommendations.']
see above
# Keep the per-line split of txt2 in a variable, then echo it.
tmp = txt2.split("\n")
tmp
['It’s best to begin a paragraph with a sentence that defines the topic that will be discussed. Try to keep it fairly simple. If you include too many ideas in your opening sentence, you run the risk of getting tangled in an overly complex and incoherent paragraph. Every sentence in a paragraph should support the first, “topic sentence.” When you begin discussing a new or related topic, start a new paragraph.',
'',
'The University of North Carolina helps students with paragraph construction by providing them with a 5-step process:',
'',
'Express the idea that the paragraph will cover in a topic sentence.',
'Explain the idea.',
'Use an example.',
'Explain why the example relates to the idea.',
'Draw a conclusion.',
'Obviously, the recipe has been designed for those who want to write an informative and convincing work. And because these rules aren’t really cast in stone, you’ll find that different institutions give different recommendations.']
# Tokenise every line of the text on single spaces and print the word list
# for each line (an empty line prints [''])
for sent in tmp:
    print(sent.split(" "))
['It’s', 'best', 'to', 'begin', 'a', 'paragraph', 'with', 'a', 'sentence', 'that', 'defines', 'the', 'topic', 'that', 'will', 'be', 'discussed.', 'Try', 'to', 'keep', 'it', 'fairly', 'simple.', 'If', 'you', 'include', 'too', 'many', 'ideas', 'in', 'your', 'opening', 'sentence,', 'you', 'run', 'the', 'risk', 'of', 'getting', 'tangled', 'in', 'an', 'overly', 'complex', 'and', 'incoherent', 'paragraph.', 'Every', 'sentence', 'in', 'a', 'paragraph', 'should', 'support', 'the', 'first,', '“topic', 'sentence.”', 'When', 'you', 'begin', 'discussing', 'a', 'new', 'or', 'related', 'topic,', 'start', 'a', 'new', 'paragraph.']
['']
['The', 'University', 'of', 'North', 'Carolina', 'helps', 'students', 'with', 'paragraph', 'construction', 'by', 'providing', 'them', 'with', 'a', '5-step', 'process:']
['']
['Express', 'the', 'idea', 'that', 'the', 'paragraph', 'will', 'cover', 'in', 'a', 'topic', 'sentence.']
['Explain', 'the', 'idea.']
['Use', 'an', 'example.']
['Explain', 'why', 'the', 'example', 'relates', 'to', 'the', 'idea.']
['Draw', 'a', 'conclusion.']
['Obviously,', 'the', 'recipe', 'has', 'been', 'designed', 'for', 'those', 'who', 'want', 'to', 'write', 'an', 'informative', 'and', 'convincing', 'work.', 'And', 'because', 'these', 'rules', 'aren’t', 'really', 'cast', 'in', 'stone,', 'you’ll', 'find', 'that', 'different', 'institutions', 'give', 'different', 'recommendations.']
# Echo the raw string so the embedded "\n" separators are visible.
txt2
'It’s best to begin a paragraph with a sentence that defines the topic that will be discussed. Try to keep it fairly simple. If you include too many ideas in your opening sentence, you run the risk of getting tangled in an overly complex and incoherent paragraph. Every sentence in a paragraph should support the first, “topic sentence.” When you begin discussing a new or related topic, start a new paragraph.\n\nThe University of North Carolina helps students with paragraph construction by providing them with a 5-step process:\n\nExpress the idea that the paragraph will cover in a topic sentence.\nExplain the idea.\nUse an example.\nExplain why the example relates to the idea.\nDraw a conclusion.\nObviously, the recipe has been designed for those who want to write an informative and convincing work. And because these rules aren’t really cast in stone, you’ll find that different institutions give different recommendations.'
# Same line-then-word split as the explicit loop, built in one expression:
# the outer list has one entry per line, each entry is that line's words.
print(
    [
        line.split(" ")
        for line in txt2.split("\n")
    ]
)
[['It’s', 'best', 'to', 'begin', 'a', 'paragraph', 'with', 'a', 'sentence', 'that', 'defines', 'the', 'topic', 'that', 'will', 'be', 'discussed.', 'Try', 'to', 'keep', 'it', 'fairly', 'simple.', 'If', 'you', 'include', 'too', 'many', 'ideas', 'in', 'your', 'opening', 'sentence,', 'you', 'run', 'the', 'risk', 'of', 'getting', 'tangled', 'in', 'an', 'overly', 'complex', 'and', 'incoherent', 'paragraph.', 'Every', 'sentence', 'in', 'a', 'paragraph', 'should', 'support', 'the', 'first,', '“topic', 'sentence.”', 'When', 'you', 'begin', 'discussing', 'a', 'new', 'or', 'related', 'topic,', 'start', 'a', 'new', 'paragraph.'], [''], ['The', 'University', 'of', 'North', 'Carolina', 'helps', 'students', 'with', 'paragraph', 'construction', 'by', 'providing', 'them', 'with', 'a', '5-step', 'process:'], [''], ['Express', 'the', 'idea', 'that', 'the', 'paragraph', 'will', 'cover', 'in', 'a', 'topic', 'sentence.'], ['Explain', 'the', 'idea.'], ['Use', 'an', 'example.'], ['Explain', 'why', 'the', 'example', 'relates', 'to', 'the', 'idea.'], ['Draw', 'a', 'conclusion.'], ['Obviously,', 'the', 'recipe', 'has', 'been', 'designed', 'for', 'those', 'who', 'want', 'to', 'write', 'an', 'informative', 'and', 'convincing', 'work.', 'And', 'because', 'these', 'rules', 'aren’t', 'really', 'cast', 'in', 'stone,', 'you’ll', 'find', 'that', 'different', 'institutions', 'give', 'different', 'recommendations.']]
See the impact of the following:
# Show c again before its whitespace is normalised.
print(c)
Sir Isaac Newton PRS December March a was an English mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution His book Philosophiæ Naturalis Principia Mathematica Mathematical Principles of Natural Philosophy first published in laid the foundations of classical mechanics Newton also made seminal contributions to optics and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus In Principia sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity Newton used his mathematical description of gravity to prove Kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the Solar System s heliocentricity He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles Newton s inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis La Condamine and others convincing most European scientists of the superiority of Newtonian mechanics over earlier systems
# Collapse each run of whitespace into a single space.
# (r"\s{2,}" is an alternative that only touches runs of two or more.)
# Raw string: "\s" in a plain literal is an invalid escape sequence.
c_clean = re.sub(r"\s+", " ", c)
c_clean
'Sir Isaac Newton PRS December March a was an English mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution His book Philosophiæ Naturalis Principia Mathematica Mathematical Principles of Natural Philosophy first published in laid the foundations of classical mechanics Newton also made seminal contributions to optics and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus In Principia sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity Newton used his mathematical description of gravity to prove Kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the Solar System s heliocentricity He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles Newton s inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis La Condamine and others convincing most European scientists of the superiority of Newtonian mechanics over earlier systems '
# Tokenise on whitespace.  split() with no argument (rather than split(" "))
# discards the empty string that a leading or trailing space would otherwise
# produce, so "" is not treated as a word by the counting steps.
wd_token = c_clean.split()
print(wd_token)
['Sir', 'Isaac', 'Newton', 'PRS', 'December', 'March', 'a', 'was', 'an', 'English', 'mathematician', 'physicist', 'astronomer', 'theologian', 'and', 'author', 'described', 'in', 'his', 'own', 'day', 'as', 'a', 'natural', 'philosopher', 'who', 'is', 'widely', 'recognised', 'as', 'one', 'of', 'the', 'most', 'influential', 'scientists', 'of', 'all', 'time', 'and', 'a', 'key', 'figure', 'in', 'the', 'scientific', 'revolution', 'His', 'book', 'Philosophiæ', 'Naturalis', 'Principia', 'Mathematica', 'Mathematical', 'Principles', 'of', 'Natural', 'Philosophy', 'first', 'published', 'in', 'laid', 'the', 'foundations', 'of', 'classical', 'mechanics', 'Newton', 'also', 'made', 'seminal', 'contributions', 'to', 'optics', 'and', 'shares', 'credit', 'with', 'Gottfried', 'Wilhelm', 'Leibniz', 'for', 'developing', 'the', 'infinitesimal', 'calculus', 'In', 'Principia', 'sir', 'Newton', 'formulated', 'the', 'laws', 'of', 'motion', 'and', 'universal', 'gravitation', 'that', 'formed', 'the', 'dominant', 'scientific', 'viewpoint', 'until', 'it', 'was', 'superseded', 'by', 'the', 'theory', 'of', 'relativity', 'Newton', 'used', 'his', 'mathematical', 'description', 'of', 'gravity', 'to', 'prove', 'Kepler', 's', 'laws', 'of', 'planetary', 'motion', 'account', 'for', 'tides', 'the', 'trajectories', 'of', 'comets', 'the', 'precession', 'of', 'the', 'equinoxes', 'and', 'other', 'phenomena', 'eradicating', 'doubt', 'about', 'the', 'Solar', 'System', 's', 'heliocentricity', 'He', 'demonstrated', 'that', 'the', 'motion', 'of', 'objects', 'on', 'Earth', 'and', 'celestial', 'bodies', 'could', 'be', 'accounted', 'for', 'by', 'the', 'same', 'principles', 'Newton', 's', 'inference', 'that', 'the', 'Earth', 'is', 'an', 'oblate', 'spheroid', 'was', 'later', 'confirmed', 'by', 'the', 'geodetic', 'measurements', 'of', 'Maupertuis', 'La', 'Condamine', 'and', 'others', 'convincing', 'most', 'European', 'scientists', 'of', 'the', 'superiority', 'of', 'Newtonian', 'mechanics', 'over', 'earlier', 'systems', 
'']
# Case-sensitive word frequencies, keyed in order of first appearance.
# Counter tallies in one pass instead of calling list.count() once per token.
print(dict(Counter(wd_token)))
{'Sir': 1, 'Isaac': 1, 'Newton': 5, 'PRS': 1, 'December': 1, 'March': 1, 'a': 3, 'was': 3, 'an': 2, 'English': 1, 'mathematician': 1, 'physicist': 1, 'astronomer': 1, 'theologian': 1, 'and': 7, 'author': 1, 'described': 1, 'in': 3, 'his': 2, 'own': 1, 'day': 1, 'as': 2, 'natural': 1, 'philosopher': 1, 'who': 1, 'is': 2, 'widely': 1, 'recognised': 1, 'one': 1, 'of': 14, 'the': 16, 'most': 2, 'influential': 1, 'scientists': 2, 'all': 1, 'time': 1, 'key': 1, 'figure': 1, 'scientific': 2, 'revolution': 1, 'His': 1, 'book': 1, 'Philosophiæ': 1, 'Naturalis': 1, 'Principia': 2, 'Mathematica': 1, 'Mathematical': 1, 'Principles': 1, 'Natural': 1, 'Philosophy': 1, 'first': 1, 'published': 1, 'laid': 1, 'foundations': 1, 'classical': 1, 'mechanics': 2, 'also': 1, 'made': 1, 'seminal': 1, 'contributions': 1, 'to': 2, 'optics': 1, 'shares': 1, 'credit': 1, 'with': 1, 'Gottfried': 1, 'Wilhelm': 1, 'Leibniz': 1, 'for': 3, 'developing': 1, 'infinitesimal': 1, 'calculus': 1, 'In': 1, 'sir': 1, 'formulated': 1, 'laws': 2, 'motion': 3, 'universal': 1, 'gravitation': 1, 'that': 3, 'formed': 1, 'dominant': 1, 'viewpoint': 1, 'until': 1, 'it': 1, 'superseded': 1, 'by': 3, 'theory': 1, 'relativity': 1, 'used': 1, 'mathematical': 1, 'description': 1, 'gravity': 1, 'prove': 1, 'Kepler': 1, 's': 3, 'planetary': 1, 'account': 1, 'tides': 1, 'trajectories': 1, 'comets': 1, 'precession': 1, 'equinoxes': 1, 'other': 1, 'phenomena': 1, 'eradicating': 1, 'doubt': 1, 'about': 1, 'Solar': 1, 'System': 1, 'heliocentricity': 1, 'He': 1, 'demonstrated': 1, 'objects': 1, 'on': 1, 'Earth': 2, 'celestial': 1, 'bodies': 1, 'could': 1, 'be': 1, 'accounted': 1, 'same': 1, 'principles': 1, 'inference': 1, 'oblate': 1, 'spheroid': 1, 'later': 1, 'confirmed': 1, 'geodetic': 1, 'measurements': 1, 'Maupertuis': 1, 'La': 1, 'Condamine': 1, 'others': 1, 'convincing': 1, 'European': 1, 'superiority': 1, 'Newtonian': 1, 'over': 1, 'earlier': 1, 'systems': 1, '': 1}
# Echo the whitespace-normalised text.
c_clean
'Sir Isaac Newton PRS December March a was an English mathematician physicist astronomer theologian and author described in his own day as a natural philosopher who is widely recognised as one of the most influential scientists of all time and a key figure in the scientific revolution His book Philosophiæ Naturalis Principia Mathematica Mathematical Principles of Natural Philosophy first published in laid the foundations of classical mechanics Newton also made seminal contributions to optics and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus In Principia sir Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity Newton used his mathematical description of gravity to prove Kepler s laws of planetary motion account for tides the trajectories of comets the precession of the equinoxes and other phenomena eradicating doubt about the Solar System s heliocentricity He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles Newton s inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis La Condamine and others convincing most European scientists of the superiority of Newtonian mechanics over earlier systems '
# Case-insensitive word frequencies: lower-case first so that e.g. "Sir" and
# "sir" are counted together.
c_new = c_clean.lower()
# split() without an argument drops the empty token a trailing space would add.
wd_token_new = c_new.split()
# Counter tallies in one pass; dict() keeps the plain-dict type, with keys in
# order of first appearance.
c_wd_count = dict(Counter(wd_token_new))
print(c_wd_count)
{'sir': 2, 'isaac': 1, 'newton': 5, 'prs': 1, 'december': 1, 'march': 1, 'a': 3, 'was': 3, 'an': 2, 'english': 1, 'mathematician': 1, 'physicist': 1, 'astronomer': 1, 'theologian': 1, 'and': 7, 'author': 1, 'described': 1, 'in': 4, 'his': 3, 'own': 1, 'day': 1, 'as': 2, 'natural': 2, 'philosopher': 1, 'who': 1, 'is': 2, 'widely': 1, 'recognised': 1, 'one': 1, 'of': 14, 'the': 16, 'most': 2, 'influential': 1, 'scientists': 2, 'all': 1, 'time': 1, 'key': 1, 'figure': 1, 'scientific': 2, 'revolution': 1, 'book': 1, 'philosophiæ': 1, 'naturalis': 1, 'principia': 2, 'mathematica': 1, 'mathematical': 2, 'principles': 2, 'philosophy': 1, 'first': 1, 'published': 1, 'laid': 1, 'foundations': 1, 'classical': 1, 'mechanics': 2, 'also': 1, 'made': 1, 'seminal': 1, 'contributions': 1, 'to': 2, 'optics': 1, 'shares': 1, 'credit': 1, 'with': 1, 'gottfried': 1, 'wilhelm': 1, 'leibniz': 1, 'for': 3, 'developing': 1, 'infinitesimal': 1, 'calculus': 1, 'formulated': 1, 'laws': 2, 'motion': 3, 'universal': 1, 'gravitation': 1, 'that': 3, 'formed': 1, 'dominant': 1, 'viewpoint': 1, 'until': 1, 'it': 1, 'superseded': 1, 'by': 3, 'theory': 1, 'relativity': 1, 'used': 1, 'description': 1, 'gravity': 1, 'prove': 1, 'kepler': 1, 's': 3, 'planetary': 1, 'account': 1, 'tides': 1, 'trajectories': 1, 'comets': 1, 'precession': 1, 'equinoxes': 1, 'other': 1, 'phenomena': 1, 'eradicating': 1, 'doubt': 1, 'about': 1, 'solar': 1, 'system': 1, 'heliocentricity': 1, 'he': 1, 'demonstrated': 1, 'objects': 1, 'on': 1, 'earth': 2, 'celestial': 1, 'bodies': 1, 'could': 1, 'be': 1, 'accounted': 1, 'same': 1, 'inference': 1, 'oblate': 1, 'spheroid': 1, 'later': 1, 'confirmed': 1, 'geodetic': 1, 'measurements': 1, 'maupertuis': 1, 'la': 1, 'condamine': 1, 'others': 1, 'convincing': 1, 'european': 1, 'superiority': 1, 'newtonian': 1, 'over': 1, 'earlier': 1, 'systems': 1, '': 1}
# Bar chart of the word frequencies (one bar per distinct word).
plt.figure(figsize=(16, 4))
# x and y are passed by keyword: recent seaborn releases no longer accept
# them as positional arguments.
sns.barplot(x=list(c_wd_count.keys()), y=list(c_wd_count.values()))
# Rotate the word labels so they do not overlap.
plt.xticks(rotation=90, fontsize=9)
plt.show()
from nltk.corpus import stopwords #stopwords.words("english")
from spacy.lang.en.stop_words import STOP_WORDS # list of stopwords
from sklearn.feature_extraction import stop_words #stop_words.ENGLISH_STOP_WORD
# NLTK's English stop-word list and its size.
nl_sw = stopwords.words("english")
print(len(nl_sw))
# print(nl_sw)
179
# spaCy's English stop words and their count.
sp_sw = STOP_WORDS
print(len(sp_sw))
326
# scikit-learn's English stop words and their count.
# NOTE(review): newer scikit-learn releases expose this constant as
# sklearn.feature_extraction.text.ENGLISH_STOP_WORDS -- confirm that the
# `stop_words` module import still works with the installed version.
sk_sw = stop_words.ENGLISH_STOP_WORDS
print(len(sk_sw))
318
# Token count, then the lower-cased tokens themselves, before stop-word removal.
print(len(wd_token_new), wd_token_new, sep="\n")
208
['sir', 'isaac', 'newton', 'prs', 'december', 'march', 'a', 'was', 'an', 'english', 'mathematician', 'physicist', 'astronomer', 'theologian', 'and', 'author', 'described', 'in', 'his', 'own', 'day', 'as', 'a', 'natural', 'philosopher', 'who', 'is', 'widely', 'recognised', 'as', 'one', 'of', 'the', 'most', 'influential', 'scientists', 'of', 'all', 'time', 'and', 'a', 'key', 'figure', 'in', 'the', 'scientific', 'revolution', 'his', 'book', 'philosophiæ', 'naturalis', 'principia', 'mathematica', 'mathematical', 'principles', 'of', 'natural', 'philosophy', 'first', 'published', 'in', 'laid', 'the', 'foundations', 'of', 'classical', 'mechanics', 'newton', 'also', 'made', 'seminal', 'contributions', 'to', 'optics', 'and', 'shares', 'credit', 'with', 'gottfried', 'wilhelm', 'leibniz', 'for', 'developing', 'the', 'infinitesimal', 'calculus', 'in', 'principia', 'sir', 'newton', 'formulated', 'the', 'laws', 'of', 'motion', 'and', 'universal', 'gravitation', 'that', 'formed', 'the', 'dominant', 'scientific', 'viewpoint', 'until', 'it', 'was', 'superseded', 'by', 'the', 'theory', 'of', 'relativity', 'newton', 'used', 'his', 'mathematical', 'description', 'of', 'gravity', 'to', 'prove', 'kepler', 's', 'laws', 'of', 'planetary', 'motion', 'account', 'for', 'tides', 'the', 'trajectories', 'of', 'comets', 'the', 'precession', 'of', 'the', 'equinoxes', 'and', 'other', 'phenomena', 'eradicating', 'doubt', 'about', 'the', 'solar', 'system', 's', 'heliocentricity', 'he', 'demonstrated', 'that', 'the', 'motion', 'of', 'objects', 'on', 'earth', 'and', 'celestial', 'bodies', 'could', 'be', 'accounted', 'for', 'by', 'the', 'same', 'principles', 'newton', 's', 'inference', 'that', 'the', 'earth', 'is', 'an', 'oblate', 'spheroid', 'was', 'later', 'confirmed', 'by', 'the', 'geodetic', 'measurements', 'of', 'maupertuis', 'la', 'condamine', 'and', 'others', 'convincing', 'most', 'european', 'scientists', 'of', 'the', 'superiority', 'of', 'newtonian', 'mechanics', 'over', 'earlier', 'systems', 
'']
# Keep only the tokens that are not in spaCy's stop-word collection, then
# report how many remain and list them.
wd_token_new_wo_sw = [
    token
    for token in wd_token_new
    if token not in sp_sw
]
print(len(wd_token_new_wo_sw), wd_token_new_wo_sw, sep="\n")
119
['sir', 'isaac', 'newton', 'prs', 'december', 'march', 'english', 'mathematician', 'physicist', 'astronomer', 'theologian', 'author', 'described', 'day', 'natural', 'philosopher', 'widely', 'recognised', 'influential', 'scientists', 'time', 'key', 'figure', 'scientific', 'revolution', 'book', 'philosophiæ', 'naturalis', 'principia', 'mathematica', 'mathematical', 'principles', 'natural', 'philosophy', 'published', 'laid', 'foundations', 'classical', 'mechanics', 'newton', 'seminal', 'contributions', 'optics', 'shares', 'credit', 'gottfried', 'wilhelm', 'leibniz', 'developing', 'infinitesimal', 'calculus', 'principia', 'sir', 'newton', 'formulated', 'laws', 'motion', 'universal', 'gravitation', 'formed', 'dominant', 'scientific', 'viewpoint', 'superseded', 'theory', 'relativity', 'newton', 'mathematical', 'description', 'gravity', 'prove', 'kepler', 's', 'laws', 'planetary', 'motion', 'account', 'tides', 'trajectories', 'comets', 'precession', 'equinoxes', 'phenomena', 'eradicating', 'doubt', 'solar', 'system', 's', 'heliocentricity', 'demonstrated', 'motion', 'objects', 'earth', 'celestial', 'bodies', 'accounted', 'principles', 'newton', 's', 'inference', 'earth', 'oblate', 'spheroid', 'later', 'confirmed', 'geodetic', 'measurements', 'maupertuis', 'la', 'condamine', 'convincing', 'european', 'scientists', 'superiority', 'newtonian', 'mechanics', 'earlier', 'systems', '']
# Add an upper-case "IT" token to the filtered list in place, then show the
# list followed by its new length.
wd_token_new_wo_sw.extend(["IT"])
print(wd_token_new_wo_sw, len(wd_token_new_wo_sw), sep="\n")
{'sir': 2, 'isaac': 1, 'newton': 5, 'prs': 1, 'december': 1, 'march': 1, 'english': 1, 'mathematician': 1, 'physicist': 1, 'astronomer': 1, 'theologian': 1, 'author': 1, 'described': 1, 'day': 1, 'natural': 2, 'philosopher': 1, 'widely': 1, 'recognised': 1, 'influential': 1, 'scientists': 2, 'time': 1, 'key': 1, 'figure': 1, 'scientific': 2, 'revolution': 1, 'book': 1, 'philosophiæ': 1, 'naturalis': 1, 'principia': 2, 'mathematica': 1, 'mathematical': 2, 'principles': 2, 'philosophy': 1, 'published': 1, 'laid': 1, 'foundations': 1, 'classical': 1, 'mechanics': 2, 'seminal': 1, 'contributions': 1, 'optics': 1, 'shares': 1, 'credit': 1, 'gottfried': 1, 'wilhelm': 1, 'leibniz': 1, 'developing': 1, 'infinitesimal': 1, 'calculus': 1, 'formulated': 1, 'laws': 2, 'motion': 3, 'universal': 1, 'gravitation': 1, 'formed': 1, 'dominant': 1, 'viewpoint': 1, 'superseded': 1, 'theory': 1, 'relativity': 1, 'description': 1, 'gravity': 1, 'prove': 1, 'kepler': 1, 's': 3, 'planetary': 1, 'account': 1, 'tides': 1, 'trajectories': 1, 'comets': 1, 'precession': 1, 'equinoxes': 1, 'phenomena': 1, 'eradicating': 1, 'doubt': 1, 'solar': 1, 'system': 1, 'heliocentricity': 1, 'demonstrated': 1, 'objects': 1, 'earth': 2, 'celestial': 1, 'bodies': 1, 'accounted': 1, 'inference': 1, 'oblate': 1, 'spheroid': 1, 'later': 1, 'confirmed': 1, 'geodetic': 1, 'measurements': 1, 'maupertuis': 1, 'la': 1, 'condamine': 1, 'convincing': 1, 'european': 1, 'superiority': 1, 'newtonian': 1, 'earlier': 1, 'systems': 1, '': 1, 'IT': 1}
102
# Replace the filtered token list with its word -> frequency mapping (the
# name is reused by the plotting step, so it is kept).
# Counter tallies in one pass instead of calling list.count() per token, and
# re-running this line on the resulting dict leaves the counts unchanged
# rather than failing on a missing .count() method.
wd_token_new_wo_sw = dict(Counter(wd_token_new_wo_sw))
print(wd_token_new_wo_sw)
{'sir': 2, 'isaac': 1, 'newton': 5, 'prs': 1, 'december': 1, 'march': 1, 'english': 1, 'mathematician': 1, 'physicist': 1, 'astronomer': 1, 'theologian': 1, 'author': 1, 'described': 1, 'day': 1, 'natural': 2, 'philosopher': 1, 'widely': 1, 'recognised': 1, 'influential': 1, 'scientists': 2, 'time': 1, 'key': 1, 'figure': 1, 'scientific': 2, 'revolution': 1, 'book': 1, 'philosophiæ': 1, 'naturalis': 1, 'principia': 2, 'mathematica': 1, 'mathematical': 2, 'principles': 2, 'philosophy': 1, 'published': 1, 'laid': 1, 'foundations': 1, 'classical': 1, 'mechanics': 2, 'seminal': 1, 'contributions': 1, 'optics': 1, 'shares': 1, 'credit': 1, 'gottfried': 1, 'wilhelm': 1, 'leibniz': 1, 'developing': 1, 'infinitesimal': 1, 'calculus': 1, 'formulated': 1, 'laws': 2, 'motion': 3, 'universal': 1, 'gravitation': 1, 'formed': 1, 'dominant': 1, 'viewpoint': 1, 'superseded': 1, 'theory': 1, 'relativity': 1, 'description': 1, 'gravity': 1, 'prove': 1, 'kepler': 1, 's': 3, 'planetary': 1, 'account': 1, 'tides': 1, 'trajectories': 1, 'comets': 1, 'precession': 1, 'equinoxes': 1, 'phenomena': 1, 'eradicating': 1, 'doubt': 1, 'solar': 1, 'system': 1, 'heliocentricity': 1, 'demonstrated': 1, 'objects': 1, 'earth': 2, 'celestial': 1, 'bodies': 1, 'accounted': 1, 'inference': 1, 'oblate': 1, 'spheroid': 1, 'later': 1, 'confirmed': 1, 'geodetic': 1, 'measurements': 1, 'maupertuis': 1, 'la': 1, 'condamine': 1, 'convincing': 1, 'european': 1, 'superiority': 1, 'newtonian': 1, 'earlier': 1, 'systems': 1, '': 1, 'IT': 1}
# Bar chart of the word frequencies after stop-word removal.
plt.figure(figsize=(16, 4))
# x and y are passed by keyword: recent seaborn releases no longer accept
# them as positional arguments.
sns.barplot(
    x=list(wd_token_new_wo_sw.keys()),
    y=list(wd_token_new_wo_sw.values()),
)
# Rotate the word labels so they do not overlap.
plt.xticks(rotation=90, fontsize=9)
plt.show()