Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
Precision and recall curve
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.metrics import precision_recall_curve, plot_precision_recall_curve
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
from sklearn import metrics
import matplotlib.pyplot as plt
Next, we make the data frame highly imbalanced by keeping only 10% of the positive samples and all of the negative samples.
# Load the breast cancer data into a DataFrame with one column per feature,
# plus a "target" column holding the class label.
df_dict = load_breast_cancer()
df = pd.DataFrame(df_dict.data, columns=df_dict.feature_names)
df["target"] = df_dict.target

# Manually create a highly imbalanced dataset: keep a 10% sample of the
# target == 1 rows and every target == 0 row.
# random_state pins the sample so the same rows are kept on every run;
# without it each execution works on a different subset.
positives = df[df["target"] == 1].sample(frac=0.10, random_state=42)
negatives = df[df["target"] == 0]
df_imbalanced = pd.concat([positives, negatives])

# The target column now only holds 0s and 1s, so its mean is the share of
# target == 1 rows left after downsampling.
print(df_imbalanced["target"].mean())
0.14516129032258066