Learn practical skills, build real-world projects, and advance your career
import pandas as pd
# Load the three CSV files from the current working directory.
train_sets, test_sets, gender_submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'gender_submission.csv')
)
# Bare expression: shows the frame only when it is the last statement of a
# notebook cell; it has no effect when run as a plain script.
train_sets
# Print the column names, non-null counts and dtypes of the training frame.
train_sets.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
# Pull individual columns out of the training frame as standalone Series.
# The targets on the left line up one-to-one with the column names below.
(
    train_num,
    train_sur,
    train_class,
    train_sex,
    train_age,
    train_sib,
    train_ticket,
    train_fare,
    train_cab,
    train_emb,
) = (
    train_sets[column_name]
    for column_name in (
        'PassengerId',
        'Survived',
        'Pclass',
        'Sex',
        'Age',
        'SibSp',
        'Ticket',
        'Fare',
        'Cabin',
        'Embarked',
    )
)
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Enable Chinese text in plots: use the SimHei font for sans-serif text and
# turn off the Unicode minus sign on the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Draw a 2x3 grid of overview charts for the training data on one figure and
# save it to "examples.jpg".
#
# The figure is created exactly once, already at its final size. Calling
# plt.figure() a second time would leave the first figure empty and make
# `figure` (and its alpha setting) refer to that empty figure instead of the
# one that is actually drawn and saved.
figure = plt.figure(figsize=(12, 8))
figure.set(alpha=0.2)

# Top-left: number of passengers per Survived value (bar chart).
plt.subplot2grid((2, 3), (0, 0))
train_sets.Survived.value_counts().plot(kind='bar')
plt.title("Survived situation")
plt.ylabel("数目")  # label text is Chinese for "count"

# Top-middle: number of passengers per Pclass value (bar chart).
plt.subplot2grid((2, 3), (0, 1))
train_sets.Pclass.value_counts().plot(kind='bar')
plt.title('passenger ranks')

# Top-right: Age plotted against the Survived value (scatter).
plt.subplot2grid((2, 3), (0, 2))
plt.scatter(train_sets.Survived, train_sets.Age)
plt.ylabel("age")
# The on/off flag is passed positionally: its keyword was `b` in older
# Matplotlib releases and `visible` in newer ones, so naming it breaks on
# one side or the other.
plt.grid(True, which='major', axis='y')
plt.title("age and survive")

# Bottom-left, spanning two columns: one Age density curve per Pclass value.
plt.subplot2grid((2, 3), (1, 0), colspan=2)
for passenger_class in (1, 2, 3):
    train_sets.Age[train_sets.Pclass == passenger_class].plot(kind='kde')
plt.xlabel("age")
plt.ylabel("density")
plt.title("Age distribution of passengers of all levels")
# Legend entries follow the order in which the curves were drawn above.
plt.legend(('First class', 'Second class', 'Third class'), loc='best')

# Bottom-right: number of passengers per Embarked value (bar chart).
plt.subplot2grid((2, 3), (1, 2))
train_sets.Embarked.value_counts().plot(kind='bar')
plt.title("aboard numbers")
plt.ylabel("num")

# Save before show(), while the figure is still the current one.
plt.savefig("examples.jpg")
plt.show()



<matplotlib.figure.Figure at 0x15d147eb8d0>
# Survival outcome broken down by passenger class, drawn as a stacked bar
# chart and saved to disk.
# Count passengers per Pclass separately for Survived == 0 and Survived == 1.
Survived_0 = train_sets.Pclass[train_sets.Survived == 0].value_counts()
Survived_1 = train_sets.Pclass[train_sets.Survived == 1].value_counts()
# One row per Pclass value, one column per outcome.
df = pd.DataFrame({'saved': Survived_1, 'unsaved': Survived_0})
# DataFrame.plot opens its own figure, so the size has to be passed to it
# directly; a preceding plt.figure(figsize=...) would only leave an empty
# extra figure behind and the chart would ignore the requested size.
df.plot(kind='bar', stacked=True, figsize=(8, 6))

plt.title("rescued situation distributes among classes")
plt.xlabel("passenger class")
plt.ylabel("num")
# NOTE(review): the filename looks misspelled ("resvued") and has no
# extension, so the output format falls back to Matplotlib's default.
# Left unchanged in case something else reads this file -- confirm and rename.
plt.savefig('resvued situation')


<matplotlib.figure.Figure at 0x20b1e2fe9e8>