Jovian
⭐️
Sign In
In [1]:
import pandas as pd
train_sets=pd.read_csv('train.csv')
test_sets=pd.read_csv('test.csv')
gender_submission=pd.read_csv('gender_submission.csv')
In [2]:
train_sets
train_sets.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 891 entries, 0 to 890 Data columns (total 12 columns): PassengerId 891 non-null int64 Survived 891 non-null int64 Pclass 891 non-null int64 Name 891 non-null object Sex 891 non-null object Age 714 non-null float64 SibSp 891 non-null int64 Parch 891 non-null int64 Ticket 891 non-null object Fare 891 non-null float64 Cabin 204 non-null object Embarked 889 non-null object dtypes: float64(2), int64(5), object(5) memory usage: 83.6+ KB
In [3]:
train_num=train_sets['PassengerId']
train_sur=train_sets['Survived']
train_class=train_sets['Pclass']
train_sex=train_sets['Sex']
train_age=train_sets['Age']
train_sib=train_sets['SibSp']
train_ticket=train_sets['Ticket']
train_fare=train_sets['Fare']
train_cab=train_sets['Cabin']
train_emb=train_sets['Embarked']
In [25]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
#输入中文
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
figure=plt.figure()
figure.set(alpha=0.2)
plt.figure(figsize=(12, 8))


plt.subplot2grid((2,3),(0,0)),train_sets.Survived.value_counts().plot(kind='bar')# 柱状图 
plt.title("Survived situation") # 标题
plt.ylabel("数目")  

plt.subplot2grid((2,3),(0,1))
train_sets.Pclass.value_counts().plot(kind='bar')
plt.title('passenger ranks')
#plt.ylabel('num')

plt.subplot2grid((2,3),(0,2))
plt.scatter(train_sets.Survived, train_sets.Age)
plt.ylabel("age")                         # 设定纵坐标名称
plt.grid(b=True, which='major', axis='y') 
plt.title("age and survive")

plt.subplot2grid((2,3),(1,0),colspan=2)
train_sets.Age[train_sets.Pclass == 1].plot(kind='kde')   
train_sets.Age[train_sets.Pclass == 2].plot(kind='kde')
train_sets.Age[train_sets.Pclass == 3].plot(kind='kde')
plt.xlabel(u"age")# plots an axis lable
plt.ylabel(u"density") 
plt.title(u"Age distribution of passengers of all levels")
plt.legend((u'First class', u'Second class',u'Third class'),loc='best') 


plt.subplot2grid((2,3),(1,2))
train_sets.Embarked.value_counts().plot(kind='bar')
plt.title(u"aboard numbers")
plt.ylabel(u"num")  
plt.savefig("examples.jpg")
plt.show()




<matplotlib.figure.Figure at 0x15d147eb8d0>
In [6]:
#各个等级乘客获救情况
Survived_0 = train_sets.Pclass[train_sets.Survived == 0].value_counts()
Survived_1 = train_sets.Pclass[train_sets.Survived == 1].value_counts()
df=pd.DataFrame({u'saved':Survived_1, u'unsaved':Survived_0})
plt.figure(figsize=(8, 6))
df.plot(kind='bar', stacked=True)

plt.title(u"rescued situation distributes among classes")
plt.xlabel(u"passenger class") 
plt.ylabel(u"num") 
plt.savefig('resvued situation')



<matplotlib.figure.Figure at 0x20b1e2fe9e8>
In [7]:
#看看各性别的获救情况
fig = plt.figure()
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_0 = train_sets.Sex[train_sets.Survived == 0].value_counts()
Survived_1 = train_sets.Sex[train_sets.Survived == 1].value_counts()
df=pd.DataFrame({u'survived':Survived_1, u'unsurvived':Survived_0})
df.plot(kind='bar', stacked=True)
plt.title(u"sex distribution of survive")
plt.xlabel(u"sex") 
plt.ylabel(u"num")
plt.savefig('sex situation')

<matplotlib.figure.Figure at 0x20b1fd5eac8>
In [8]:

 #然后我们再来看看各种舱级别情况下各性别的获救情况

fig=plt.figure(figsize=(12,6))
fig.set(alpha=0.65) # 设置图像透明度,无所谓
plt.title(u"survived situatuin based on sex and class")

ax1=plt.subplot2grid((1,4),(0,0))
train_sets.Survived[train_sets.Sex == 'female'][train_sets.Pclass != 3].value_counts().plot(kind='bar', label="female highclass", color='#FA2479')
#ax1.set_xticklabels([u"saved", u"unsaved"], rotation=0)
ax1.legend([u"female/high class"], loc='best')

ax2=plt.subplot2grid((1,4),(0,1),sharey=ax1)
train_sets.Survived[train_sets.Sex == 'female'][train_sets.Pclass == 3].value_counts().plot(kind='bar', label='female, low class', color='pink')
#ax2.set_xticklabels([u"unsaved", u"saved"], rotation=0)
plt.legend([u"female/low class"], loc='best')

ax3=plt.subplot2grid((1,4),(0,2),sharey=ax1)
train_sets.Survived[train_sets.Sex == 'male'][train_sets.Pclass != 3].value_counts().plot(kind='bar', label='male, high class',color='lightblue')
#ax3.set_xticklabels([u"unsaved", u"saved"], rotation=0)
plt.legend([u"male/high class"], loc='best')

ax4=plt.subplot2grid((1,4),(0,3),sharey=ax1)
train_sets.Survived[train_sets.Sex == 'male'][train_sets.Pclass == 3].value_counts().plot(kind='bar', label='male low class', color='steelblue')
#ax4.set_xticklabels([u"unsaved", u"saved"], rotation=0)
plt.legend([u"male/low class"], loc='best')

plt.savefig('sex and class distribution')


In [9]:
fig = plt.figure(figsize=(8,6))
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_0 = train_sets.Embarked[train_sets.Survived == 0].value_counts()
Survived_1 = train_sets.Embarked[train_sets.Survived == 1].value_counts()
df=pd.DataFrame({u'saved':Survived_1, u'unsaved':Survived_0})
df.plot(kind='bar', stacked=True)
plt.title(u"embarkeed distribution")
plt.xlabel(u"embarked") 
plt.ylabel(u"num") 

plt.savefig('embarked distribution')
<matplotlib.figure.Figure at 0x20b1e2feb00>
In [10]:
g = train_sets.groupby(['SibSp','Survived'])
df1 = pd.DataFrame(g.count()['PassengerId'])
df1


Out[10]:
In [11]:
g = train_sets.groupby(['SibSp','Survived'])
df2 = pd.DataFrame(g.count()['PassengerId'])
df2
Out[11]:
In [12]:
train_sets.Cabin.value_counts()

Out[12]:
B96 B98        4
C23 C25 C27    4
G6             4
E101           3
F33            3
C22 C26        3
F2             3
D              3
E67            2
D20            2
E33            2
D26            2
D35            2
C52            2
C123           2
C83            2
E24            2
B35            2
E44            2
C65            2
F4             2
C92            2
B49            2
B28            2
E8             2
E25            2
B51 B53 B55    2
C125           2
D33            2
B18            2
              ..
A26            1
B82 B84        1
B39            1
A24            1
E68            1
F E69          1
B3             1
C110           1
D15            1
D56            1
D50            1
B69            1
B73            1
D28            1
C30            1
A16            1
C46            1
A31            1
C90            1
B38            1
C104           1
D49            1
C82            1
B101           1
A6             1
C91            1
B71            1
F G63          1
B80            1
D9             1
Name: Cabin, Length: 147, dtype: int64
In [13]:
fig = plt.figure(figsize=(8,6))
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_cabin = train_sets.Survived[pd.notnull(train_sets.Cabin)].value_counts()
Survived_nocabin = train_sets.Survived[pd.isnull(train_sets.Cabin)].value_counts()
df=pd.DataFrame({u'yes':Survived_cabin, u'no':Survived_nocabin}).transpose()
df.plot(kind='bar', stacked=True)
plt.title(u"Is cabin effect survive")
plt.xlabel(u"Cabin or not") 
plt.ylabel(u"num")
plt.savefig('Is cabin effect survive')

<matplotlib.figure.Figure at 0x20b1fd596a0>
In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor

### 使用 RandomForestClassifier 填补缺失的年龄属性
def set_missing_ages(df):

    # 把已有的数值型特征取出来丢进Random Forest Regressor中
    age_df = df[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]

    # 乘客分成已知年龄和未知年龄两部分
    known_age = age_df[age_df.Age.notnull()].values
    unknown_age = age_df[age_df.Age.isnull()].values

    # y即目标年龄
    y = known_age[:, 0]

    # X即特征属性值
    X = known_age[:, 1:]

    # fit到RandomForestRegressor之中
    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)

    # 用得到的模型进行未知年龄结果预测
    predictedAges = rfr.predict(unknown_age[:, 1:])
    print(unknown_age[:, 1:])
    print(predictedAges)

    # 用得到的预测结果填补原缺失数据
    df.loc[ (df.Age.isnull()), 'Age' ] = predictedAges 

    return df, rfr

def set_Cabin_type(df):
    df.loc[ (df.Cabin.notnull()), 'Cabin' ] = "Yes"
    df.loc[ (df.Cabin.isnull()), 'Cabin' ] = "No"
    return df

train_sets, rfr = set_missing_ages(train_sets)
train_sets = set_Cabin_type(train_sets)


[[ 8.4583 0. 0. 3. ] [ 13. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 146.5208 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 21.6792 0. 2. 3. ] [ 35.5 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 15.2458 1. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.7875 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 24.15 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 22.3583 1. 1. 3. ] [ 15.2458 2. 0. 3. ] [ 7.3125 0. 0. 3. ] [ 8.6625 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 55. 1. 0. 1. ] [ 25.925 0. 0. 1. ] [ 25.4667 1. 3. 3. ] [ 69.55 2. 8. 3. ] [ 15.05 0. 0. 2. ] [ 50. 0. 0. 1. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 7.75 0. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.55 0. 0. 3. ] [ 14.4542 0. 1. 3. ] [ 15.5 0. 1. 3. ] [ 7.25 0. 0. 3. ] [ 79.2 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 31. 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 26. 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 30.5 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 23.25 0. 2. 3. ] [ 12.35 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 110.8833 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 23.25 0. 2. 3. ] [ 133.65 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 35. 0. 0. 1. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 82.1708 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 7.7292 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.8958 0. 0. 3. ] [ 6.8583 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 8.1125 0. 0. 3. ] [ 19.9667 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 51.8625 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.725 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 52. 0. 0. 1. ] [ 0. 0. 0. 2. ] [ 25.4667 1. 3. 3. ] [ 19.9667 0. 1. 3. ] [ 14.4583 0. 0. 3. ] [ 15.1 0. 0. 3. ] [ 7.6292 0. 0. 3. ] [ 26.55 0. 0. 1. ] [ 8.05 0. 0. 3. ] [ 24.15 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 221.7792 0. 0. 1. ] [ 7.2292 0. 0. 3. ] [ 22.3583 2. 0. 3. ] [ 14.5 0. 0. 3. ] [ 13.8625 0. 0. 2. ] [ 7.8292 0. 0. 3. ] [ 227.525 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 14.4583 0. 1. 3. ] [ 8.7125 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 2. 0. 3. ] [ 33. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 42.4 0. 0. 1. ] [ 7.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 0. 0. 0. 1. ] [ 16.1 0. 1. 3. ] [ 56.4958 0. 0. 3. ] [ 7.55 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.8292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 52. 0. 1. 1. ] [ 0. 0. 0. 2. ] [ 8.1375 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 15.2458 1. 1. 3. ] [ 26.55 0. 0. 1. ] [ 15.5 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.8958 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 30. 0. 0. 1. ] [ 14.5 0. 0. 3. ] [ 39.6 0. 0. 1. ] [ 24.15 0. 1. 3. ] [ 7.225 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 23.45 2. 1. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 30.6958 0. 0. 1. ] [ 0. 0. 0. 1. ] [ 6.95 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 29.7 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 89.1042 0. 1. 1. ] [ 7.2292 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 9.5 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 23.45 2. 1. 3. ]] [ 23.83895259 32.06649305 29.51820514 29.51820514 22.38011324 27.94720616 36.10804822 35.2958243 22.87630686 27.94720616 30.70572678 33.12898535 35.2958243 23.45968333 44.06483036 41.20008848 17.09991595 27.94720616 30.70572678 23.32262739 30.70572678 30.70572678 27.94720616 27.51545426 33.55117591 30.70572678 35.2958243 25.78337698 25.34409583 29.78279613 25.52340334 10.86986696 26.03188214 49.5542756 7.30954704 10.86986696 31.71894048 46.24976824 33.12898535 35.2958243 35.2958243 10.86986696 31.09342985 27.94720616 7.30954704 31.10838452 20.80015413 33.12898535 29.78279613 34.62028571 35.2958243 35.2958243 36.87489821 35.2958243 35.05181757 57.74249226 41.20008848 41.57487718 35.2958243 23.31368333 42.57451554 30.70572678 28.57888393 10.86986696 23.31368333 31.42587794 27.94720616 26.68916849 59.96916448 29.51820514 22.38011324 22.38011324 33.12898535 22.87630686 35.2958243 31.94479345 27.94720616 19.89558113 7.30954704 27.94720616 20.68131488 35.05181757 30.70572678 27.94720616 29.78279613 35.2958243 26.68916849 23.47643239 24.10899881 30.70572678 44.06001827 35.2958243 30.70572678 35.05181757 19.89558113 29.78279613 44.65953056 35.05181757 7.30954704 24.10899881 18.19479484 25.7554753 26.82073281 49.8994093 30.70572678 34.17374861 29.51820514 22.87630686 38.5155 22.87630686 8.01370298 24.33021696 32.79502428 26.3814249 38.5155 35.2958243 30.70572678 30.70572678 22.87630686 35.2958243 20.07016773 29.04293865 30.70572678 27.45639428 24.40235514 29.51820514 27.94720616 37.51615833 27.86873699 33.12898535 35.2958243 19.89558113 38.83477484 26.68916849 29.56889809 31.10838452 27.94720616 26.3814249 27.94720616 27.51545426 39.08817814 35.05181757 23.47643239 29.56889809 19.89558113 17.09991595 49.8994093 27.30887391 19.89558113 35.05181757 27.94720616 27.94720616 38.42663175 24.33021696 39.17490238 33.55117591 29.51820514 35.2958243 19.89558113 16.1939502 35.2958243 10.86986696 50.38328427 38.83477484 35.09787898 29.56889809 35.2958243 22.87630686 30.70572678 50.91095013 10.86986696 43.96476448 22.87630686 10.86986696 25.97788916 27.94720616 16.1939502 ]
In [56]:
age_df=train_sets[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]
unknown_age = age_df[age_df.Age.isnull()].values
print(unknown_age.shape)
print(unknown_age[:,1:])


(177, 5) [[ 8.4583 0. 0. 3. ] [ 13. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 146.5208 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 21.6792 0. 2. 3. ] [ 35.5 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 15.2458 1. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.7875 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 24.15 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 22.3583 1. 1. 3. ] [ 15.2458 2. 0. 3. ] [ 7.3125 0. 0. 3. ] [ 8.6625 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 55. 1. 0. 1. ] [ 25.925 0. 0. 1. ] [ 25.4667 1. 3. 3. ] [ 69.55 2. 8. 3. ] [ 15.05 0. 0. 2. ] [ 50. 0. 0. 1. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 7.75 0. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.55 0. 0. 3. ] [ 14.4542 0. 1. 3. ] [ 15.5 0. 1. 3. ] [ 7.25 0. 0. 3. ] [ 79.2 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 31. 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 26. 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 30.5 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 23.25 0. 2. 3. ] [ 12.35 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 110.8833 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 23.25 0. 2. 3. ] [ 133.65 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 35. 0. 0. 1. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 82.1708 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 7.7292 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.8958 0. 0. 3. ] [ 6.8583 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 8.1125 0. 0. 3. ] [ 19.9667 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 51.8625 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.725 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 52. 0. 0. 1. ] [ 0. 0. 0. 2. ] [ 25.4667 1. 3. 3. ] [ 19.9667 0. 1. 3. ] [ 14.4583 0. 0. 3. ] [ 15.1 0. 0. 3. ] [ 7.6292 0. 0. 3. ] [ 26.55 0. 0. 1. ] [ 8.05 0. 0. 3. ] [ 24.15 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 221.7792 0. 0. 1. ] [ 7.2292 0. 0. 3. ] [ 22.3583 2. 0. 3. ] [ 14.5 0. 0. 3. ] [ 13.8625 0. 0. 2. ] [ 7.8292 0. 0. 3. ] [ 227.525 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 14.4583 0. 1. 3. ] [ 8.7125 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 2. 0. 3. ] [ 33. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 42.4 0. 0. 1. ] [ 7.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 0. 0. 0. 1. ] [ 16.1 0. 1. 3. ] [ 56.4958 0. 0. 3. ] [ 7.55 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.8292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 52. 0. 1. 1. ] [ 0. 0. 0. 2. ] [ 8.1375 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 15.2458 1. 1. 3. ] [ 26.55 0. 0. 1. ] [ 15.5 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.8958 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 30. 0. 0. 1. ] [ 14.5 0. 0. 3. ] [ 39.6 0. 0. 1. ] [ 24.15 0. 1. 3. ] [ 7.225 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 23.45 2. 1. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 30.6958 0. 0. 1. ] [ 0. 0. 0. 1. ] [ 6.95 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 29.7 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 89.1042 0. 1. 1. ] [ 7.2292 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 9.5 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 23.45 2. 1. 3. ]] [[ 8.4583 0. 0. 3. ] [ 13. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 146.5208 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 21.6792 0. 2. 3. ] [ 35.5 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 15.2458 1. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.7875 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 24.15 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 22.3583 1. 1. 3. ] [ 15.2458 2. 0. 3. ] [ 7.3125 0. 0. 3. ] [ 8.6625 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 55. 1. 0. 1. ] [ 25.925 0. 0. 1. ] [ 25.4667 1. 3. 3. ] [ 69.55 2. 8. 3. ] [ 15.05 0. 0. 2. ] [ 50. 0. 0. 1. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 7.75 0. 1. 3. ] [ 7.8958 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.55 0. 0. 3. ] [ 14.4542 0. 1. 3. ] [ 15.5 0. 1. 3. ] [ 7.25 0. 0. 3. ] [ 79.2 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 31. 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 26. 0. 0. 1. ] [ 27.7208 0. 0. 1. ] [ 30.5 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 23.25 0. 2. 3. ] [ 12.35 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 110.8833 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 23.25 0. 2. 3. ] [ 133.65 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 35. 0. 0. 1. ] [ 7.225 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 7.8792 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 82.1708 0. 1. 1. ] [ 7.8958 0. 0. 3. ] [ 7.7292 0. 0. 3. ] [ 25.4667 1. 3. 3. ] [ 7.8958 0. 0. 3. ] [ 6.8583 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 8.05 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 16.1 0. 1. 3. ] [ 8.1125 0. 0. 3. ] [ 19.9667 0. 1. 3. ] [ 8.05 0. 0. 3. ] [ 51.8625 0. 1. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.725 0. 0. 3. ] [ 7.25 0. 0. 3. ] [ 52. 0. 0. 1. ] [ 0. 0. 0. 2. ] [ 25.4667 1. 3. 3. ] [ 19.9667 0. 1. 3. ] [ 14.4583 0. 0. 3. ] [ 15.1 0. 0. 3. ] [ 7.6292 0. 0. 3. ] [ 26.55 0. 0. 1. ] [ 8.05 0. 0. 3. ] [ 24.15 0. 0. 3. ] [ 7.225 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 221.7792 0. 0. 1. ] [ 7.2292 0. 0. 3. ] [ 22.3583 2. 0. 3. ] [ 14.5 0. 0. 3. ] [ 13.8625 0. 0. 2. ] [ 7.8292 0. 0. 3. ] [ 227.525 0. 0. 1. ] [ 7.75 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 14.4583 0. 1. 3. ] [ 8.7125 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 7.75 2. 0. 3. ] [ 33. 0. 0. 2. ] [ 7.225 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 42.4 0. 0. 1. ] [ 7.05 0. 0. 3. ] [ 15.5 0. 1. 3. ] [ 7.75 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 0. 0. 0. 1. ] [ 16.1 0. 1. 3. ] [ 56.4958 0. 0. 3. ] [ 7.55 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.8292 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 7.775 0. 0. 3. ] [ 52. 0. 1. 1. ] [ 0. 0. 0. 2. ] [ 8.1375 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.7333 0. 0. 3. ] [ 15.2458 1. 1. 3. ] [ 26.55 0. 0. 1. ] [ 15.5 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 0. 0. 0. 2. ] [ 7.8958 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 30. 0. 0. 1. ] [ 14.5 0. 0. 3. ] [ 39.6 0. 0. 1. ] [ 24.15 0. 1. 3. ] [ 7.225 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.7375 0. 0. 3. ] [ 23.45 2. 1. 3. ] [ 7.75 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 30.6958 0. 0. 1. ] [ 0. 0. 0. 1. ] [ 6.95 0. 0. 3. ] [ 56.4958 0. 0. 3. ] [ 7.75 0. 0. 3. ] [ 7.2292 0. 0. 3. ] [ 8.05 0. 0. 3. ] [ 29.7 0. 0. 1. ] [ 69.55 2. 8. 3. ] [ 89.1042 0. 1. 1. ] [ 7.2292 0. 0. 3. ] [ 69.55 2. 8. 3. ] [ 9.5 0. 0. 3. ] [ 7.8958 0. 0. 3. ] [ 23.45 2. 1. 3. ]]
In [4]:
train_sets
Out[4]:
In [5]:

dummies_Cabin = pd.get_dummies(train_sets['Cabin'], prefix= 'Cabin')

dummies_Embarked = pd.get_dummies(train_sets['Embarked'], prefix= 'Embarked')

dummies_Sex = pd.get_dummies(train_sets['Sex'], prefix= 'Sex')

dummies_Pclass = pd.get_dummies(train_sets['Pclass'], prefix= 'Pclass')

df = pd.concat([train_sets, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)
df


Out[5]:
In [6]:
import sklearn.preprocessing as preprocessing
scaler = preprocessing.StandardScaler()
age_scale_param = scaler.fit(df['Age'].values.reshape(-1,1))
df['Age_scaled'] = scaler.fit_transform(df['Age'].values.reshape(-1,1), age_scale_param)
fare_scale_param = scaler.fit(df['Fare'].values.reshape(-1,1))
df['Fare_scaled'] = scaler.fit_transform(df['Fare'].values.reshape(-1,1), fare_scale_param)
df

Out[6]:
In [7]:
from sklearn import linear_model

# 用正则取出我们要的属性值
train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
train_np = train_df.values

# y即第0列:Survival结果
y = train_np[:, 0]

# X即第1列及以后:特征属性值
X = train_np[:, 1:]

# fit到LogisticRegression之中
clf = linear_model.LogisticRegression(solver='liblinear',C=1.0, penalty='l1', tol=1e-6)
clf.fit(X, y)

clf
Out[7]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=1e-06,
          verbose=0, warm_start=False)
In [8]:
data_test = pd.read_csv("test.csv")
data_test.loc[ (data_test.Fare.isnull()), 'Fare' ] = 0
# 接着我们对test_data做和train_data中一致的特征变换
# 首先用同样的RandomForestRegressor模型填上丢失的年龄
tmp_df = data_test[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]
null_age = tmp_df[data_test.Age.isnull()].values
# 根据特征属性X预测年龄并补上
X = null_age[:, 1:]
predictedAges = rfr.predict(X)
data_test.loc[ (data_test.Age.isnull()), 'Age' ] = predictedAges

data_test = set_Cabin_type(data_test)
dummies_Cabin = pd.get_dummies(data_test['Cabin'], prefix= 'Cabin')
dummies_Embarked = pd.get_dummies(data_test['Embarked'], prefix= 'Embarked')
dummies_Sex = pd.get_dummies(data_test['Sex'], prefix= 'Sex')
dummies_Pclass = pd.get_dummies(data_test['Pclass'], prefix= 'Pclass')


df_test = pd.concat([data_test, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df_test.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)
df_test['Age_scaled'] = scaler.fit_transform(df_test['Age'].values.reshape(-1,1), age_scale_param)
df_test['Fare_scaled'] = scaler.fit_transform(df_test['Fare'].values.reshape(-1,1), fare_scale_param)
df_test


Out[8]:
In [9]:

test = df_test.filter(regex='Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(test)
print(predictions)
type(predictions)
[ 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0.]
Out[9]:
numpy.ndarray
In [10]:
result = pd.DataFrame({'PassengerId':data_test['PassengerId'].values, 'Survived':predictions.tolist()})
result.to_csv("ogistic_regression_predictions.csv", index=False)
In [11]:
result1=pd.read_csv("ogistic_regression_predictions.csv")
result1
Out[11]:
In [99]:
pd.DataFrame({"columns":list(train_df.columns)[1:], "coef":list(clf.coef_.T)})

Out[99]:
In [12]:
from sklearn import cross_validation

 #简单看看打分情况
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
all_data = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
X = all_data.as_matrix()[:,1:]
y = all_data.as_matrix()[:,0]
print(cross_validation.cross_val_score(clf, X, y, cv=5))

[ 0.81564246 0.81564246 0.78651685 0.78651685 0.81355932]
C:\Users\ASUS\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)
In [13]:
#查看比较差的结果
split_train, split_cv = cross_validation.train_test_split(df, test_size=0.3, random_state=42)

train_df = split_train.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
# 生成模型
clf = linear_model.LogisticRegression(solver='liblinear',C=1.0, penalty='l1', tol=1e-6)
clf.fit(train_df.values[:,1:], train_df.values[:,0])

# 对cross validation数据进行预测

cv_df = split_cv.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(cv_df.values[:,1:])

origin_data_train = pd.read_csv("train.csv")
bad_cases = origin_data_train.loc[origin_data_train['PassengerId'].isin(split_cv[predictions != cv_df.values[:,0]]['PassengerId'].values)]
bad_cases
Out[13]:
In [23]:
split_train, split_cv = cross_validation.train_test_split(df, test_size=0.3, random_state=2)

train_df = split_train.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
# 生成模型
clf = linear_model.LogisticRegression(solver='liblinear',C=1.0, penalty='l1', tol=1e-6)
clf.fit(train_df.values[:,1:], train_df.values[:,0])

# 对cross validation数据进行预测

cv_df = split_cv.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(cv_df.values[:,1:])

origin_data_train = pd.read_csv("train.csv")
bad_cases = origin_data_train.loc[origin_data_train['PassengerId'].isin(split_cv[predictions != cv_df.values[:,0]]['PassengerId'].values)]
bad_cases
Out[23]:
In [27]:
import numpy as np
import matplotlib.pyplot as plt
# from sklearn.learning_curve import learning_curve  修改以fix learning_curve DeprecationWarning
from sklearn.model_selection import learning_curve

# 用sklearn的learning_curve得到training_score和cv_score,使用matplotlib画出learning curve
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, 
                        train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
    """
    画出data在某模型上的learning curve.
    参数解释
    ----------
    estimator : 你用的分类器。
    title : 表格的标题。
    X : 输入的feature,numpy类型
    y : 输入的target vector
    ylim : tuple格式的(ymin, ymax), 设定图像中纵坐标的最低点和最高点
    cv : 做cross-validation的时候,数据分成的份数,其中一份作为cv集,其余n-1份作为training(默认为3份)
    n_jobs : 并行的的任务数(默认1)
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose)
    
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    
    if plot:
        plt.figure()
        plt.title(title)
        #绘制最高点最低点
        if ylim is not None:
            plt.ylim(*ylim)
        plt.xlabel(u"训练样本数")
        plt.ylabel(u"得分")
        plt.gca().invert_yaxis()
        plt.grid()
    
        plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, 
                         alpha=0.1, color="b")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, 
                         alpha=0.1, color="r")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="b", label=u"训练集上得分")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="r", label=u"交叉验证集上得分")
    
        plt.legend(loc="best")
        
        plt.draw()
        plt.gca().invert_yaxis()
        plt.show()
    
    midpoint = ((train_scores_mean[-1] + train_scores_std[-1]) + (test_scores_mean[-1] - test_scores_std[-1])) / 2
    diff = (train_scores_mean[-1] + train_scores_std[-1]) - (test_scores_mean[-1] - test_scores_std[-1])
    return midpoint, diff

plot_learning_curve(clf, u"学习曲线", X, y)
Out[27]:
(0.80656968448540245, 0.018258876711338634)
In [29]:
from sklearn.ensemble import BaggingRegressor

train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass.*|Mother|Child|Family|Title')
train_np = train_df.values

# y即Survival结果
y = train_np[:, 0]

# X即特征属性值
X = train_np[:, 1:]

# fit到BaggingRegressor之中
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
bagging_clf = BaggingRegressor(clf, n_estimators=20, max_samples=0.8, max_features=1.0, bootstrap=True, bootstrap_features=False, n_jobs=-1)
bagging_clf.fit(X, y)

test = df_test.filter(regex='Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass.*|Mother|Child|Family|Title')
predictions = bagging_clf.predict(test)
result = pd.DataFrame({'PassengerId':data_test['PassengerId'].values, 'Survived':predictions.astype(np.int32)})
result.to_csv("logistic_regression_bagging_predictions.csv", index=False)
result

Out[29]:
In [ ]:
import jovian
jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jvn.io
[jovian] Error: The current API key is invalid or expired.
[jovian] Please enter your API key (from https://jvn.io ):