Learn practical skills, build real-world projects, and advance your career
import jovian
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

import os
# Show the entries of the storage directory that the test CSV is read from.
print(os.listdir("/storage/santander_comp/"))
['.ipynb_checkpoints', 'submissions', 'test.csv', 'models', 'train.csv', 'nbs']
test_path = "/storage/santander_comp/test.csv"


def _flag_unique_values(values):
    """Mark, per column, the entries whose value occurs exactly once.

    Args:
        values: 2-D NumPy array (rows x columns).

    Returns:
        A ``uint8`` array with the same shape as ``values`` holding 1 where
        the entry's value appears only once in its column and 0 elsewhere.
    """
    # A compact integer dtype is enough for 0/1 flags; ``np.zeros_like`` would
    # inherit the data's dtype and allocate a matrix as large as the data.
    flags = np.zeros(values.shape, dtype=np.uint8)
    for feature in tqdm(range(values.shape[1])):
        # ``first_rows[k]`` is the row of the first occurrence of the k-th
        # distinct value; when its count is 1 that is its only occurrence.
        _, first_rows, counts = np.unique(values[:, feature],
                                          return_index=True,
                                          return_counts=True)
        flags[first_rows[counts == 1], feature] = 1
    return flags


# Load the test set as a plain array without the identifier column.
df_test = pd.read_csv(test_path).drop(columns=['ID_code']).values
# Not used in the visible part of the file; kept in case later code reads it.
unique_samples = []
unique_count = _flag_unique_values(df_test)
HBox(children=(IntProgress(value=0, max=200), HTML(value='')))
# Number of unique-value flags per row, computed once and reused for both
# selections below.
_unique_per_row = np.sum(unique_count, axis=1)
# Rows with at least one flagged entry versus rows with none.
real_samples_indexes = np.flatnonzero(_unique_per_row > 0)
synthetic_samples_indexes = np.flatnonzero(_unique_per_row == 0)