%matplotlib inline
%reload_ext autoreload
%autoreload
import jovian
from fastai.vision import *
import pandas as pd
import numpy as np
# Collect the category names from the category list file.
with open('list_category_cloth.txt', 'r') as f:
    # The first two lines of the file are skipped; on every remaining line
    # the text before the first space is taken as the category name.
    categories = [line.partition(' ')[0] for line in f.readlines()[2:]]
# Build the image -> category-label table: one row of whitespace-separated
# fields per data line (later loaded as the 'images' and 'category_label'
# columns).
with open('list_category_image.txt', 'r') as f:
    # The first two lines of the file are skipped.
    # str.split() with no argument splits on any run of whitespace and never
    # yields empty tokens, so trailing spaces or a CR/LF ending cannot leak
    # an empty-string field into a row. Blank lines are dropped so they do
    # not produce short rows that would break the two-column DataFrame.
    images = [line.split() for line in f.readlines()[2:] if line.strip()]
# Build the image -> split table (train / val / test): one row of
# whitespace-separated fields per data line (later loaded as the 'images'
# and 'dataset' columns).
with open('list_eval_partition.txt', 'r') as f:
    # The first two lines of the file are skipped.
    # str.split() with no argument splits on any run of whitespace and never
    # yields empty tokens, so trailing spaces or a CR/LF ending cannot leak
    # an empty-string field into a row. Blank lines are dropped so they do
    # not produce short rows that would break the two-column DataFrame.
    images_partition = [
        line.split() for line in f.readlines()[2:] if line.strip()
    ]
# Assemble a single table with image path, numeric category label and the
# name of the split each image belongs to.
data_df = pd.DataFrame(images, columns=['images', 'category_label'])
# Labels are parsed from text, so convert them to integers.
data_df = data_df.astype({'category_label': int})
partition_df = pd.DataFrame(images_partition, columns=['images', 'dataset'])
# Default (inner) merge: only images present in both tables are kept.
data_df = pd.merge(data_df, partition_df, on='images')
# Notebook display: number of images per split.
data_df['dataset'].value_counts()
train 209222
val 40000
test 40000
Name: dataset, dtype: int64
# Translate each numeric label into its category name; labels are offset by
# one relative to positions in the `categories` list.
data_df['category'] = data_df['category_label'].map(
    lambda label: categories[int(label) - 1]
)
# Notebook display: how many distinct labels actually occur in the data.
data_df['category_label'].nunique()
# It seems that a few labels were merged into the Dress label.
# NOTE(review): the bare 46 below looks like pasted notebook output of the
# nunique() call above rather than intentional code -- confirm.
46
# Notebook display: preview the first rows of the assembled table.
data_df.head()
from pathlib import Path

# Directory passed to fastai as the base path for the 'images' column.
images_path = Path('/home/jupyter/deepFashion')

# Row indices for the training and validation splits. Rows marked 'test'
# are passed to neither argument of split_by_idxs.
train_rows = data_df.loc[data_df['dataset'] == 'train'].index
valid_rows = data_df.loc[data_df['dataset'] == 'val'].index

data_source = (
    ImageList.from_df(df=data_df, path=images_path, cols='images')
    .split_by_idxs(train_rows, valid_rows)
    .label_from_df(cols='category')
)

# Default fastai transforms, 224px images, batches of 128, normalized with
# imagenet_stats.
tmfs = get_transforms()
data = (
    data_source
    .transform(tmfs, size=224)
    .databunch(bs=128)
    .normalize(imagenet_stats)
)

# Attach the rows marked 'test' as the test set of the databunch.
test_rows = data_df[data_df['dataset'] == 'test']
test_data = ImageList.from_df(df=test_rows, path=images_path, cols='images')
data.add_test(test_data)
# To keep the training images in their original order, shuffling of the
# training loader can be turned off (left disabled here):
# data.train_dl = data.train_dl.new(shuffle=False)
# Notebook display: show a sample batch of images with their labels.
data.show_batch()