In disaster zones, especially in developing countries, maps and accessibility information are crucial for crisis response. We would like to pose the challenge of automatically extracting roads and street networks from satellite images.
For details about other DeepGlobe challenges and the workshop: deepglobe.org.
Some research papers about the problem statement.
14796 images in total, out of which:
6226 → training satellite images
6226 → training mask images
1243 → validation satellite images
1101 → test satellite images
2 classes (basically a binary image segmentation task).
Source → https://towardsdatascience.com/iou-a-better-detection-evaluation-metric-45a511185be1
Metric(s)
Import Packages
! pip install patool --quiet
import warnings
warnings.filterwarnings('ignore')
import patoolib
import os
import pandas as pd
import cv2 as cv
from tqdm import tqdm
from matplotlib import pyplot as plt
Loading the original data
# Dataset download/extraction steps, left commented out (presumably run once
# per environment - confirm before re-enabling):
# ! gdown --id 1Cf00v1ZEy0zK2hJFV0-Wgf6l0eOpM0Sw
# patoolib.extract_archive(archive='/content/DeepGlobe.zip', outdir='/content/DeepGlobe')
# Load the metadata table that sits inside the extracted dataset directory.
file_path = '/content/DeepGlobe/metadata.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
# Shape of the metadata table as (rows, columns).
df.shape
(8570, 4)
# Preview the first rows of the metadata table.
df.head()
# Count missing values per column; in the recorded output only 'mask_path'
# has any.
df.isnull().sum()
image_id 0
split 0
sat_image_path 0
mask_path 2344
dtype: int64
# Prefix that the relative image paths in the metadata are appended to.
data_path = '/content/DeepGlobe/'
# One frame per value of the 'split' column, unpacked in train/valid/test order.
train_data, valid_data, test_data = (
    df[df['split'] == split_name] for split_name in ('train', 'valid', 'test')
)
# (rows, columns) of the training split.
train_data.shape
(6226, 4)
# (rows, columns) of the validation split.
valid_data.shape
(1243, 4)
# (rows, columns) of the test split.
test_data.shape
(1101, 4)
14796 files.
(6226 * 2) + 1243 + 1101
14796
The size of each image is (1024, 1024).
def _read_image(path, flags=None):
    """Read an image with OpenCV, raising instead of returning ``None``."""
    # cv.imread reports a missing or undecodable file by returning None, which
    # would otherwise only fail later with an unrelated-looking error.
    image = cv.imread(path) if flags is None else cv.imread(path, flags)
    if image is None:
        raise OSError(f'Could not read image (missing or unreadable): {path}')
    return image


def display_images(df, split_val, split_col='split', data_path='/content/DeepGlobe/', limit=5, show_plot=False):
    """Load the first ``limit`` images of one split and optionally plot them.

    For the ``'train'`` split each satellite image is shown next to its mask
    in a 10x10 figure; for any other split only the satellite image is shown,
    in a 5x5 figure. Plot titles are the image file names.

    Args:
        df: Metadata frame with ``split_col`` and ``'sat_image_path'`` columns,
            plus ``'mask_path'`` when ``split_val`` is ``'train'``.
        split_val: Value of ``split_col`` to select, e.g. ``'train'``,
            ``'valid'`` or ``'test'``.
        split_col: Name of the column holding the split label.
        data_path: Prefix prepended verbatim to every path from ``df``, so it
            must already end with a path separator.
        limit: Maximum number of rows to process.
        show_plot: Whether to draw the figures. Images are read from disk
            either way.

    Returns:
        None.

    Raises:
        OSError: If a satellite or mask image cannot be read.
    """
    split_data = df[df[split_col] == split_val][:limit]
    # Only the 'train' split is treated as having ground-truth masks.
    has_masks = split_val == 'train'

    sat_paths = [data_path + rel for rel in split_data['sat_image_path'].to_list()]
    if has_masks:
        mask_paths = [data_path + rel for rel in split_data['mask_path'].to_list()]
    else:
        mask_paths = [None] * len(sat_paths)

    for sat_path, mask_path in zip(sat_paths, mask_paths):
        # OpenCV loads colour images as BGR; matplotlib expects RGB.
        sat_image = cv.cvtColor(_read_image(sat_path), cv.COLOR_BGR2RGB)
        # Masks are read unchanged so their stored channels/depth are kept.
        mask_image = _read_image(mask_path, cv.IMREAD_UNCHANGED) if has_masks else None

        if not show_plot:
            continue

        if has_masks:
            plt.figure(figsize=(10, 10))
            plt.subplot(1, 2, 1)
            plt.title(sat_path.split('/')[-1])
            plt.axis("off")
            plt.imshow(sat_image)
            plt.subplot(1, 2, 2)
            plt.title(mask_path.split('/')[-1])
            plt.axis("off")
            plt.imshow(mask_image)
        else:
            plt.figure(figsize=(5, 5))
            plt.title(sat_path.split('/')[-1])
            plt.axis("off")
            plt.imshow(sat_image)
        plt.show()
    return None
# Plot the first training images (the function's default limit is 5) next to their masks.
display_images(df=df, split_val='train', show_plot=True)