142 lines
5.6 KiB
Python
142 lines
5.6 KiB
Python
|
|
# The following code is modified from https://github.com/shelhamer/clockwork-fcn
|
||
|
|
import sys
|
||
|
|
import os
|
||
|
|
import glob
|
||
|
|
import numpy as np
|
||
|
|
from PIL import Image
|
||
|
|
|
||
|
|
|
||
|
|
class cityscapes:
    """Reader and label utilities for a Cityscapes dataset directory.

    All paths are built relative to ``data_path``. The constructor imports
    the ``labels`` helper module from ``<data_path>/scripts/helpers/`` to
    build the raw-ID -> train-ID and train-ID -> color lookup tables.
    """

    def __init__(self, data_path):
        """Store the dataset root and load the label mappings.

        Args:
            data_path: dataset root directory, something like
                ``/data2/cityscapes``.

        Side effects:
            Prepends ``<data_path>/scripts/helpers/`` to ``sys.path`` so the
            ``labels`` helper module can be imported.
        """
        self.dir = data_path
        self.classes = ['road', 'sidewalk', 'building', 'wall', 'fence',
                        'pole', 'traffic light', 'traffic sign', 'vegetation', 'terrain',
                        'sky', 'person', 'rider', 'car', 'truck',
                        'bus', 'train', 'motorcycle', 'bicycle']
        # Per-channel mean subtracted by preprocess() after the channel flip.
        self.mean = np.array((72.78044, 83.21195, 73.45286), dtype=np.float32)
        # import cityscapes label helper and set up label mappings
        sys.path.insert(0, '{}/scripts/helpers/'.format(self.dir))
        labels = __import__('labels')
        # dictionary mapping from raw IDs to train IDs
        self.id2trainId = {label.id: label.trainId for label in labels.labels}
        # dictionary mapping train IDs to colors as 3-tuples
        # (when several labels share a train ID, the last one listed wins)
        self.trainId2color = {label.trainId: label.color for label in labels.labels}

    def get_dset(self, split):
        """List images as (city, id) tuples for the specified split.

        Reads ``<dir>/ImageSets/segFine/train.txt`` when ``split`` is
        ``'train'`` and ``<dir>/ImageSets/segFine/val.txt`` for any other
        value. Each non-blank line is expected to look like ``city/id``.

        TODO(shelhamer) generate splits from cityscapes itself, instead of
        relying on these separately made text files.
        """
        list_name = 'train' if split == 'train' else 'val'
        list_path = '{}/ImageSets/segFine/{}.txt'.format(self.dir, list_name)
        # Context manager so the file handle is closed deterministically.
        with open(list_path) as list_file:
            entries = list_file.read().splitlines()
        pairs = []
        for entry in entries:
            if not entry.strip():
                # Tolerate blank lines instead of failing with IndexError.
                continue
            parts = entry.split('/')
            pairs.append((parts[0], parts[1]))
        return pairs

    def load_image(self, split, city, idx):
        """Open the ``leftImg8bit`` frame ``idx`` of ``city`` in ``split``.

        Returns the PIL image returned by ``Image.open`` for
        ``<dir>/leftImg8bit_sequence/<split>/<city>/<idx>_leftImg8bit.png``.
        """
        im = Image.open('{}/leftImg8bit_sequence/{}/{}/{}_leftImg8bit.png'.format(self.dir, split, city, idx))
        return im

    def assign_trainIds(self, label):
        """Map the given label IDs to the train IDs appropriate for training.

        Uses the label mapping provided in labels.py from the cityscapes
        scripts (``self.id2trainId``).

        Args:
            label: array-like of raw label IDs.

        Returns:
            A new float32 array of the same shape. IDs that do not appear in
            ``self.id2trainId`` are left unchanged.
        """
        raw = np.array(label, dtype=np.float32)
        mapped = raw.copy()
        # Compare against the untouched raw IDs: remapping in place would
        # re-map a pixel whenever a train ID equals a raw ID handled later.
        for raw_id, train_id in self.id2trainId.items():
            mapped[raw == raw_id] = train_id
        return mapped

    def load_label(self, split, city, idx):
        """Load label image as 1 x height x width integer array of label indices.

        The raw IDs read from
        ``<dir>/gtFine/<split>/<city>/<idx>_gtFine_labelIds.png`` are mapped
        to train IDs and cast to uint8.
        The leading singleton dimension is required by the loss.
        """
        label = Image.open('{}/gtFine/{}/{}/{}_gtFine_labelIds.png'.format(self.dir, split, city, idx))
        label = self.assign_trainIds(label)  # get proper labels for eval
        label = np.array(label, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

    def preprocess(self, im):
        """Preprocess loaded image (by load_image) for Caffe.

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def palette(self, label):
        """Map trainIds to colors as specified in labels.py.

        Args:
            label: 2-D array of train IDs, or a 3-D array whose first slice
                along the leading axis is used.

        Returns:
            height x width x 3 float array of colors. Pixels whose train ID
            is not in ``self.trainId2color`` are zero.
        """
        if label.ndim == 3:
            label = label[0]
        # Zero-initialised so unmapped pixels are deterministic rather than
        # whatever np.empty happened to leave in memory.
        color = np.zeros((label.shape[0], label.shape[1], 3))
        for train_id, rgb in self.trainId2color.items():
            color[label == train_id, :] = rgb
        return color

    @staticmethod
    def make_boundaries(label, thickness=None):
        """Return a boolean mask encoding the boundaries of the objects.

        Input is an image label, output is a numpy array mask.
        Extract pixels at the true boundary by dilation - erosion of label.
        Don't just pick the void label as it is not exclusive to the boundaries.

        Declared as a static method so it can be called on either the class
        or an instance.

        Args:
            label: label array; the first slice along the leading axis is
                used (presumably 1 x height x width, as produced by
                load_label -- confirm against callers).
            thickness: radius passed to ``skimage.morphology.disk``; required.

        Raises:
            ValueError: if ``thickness`` is None.
        """
        if thickness is None:
            raise ValueError('thickness must be specified')
        import skimage.morphology as skm
        void = 255
        # Foreground: pixels with a label above 0 that are not void.
        mask = np.logical_and(label > 0, label != void)[0]
        selem = skm.disk(thickness)
        boundaries = np.logical_xor(skm.dilation(mask, selem),
                                    skm.erosion(mask, selem))
        return boundaries

    def list_label_frames(self, split):
        """Select labeled frames from a split for evaluation.

        Returns:
            A list of ``"city_shot_frame"`` strings, one per
            ``*labelIds.png`` file under ``<dir>/gtFine/<split>/<city>/``,
            ordered by city name and then by file name.
        """
        def file2idx(f):
            """Helper to convert file path into frame ID"""
            city, shot, frame = (os.path.basename(f).split('_')[:3])
            return "_".join([city, shot, frame])

        # Sorted so the result does not depend on filesystem listing order.
        cities = sorted(os.path.basename(f)
                        for f in glob.glob('{}/gtFine/{}/*'.format(self.dir, split)))
        frames = []
        for c in cities:
            files = sorted(glob.glob('{}/gtFine/{}/{}/*labelIds.png'.format(self.dir, split, c)))
            frames.extend([file2idx(f) for f in files])
        return frames

    def _sequence_frame_paths(self, split, idx, length):
        """Build the paths of the ``length`` frames before ``idx`` plus ``idx``."""
        city, shot, frame = idx.split('_')
        frame = int(frame)
        return ['{0}/leftImg8bit_sequence/{1}/{2}/{2}_{3}_{4:0>6d}_leftImg8bit.png'.format(
                    self.dir, split, city, shot, i)
                for i in range(frame - length, frame + 1)]

    def collect_frame_sequence(self, split, idx, length):
        """Collect sequence of frames preceding (and including) a labeled frame
        as a list of Images.

        Args:
            split: dataset split directory the frames are read from.
            idx: frame ID of the form ``"city_shot_frame"``.
            length: number of preceding frames to include; the result holds
                ``length + 1`` images, the last being the labeled frame.

        Note: 19 preceding frames are provided for each labeled frame.
        """
        return [Image.open(frame_path)
                for frame_path in self._sequence_frame_paths(split, idx, length)]
|