Commit 198f1e95 authored by timo's avatar timo
Browse files

init

parents
# Source code of Affordance Segmentation from Single Images
This repository contains code to reproduce the experiments from the paper. This is a simplified version of
our actual codebase, which contains only the necessary modules and should be more comprehensible. We will
assume that all commands are run from the project root (i.e., where `train.py` is located).
## Requirements
### Software
You'll need PyTorch (we use version 0.4) to run the code.
You can download binaries from [here](https://pytorch.org/).
Additionally, opencv is required. On Ubuntu there is a package `python3-opencv` which allows
an easy installation.
```
pip3 install numpy scipy
```
PSPNet uses a dilated ResNet encoder, we rely on the implementation by Fisher Yu:
```
git clone https://github.com/fyu/drn
```
### Data
We rely on the ADE20K dataset, which is available [here](http://groups.csail.mit.edu/vision/datasets/ADE20K/) and
The Oregon dataset available [here](http://web.engr.oregonstate.edu/~sinisa/research/publications/AffordanceGT.zip).
You can use this script to download both.
```bash
cd data
wget http://groups.csail.mit.edu/vision/datasets/ADE20K/ADE20K_2016_07_26.zip
unzip ADE20K_2016_07_26.zip
wget http://web.engr.oregonstate.edu/~sinisa/research/publications/AffordanceGT.zip
unzip AffordanceGT.zip
```
For legal reasons we cannot provide the original images of the expert dataset, but only our annotations (aff_expert.zip).
To extract the annotations and transfer the original images run
```bash
unzip data/aff_expert.zip -d aff_expert
python3 restore_expert_images.py
```
The simulated data can be obtained with:
```bash
cd data
wget -O affordances_simulated.zip https://owncloud.gwdg.de/index.php/s/8l3gBSAt1hVgEms
unzip affordances_simulated.zip
wget -O pretrained.zip https://owncloud.gwdg.de/index.php/s/hCbJCtgy3AholZd
unzip pretrained.zip
```
Make sure the paths in `paths.yaml` are properly set.
* `ADE20K_PATH` should point to a folder containing a folder "ADE20K_2016_07_26".
* `OREGON_AFFORDANCES` should point to a folder containing a folder "Annotations".
* `HQ_AFF` should point to a folder containing 50 folders with number names.
* `SYNTH_AFF`: should point to a folder
## Run the Code
* `train.py [Model] [Dataset]`: Training. You need to provide a model name and a dataset name.
* `score.py [Model] [Dataset]`: Evaluate the network. You need to provide a model name (can be a pretrained file)
and a dataset name.
* `sample.py [Model] [image filename]`: Visualize predictions by writing an image `output.png`.
`[Model]` can be either `ResNet50Dense` or `PSPNet`
`[Dataset]` must be the name of one of the supported datasets (see the dataset definitions in the code for valid names).
Have Fun!
This diff is collapsed.
class BaseMetric(object):
    """
    Common interface for evaluation metrics.

    Subclasses implement `add` (accumulate one batch) and `value`
    (compute the final result from everything accumulated so far).
    """

    def __init__(self, metric_names, eval_intermediate=True, eval_validation=True):
        # names are stored as an immutable tuple, one entry per reported value
        self._names = tuple(metric_names)
        self._eval_intermediate = eval_intermediate
        self._eval_validation = eval_validation

    def names(self):
        """Tuple of names, one per value this metric reports."""
        return self._names

    def eval_intermediate(self):
        """Whether this metric should be evaluated at intermediate training steps."""
        return self._eval_intermediate

    def eval_validation(self):
        """Whether this metric should be evaluated on the validation set."""
        return self._eval_validation

    def add(self, predictions, ground_truth):
        """Accumulate one batch of predictions and targets (subclass responsibility)."""
        raise NotImplementedError

    def value(self):
        """Return the final metric value(s) (subclass responsibility)."""
        raise NotImplementedError
import numpy as np
from .base import BaseMetric
class IoUMetric(BaseMetric):
    """
    Computes class-wise intersection over union (IoU) and its mean.

    Arguments:
        n_classes: number of classes (used in the non-binary, argmax case).
        binary: if True, each prediction channel is thresholded independently;
            otherwise the prediction is obtained via argmax over channels.
        model_ref: reference to a model providing per-channel `thresholds`
            (used only if `binary=True` and `threshold is None`).
        threshold: fixed value at which activations are considered on
            (only used if `binary=True`).
    """
    def __init__(self, n_classes, binary=False, model_ref=None, threshold=None):
        thres = '@{:.2f}'.format(threshold) if threshold is not None else ''
        super().__init__(('IoU_cw' + thres, 'ioU_cw_mean' + thres))
        self.threshold = threshold
        self.n_classes = n_classes
        self.binary = binary
        self.model_ref = model_ref
        # running per-class intersection/union sums over all `add` calls
        self.intersections = []
        self.unions = []

    def add(self, pred, gt):
        """Accumulate one batch. `pred`/`gt` are tuples of NxCxHxW tensors;
        `gt` may contain an optional mask as second element."""
        predictions = pred[0]
        ground_truth = gt[0]
        mask = gt[1] if len(gt) == 2 else None
        predictions = predictions.detach().cpu().numpy()
        ground_truth = ground_truth.detach().cpu().numpy().astype('int32')
        if mask is not None:
            mask = mask.detach().cpu().numpy()
        if self.binary:
            if self.threshold is None:
                # per-channel thresholds learned by the model
                a = predictions > self.model_ref.thresholds.detach().cpu().numpy().reshape((1, -1, 1, 1))
            else:
                a = predictions > self.threshold
            b = ground_truth > 0.5
            intersection = (a * b).astype('float32')
            union = np.clip(a + b, 0, 1).astype('float32')
            if mask is not None:
                intersection = intersection * mask
                union = union * mask
            # sum over width, height and batch -> one value per class channel
            intersection = intersection.sum(3).sum(2).sum(0)
            union = union.sum(3).sum(2).sum(0)
            self.intersections += [intersection]
            self.unions += [union]
        else:
            # non-binary IoU does not support partial masks: require an all-ones mask.
            # fix: `mask` is a numpy array at this point, so use the `size` attribute --
            # the previous `mask.size(0) * mask.size(1) * ...` (torch-style call) raised
            # a TypeError because numpy's `ndarray.size` is an int, not a method.
            assert mask is None or mask.sum() == mask.size
            intersection, union = intersection_union(predictions.argmax(1), ground_truth, self.n_classes)
            self.intersections += [intersection]
            self.unions += [union]

    def value(self):
        """Return (class-wise IoU array, mean IoU). Classes with zero
        intersection yield 0 instead of dividing by zero."""
        intersections = np.array(self.intersections).sum(0)
        unions = np.array(self.unions).sum(0)
        classwise_iou = np.divide(intersections, unions, where=intersections > 0, out=np.zeros(intersections.shape))
        return classwise_iou, np.mean(classwise_iou)
def intersection_union(prediction, ground_truth, n_classes):
"""
Computes the class-wise intersection and union and returns them as separate arrays.
"""
assert type(prediction) == np.ndarray
assert type(ground_truth) == np.ndarray
err_msg = 'int data type required (no unsigned because of ignore index). Actual type: {}'
assert prediction.dtype.name in {'int8', 'int16', 'int32', 'int64'}, err_msg.format(prediction.dtype)
assert ground_truth.dtype.name in {'int8', 'int16', 'int32', 'int64'}, err_msg.format(prediction.dtype)
prediction += 1
ground_truth += 1
# this will ignore all values that are zero (i.e. were -1)
prediction = (ground_truth > 0) * prediction
intersection = np.bincount((prediction * (prediction == ground_truth)).flatten(), minlength=n_classes+1)
pred = np.bincount(prediction.flatten(), minlength=n_classes+1)
gt = np.bincount(ground_truth.flatten(), minlength=n_classes+1)
union = pred + gt - intersection
return intersection[1:], union[1:]
def intersection_over_union(prediction, ground_truth, n_classes):
    """Return the class-wise IoU (intersection divided by union) as an array.

    Note: classes absent from both arrays have union 0, so the division may
    produce NaN/inf for them (numpy division semantics).
    """
    inter, uni = intersection_union(prediction, ground_truth, n_classes)
    return inter / uni
\ No newline at end of file
import time
import numpy as np
from sklearn.metrics import average_precision_score
from .base import BaseMetric
class MAPMetric(BaseMetric):
    """
    Computes class-wise average precision (AP) and mean average precision (mAP)
    over all accumulated pixels using sklearn's `average_precision_score`.

    Arguments:
        binary: stored for interface compatibility; not used in the computation.
        **kwargs: forwarded to `BaseMetric` (eval_intermediate, eval_validation).
    """
    def __init__(self, binary=False, **kwargs):
        super().__init__(('mAP_cw', 'mAP_cw_mean'), **kwargs)
        self.binary = binary
        # per-call accumulators of flattened (pixels x classes) arrays
        self.gts = []
        self.preds = []
        self.masks = []

    def add(self, vars_x, vars_y):
        """Accumulate one batch. `vars_x`/`vars_y` are tuples of NxCxHxW tensors;
        `vars_y` may contain an optional mask as second element."""
        predictions = vars_x[0]
        ground_truth = vars_y[0]
        mask = vars_y[1] if len(vars_y) == 2 else None
        y_pred = predictions.detach().cpu().numpy()
        y_gt = ground_truth.detach().cpu().numpy().astype('int32')
        if mask is not None:
            mask = mask.detach().cpu().numpy()
            # NxCxHxW -> (N*H*W)xC so each row is one pixel
            mask = mask.transpose([0, 2, 3, 1]).reshape((mask.shape[0] * mask.shape[2] * mask.shape[3], mask.shape[1]))
            mask = mask.astype('bool')
        self.gts += [y_gt.transpose([0, 2, 3, 1]).reshape((y_gt.shape[0] * y_gt.shape[2] * y_gt.shape[3], y_gt.shape[1])) > 0.5]
        self.preds += [y_pred.transpose([0, 2, 3, 1]).reshape((y_pred.shape[0] * y_pred.shape[2] * y_pred.shape[3], y_pred.shape[1]))]
        self.masks += [mask]

    def value(self):
        """Return (class-wise AP array, mean AP). Classes whose AP is undefined
        (NaN, e.g. no positive pixels) count as 0."""
        y_gt_samples = np.concatenate(self.gts, axis=0)
        y_pred_samples = np.concatenate(self.preds, axis=0)
        # assumes mask presence is consistent across all `add` calls
        mask_samples = np.concatenate(self.masks, axis=0) if self.masks[0] is not None else None
        t_start = time.time()
        avg_prec = []
        for i in range(y_gt_samples.shape[1]):
            # predictions rounded to 3 decimals to reduce the number of distinct thresholds
            prec = average_precision_score(y_gt_samples[:, i], np.round(y_pred_samples[:, i], 3),
                                           sample_weight=mask_samples[:, i] if mask_samples is not None else None)
            avg_prec += [prec if not np.isnan(prec) else 0]
        avg_prec = np.array(avg_prec)
        # fix: removed leftover debug print of avg_prec.shape
        print('mAP computation took {}s'.format(time.time() - t_start))
        return avg_prec, float(avg_prec.mean())
import os
import yaml
from os.path import join, realpath, dirname
class SettingKeyError(Exception):
    """Raised when a requested key is missing from a settings file.

    fix: inherits from Exception instead of BaseException, so generic
    `except Exception` handlers catch it like any other application error
    (BaseException is reserved for exit-like signals such as KeyboardInterrupt).
    """
    pass
class SettingDict(dict):
    """
    A dict populated from a YAML file that raises a descriptive
    `SettingKeyError` (including the filename) when a key is missing.
    """
    def __init__(self, filename, **kwargs):
        super().__init__(**kwargs)
        self.filename = filename
        with open(filename) as f:
            # fix: safe_load instead of yaml.load -- yaml.load without an explicit
            # Loader is unsafe on untrusted files, deprecated since PyYAML 5.1,
            # and a TypeError in PyYAML >= 6. Config files need no arbitrary objects.
            super().update(yaml.safe_load(f))

    def __getitem__(self, key):
        if super().__contains__(key):
            return super().__getitem__(key)
        else:
            raise SettingKeyError('No entry for ' + key + ' was found in file ' + self.filename)
# locations of the two user-editable configuration files, next to the package root
path_filename = join(dirname(realpath(__file__)), '..', 'paths.yaml')
config_filename = join(dirname(realpath(__file__)), '..', 'config.yaml')

if not os.path.isfile(path_filename):
    # create sample paths
    # fix: write the sample to paths.yaml (previously this wrote to config.yaml,
    # so paths.yaml was never created and this branch re-ran on every import)
    with open(path_filename, 'w') as f:
        sample_content = ('AVA_ROOT: ' + realpath(join(__file__, '..')) + '\n'
                          'AVA_DATA: ' + realpath(join(__file__, '../ava/data')) + '\n'
                          'CACHE_PATH: cache\n'
                          'TRAINED_MODELS_PATH: pretrained_models')
        f.write(sample_content)
    print('paths.yaml was not found. Therefore, a new file was created. Please check its content.')

if not os.path.isfile(config_filename):
    # create sample config
    # fix: check config_filename here (previously this re-checked path_filename,
    # so config.yaml was never created from its sample)
    with open(join(dirname(realpath(__file__)), '..', 'config.yaml.sample')) as f:
        with open(config_filename, 'w') as f2:
            f2.write(f.read())
    print('config.yaml was not found. Therefore, a new file was created from config.yaml.sample. '
          'Please check the newly created config.yaml.')

PATHS = SettingDict(path_filename)
CONFIG = SettingDict(config_filename)
import cv2
import numpy as np
from ..assertions import assert_equal_shape
def crop_from_image(img, y_min, y_max, x_min, x_max, min_extent=80):
    """
    Crop a region centered on the given bounding box.

    The crop spans at least `min_extent` pixels (and at least the larger
    bounding-box side) in each direction, clipped to the image borders.
    """
    box_extent = max(y_max - y_min, x_max - x_min)
    half_extent = max(int(min_extent / 2), int(0.5 * box_extent))
    cy = int(y_min + 0.5 * (y_max - y_min))
    cx = int(x_min + 0.5 * (x_max - x_min))
    y0 = max(0, cy - half_extent)
    y1 = min(cy + half_extent, img.shape[0])
    x0 = max(0, cx - half_extent)
    x1 = min(cx + half_extent, img.shape[1])
    return img[y0:y1, x0:x1]
def random_crop_slices(origin_size, target_size):
    """
    Return a pair of slices describing a uniformly random crop of
    `target_size` within `origin_size` (both (height, width) tuples).
    """
    assert origin_size[0] >= target_size[0] and origin_size[1] >= target_size[1]
    # np.random.randint samples from [low, high); +1 allows the maximal offset
    offsets = [np.random.randint(0, origin_size[d] - target_size[d] + 1) for d in (0, 1)]
    return tuple(slice(off, off + size) for off, size in zip(offsets, target_size))
def random_crop(tensor, target_size, image_dimensions=(0, 1)):
    """
    Randomly samples a crop of size `target_size` from `tensor` along
    the two axes given by `image_dimensions`; all other axes are kept whole.
    """
    assert len(image_dimensions) == 2 and type(image_dimensions[0]) == int and type(image_dimensions[1]) == int
    dim_y, dim_x = image_dimensions
    origin_size = tensor.shape[dim_y], tensor.shape[dim_x]
    crop_y, crop_x = random_crop_slices(origin_size, target_size)
    # take everything along non-image axes, the sampled slices along image axes
    index = [slice(0, None)] * len(tensor.shape)
    index[dim_y] = crop_y
    index[dim_x] = crop_x
    return tensor[tuple(index)]
def patch_sampling(img, segmentation, target_size, importance_map_index=None,
                   importance_map_size=(100, 100), random_scaling=True,
                   random_shift=True, label_input=False):
    """
    Takes an image `img` (HxWx3) and a segmentation tensor `segmentation` (HxWxC or HxW, if label_input) and randomly samples a
    smaller (`target size`) patch by considering a segmentation channel or index (`importance_map_index`) as
    a probability density.
    If random_scaling is True, before cropping, the image is scaled.
    If random shift is True, the patch is slightly shifted from the center.

    NOTE(review): `importance_map_size` is not used anywhere in this function --
    confirm whether it was meant to be forwarded to `sample_random_patch` as `rescale`.

    Returns:
        img_crop: cropped image
        seg_crop: cropped segmentation aligned with img_crop
        img: resized image if random_scaling, else None
    """
    msg = 'If label_input is True, segmentation must have two dimensions. Actual number of dimensions: '
    msg += str(segmentation.ndim)
    assert (segmentation.ndim == 3 and not label_input) or (segmentation.ndim == 2 and label_input), msg
    if random_scaling:
        # resize the input images randomly
        # max_scale: largest zoom-out factor that still leaves room for a target_size crop
        max_scale = min(img.shape[0] / target_size[0], img.shape[1] / target_size[1])
        scale_range = np.arange(min(1, max_scale), max_scale, 0.03)
        if len(scale_range) == 0:
            scale = 1
        else:
            # linearly decreasing preference: smaller scale factors are more likely
            scale_prob = 1.1 * max_scale - scale_range
            scale_prob = scale_prob / scale_prob.sum()
            # assert abs(0 - scale_prob.min()) < 0.0001, 'scale probabilities must be larger than zero'
            assert abs(1 - scale_prob.sum()) < 0.001, 'actual sum of scale_prob: ' + str(scale_prob.sum())
            scale = np.random.choice(scale_range, p=scale_prob)
        if scale != 1:
            # cv2.resize expects (width, height)
            scaled_size = (int(round(img.shape[1] / scale)), int(round(img.shape[0] / scale)))
            img = cv2.resize(img, scaled_size)
            if not label_input:
                # resize each channel separately; nearest neighbor keeps labels crisp
                segmentation = np.dstack(
                    [cv2.resize(segmentation[:, :, s].astype('float32'), scaled_size, interpolation=cv2.INTER_NEAREST)
                     for s in range(segmentation.shape[2])])
            else:
                segmentation = cv2.resize(segmentation, scaled_size, interpolation=cv2.INTER_NEAREST)
            # print('after resize', img.shape, aff.shape)
            assert img.shape[:2] == segmentation.shape[:2]
    # NOTE(review): truthiness test treats importance_map_index == 0 the same as None;
    # confirm that channel/label 0 is never a valid importance target.
    if importance_map_index:
        if not label_input:
            importance_map = segmentation[:, :, importance_map_index].astype('float32')
        else:
            importance_map = (segmentation == importance_map_index).astype('float32')
    else:
        importance_map = None
    if importance_map is None:
        # no importance map: plain uniform random crop
        slice_indices = random_crop_slices(img.shape, target_size)
    else:
        # crop placed around a randomly drawn "important" pixel
        slice_indices = sample_random_patch(img.shape, target_size, importance_map, random_shift=random_shift)
    img_crop = img[slice_indices]
    aff_crop = segmentation[slice_indices]
    assert_equal_shape(img_crop.shape[:2], target_size)
    assert_equal_shape(aff_crop.shape[:2], target_size)
    return img_crop, aff_crop, img if random_scaling else None
def sample_random_patch(img_shape, target_size, importance_map=None, random_shift=True, rescale=None):
    """
    Takes an image `img` (HxWx3) and randomly samples a
    smaller (`target size`) patch according to the probability density `importance_map`.
    If random shift is True, the patch is slightly shifted from the center.
    `rescale` (a (h, w) tuple) can speed up the sampling process by sampling on a
    smaller map than the image.

    NOTE(review): the original docstring mentioned `random_scaling` and
    `importance_map_size`, which are not parameters of this function --
    presumably copied from `patch_sampling`.

    Returns:
        numpy slice object to be applied on the image dimensions
    """
    assert target_size[0] <= img_shape[0] and target_size[1] <= img_shape[1]
    if importance_map is not None:
        if rescale:
            assert type(rescale) == tuple and len(rescale) == 2
            # sample on a smaller map for speed; the index is scaled back up below
            importance_map_small = cv2.resize(importance_map, rescale)
            # importance_map_small = importance_map_small.flatten()
            importance_density = importance_map_small
        else:
            importance_density = importance_map
        # importance_density = np.zeros_like(importance_density)
        # importance_density[50:60, 70:80] = 1
        # idx = np.random.choice(importance_density.shape[0] * importance_density.shape[1],
        #                        p=(importance_density / importance_density.sum()).flatten())
        # iy, ix = np.unravel_index(idx, importance_density.shape)
        # idx = (iy, ix) if not rescale else (int(iy * sy), int(ix * sx))
        # the map is expected to be binary: pick one of the "on" pixels uniformly
        assert importance_density.max() == 1
        choices = np.where(importance_density == 1)
        idx = np.random.choice(len(choices[0]))
        idx = choices[0][idx], choices[1][idx]
        if rescale:
            # map the sampled index from the rescaled map back to image coordinates
            sy, sx = img_shape[0] / rescale[0], img_shape[1] / rescale[1]
            idx = int(idx[0] * sy), int(idx[1] * sx)
    else:
        importance_density = None
        # NOTE(review): without an importance map the center is drawn uniformly from a
        # target_size-shaped area (i.e. the top-left region of the image), not from the
        # whole image -- confirm this is intended.
        idx = np.random.choice(target_size[0] * target_size[1], p=importance_density)
        idx = np.unravel_index(idx, target_size)
    # shift_y = np.random.randint(-2 * int(sy), 2 * int(sy)) if int(sy) > 0 and random_shift else 0
    # shift_x = np.random.randint(-2 * int(sx), 2 * int(sx)) if int(sx) > 0 and random_shift else 0
    # NOTE(review): randint over [0, target//2 - 3) yields only non-negative shifts,
    # so the patch only ever shifts down/right -- confirm a symmetric shift was intended.
    shift_y = np.random.randint(target_size[0] // 2 - 3) if random_shift else 0
    shift_x = np.random.randint(target_size[1] // 2 - 3) if random_shift else 0
    # shift_y, shift_x = 0, 0
    # top_left = max(0, idx[0] - target_size[0] // 2 + shift_y), max(0, idx[1] - target_size[1] // 2 + shift_x)
    top_left = idx[0] - target_size[0] // 2 + shift_y, idx[1] - target_size[1] // 2 + shift_x
    # make sure we do not cut: clamp so the crop lies fully inside the image
    top_left = min(top_left[0], img_shape[0] - target_size[0] - 1), min(top_left[1], img_shape[1] - target_size[1] - 1)
    top_left = max(top_left[0], 0), max(top_left[1], 0)
    slices = np.s_[top_left[0]:top_left[0] + target_size[0], top_left[1]: top_left[1] + target_size[1]]
    # print(idx, top_left, slices, img_shape, target_size)
    # return numpy slice object
    return slices
\ No newline at end of file
import numpy as np
from itertools import product
from ..logging import log_warning
def remap(orig_labels, occurring_labels, label_remapping, label_remapping_mask, no_exclusive_labels):
    """
    Remap hierarchical label maps onto a new multi-channel segmentation.

    Arguments:
        orig_labels: sequence of HxW integer label maps, one per hierarchy level.
        occurring_labels: per level, the label ids that actually occur.
        label_remapping: dict mapping a single label id or a tuple of label ids
            (object/part style; '*' acts as a wildcard on the first level)
            to a per-property target vector.
        label_remapping_mask: optional dict with the same keys providing mask vectors.
        no_exclusive_labels: if True, return the multi-channel map as-is;
            otherwise collapse it to a single-channel label map via argmax.

    Returns:
        (seg, mask): the remapped segmentation and an optional boolean mask
        (None if `label_remapping_mask` is None).

    Note: later matches overwrite earlier ones, so hierarchical (tuple) entries
    take precedence over direct single-label matches.
    """
    # number of output channels, taken from an arbitrary remapping entry
    n_properties = len(label_remapping[list(label_remapping.keys())[0]])
    seg_new = np.zeros((orig_labels[0].shape[0], orig_labels[0].shape[1], n_properties), 'float32')  # +1 because remapped labels start with 1
    mask_new = np.zeros_like(seg_new) if label_remapping_mask is not None else None
    # first use direct matches (without hierarchy)
    for object_set, label_map in [(occurring_labels[0], orig_labels[0]), (occurring_labels[1], orig_labels[1])]:
        for o_idx in object_set:
            if o_idx in label_remapping:
                matching_indices = np.where(label_map == o_idx)
                seg_new[matching_indices] = label_remapping[o_idx]
                if label_remapping_mask is not None:
                    mask_new[matching_indices] = label_remapping_mask[o_idx]
    # now hierarchical labels, i.e. object/part style entries
    for a, b in product(occurring_labels[0], occurring_labels[1]):
        found = None
        if (a, b) in label_remapping:
            found = (a, b)
        elif ('*', b) in label_remapping:
            # wildcard: part b below any object
            found = ('*', b)
        if found is not None:
            # pixels where both hierarchy levels match simultaneously
            matching_indices = np.where((orig_labels[0] == a) * (orig_labels[1] == b))
            # matching_indices = np.where((orig_labels[0] == a) * (orig_labels[1] == b))
            # print('hit2: ', a, b, objects[a], objects[b], matching_indices[0].shape)
            # print('found', self.objects[a], self.objects[b], cond)
            seg_new[matching_indices] = label_remapping[found]
            if label_remapping_mask is not None:
                mask_new[matching_indices] = label_remapping_mask[found]
    # three-level entries (object/part/sub-part)
    for a, b, c in product(occurring_labels[0], occurring_labels[1], occurring_labels[2]):
        found = None
        if (a, b, c) in label_remapping:
            found = (a, b, c)
        elif ('*', b, c) in label_remapping:
            found = ('*', b, c)
        if found is not None:
            matching_indices = np.where(((orig_labels[0] == a) * (orig_labels[1] == b) * (orig_labels[2] == c)))
            # print('hit3: ', a, b, c, objects[a], objects[b], objects[c], matching_indices[0].shape)
            # print('found', self.objects[a], self.objects[b], cond)
            seg_new[matching_indices] = label_remapping[found]
            if label_remapping_mask is not None:
                mask_new[matching_indices] = label_remapping_mask[found]
    mask = None
    if label_remapping_mask is not None:
        mask = mask_new
    if no_exclusive_labels:
        seg = seg_new
    else:
        # exclusive labels requested: channels must not overlap before argmax
        if seg_new.sum(2).max() > 1:
            # raise ValueError('The remapping produced an overlapping segmentation')
            log_warning('The remapping produced an overlapping segmentation')
        seg = seg_new.argmax(2).astype('uint16')
        if mask is not None:
            # collapse the per-channel mask to a single channel
            mask = np.clip(mask.sum(2), 0, 1)
    if mask is not None:
        mask = mask.astype('bool')
    return seg, mask
\ No newline at end of file
import cv2
import numpy as np
from ..assertions import assert_object_type
from ..logging import log_important, log_warning, log_detail
CV_INTERPOLATIONS = {'nearest': cv2.INTER_NEAREST, 'bilinear': cv2.INTER_LINEAR, 'cubic': cv2.INTER_CUBIC}
def scale_to_bound(tensor_shape, bounds, interpret_as_max_bound=False):
sf = bounds[0] / tensor_shape[0], bounds[1] / tensor_shape[1]
lt = sf[0] < sf[1] if interpret_as_max_bound else sf[1] < sf[0]
target_size = (bounds[0], int(tensor_shape[1] * sf[0])) if lt else (int(tensor_shape[0] * sf[1]), bounds[1])
# make sure do not violate the bounds
if interpret_as_max_bound:
target_size = min(target_size[0], bounds[0]), min(target_size[1], bounds[1])
else:
target_size = max(target_size[0], bounds[0]), max