Source code for edflow.applications.tf_perceptual_loss

import tensorflow as tf
import numpy as np

# vgg19 from keras
from tensorflow.contrib.keras.api.keras.models import Model
from tensorflow.contrib.keras.api.keras.applications.vgg19 import VGG19
from tensorflow.contrib.keras.api.keras import backend as K


[docs]def preprocess_input(x):
    """Preprocesses a tensor encoding a batch of images.
    Parameters
    ----------
    x : tf.Tenser
        input tensor, 4D in [-1,1]
    Returns
    -------
    Preprocessed tensor : tf.Tensor

    """
    # from [-1, 1] to [0,255.0]
    x = (x + 1.0) / 2.0 * 255.0
    # 'RGB'->'BGR'
    x = x[:, :, :, ::-1]
    # Zero-center by mean pixel
    x = x - np.array([103.939, 116.779, 123.68]).reshape((1, 1, 1, 3))
    return x


def _ll_loss(target, reconstruction, log_variance, calibrate):
    dim = np.prod(target.shape.as_list()[1:])
    variance = tf.exp(log_variance)
    log2pi = np.log(2.0 * np.pi)
    e = tf.reduce_mean(tf.square(target - reconstruction))
    l = 0.5 * dim * (e / variance + log_variance + log2pi)
    if calibrate:
        calibrate_op = tf.assign(log_variance, tf.log(e))
    else:
        calibrate_op = tf.no_op()
    return l, calibrate


[docs]class VGG19Features(object):
[docs]    def __init__(
        self,
        session,
        feature_layers=None,
        feature_weights=None,
        gram_weights=None,
        default_gram=0.1,
        original_scale=False,
    ):
        K.set_session(session)
        self.base_model = VGG19(include_top=False, weights="imagenet")
        if feature_layers is None:
            feature_layers = [
                "input_1",
                "block1_conv2",
                "block2_conv2",
                "block3_conv2",
                "block4_conv2",
                "block5_conv2",
            ]
        self.layer_names = [l.name for l in self.base_model.layers]
        for k in feature_layers:
            if not k in self.layer_names:
                raise KeyError(
                    "Invalid layer {}. Available layers: {}".format(k, self.layer_names)
                )
        self.feature_layers = feature_layers
        features = [self.base_model.get_layer(k).output for k in feature_layers]
        self.model = Model(inputs=self.base_model.input, outputs=features)
        if feature_weights is None:
            feature_weights = len(feature_layers) * [1.0]
        if gram_weights is None:
            gram_weights = len(feature_layers) * [default_gram]
        elif isinstance(gram_weights, (int, float)):
            gram_weights = len(feature_layers) * [gram_weights]
        self.feature_weights = feature_weights
        self.gram_weights = gram_weights
        assert len(self.feature_weights) == len(features)
        self.use_gram = np.max(self.gram_weights) > 0.0
        self.original_scale = original_scale

        self.variables = self.base_model.weights

[docs]    def extract_features(self, x):
        """x should be rgb in [-1,1]."""
        x = preprocess_input(x)
        features = self.model.predict(x)
        return features

[docs]    def make_feature_ops(self, x):
        """x should be rgb tensor in [-1,1]."""
        x = preprocess_input(x)
        features = self.model(x)
        return features

[docs]    def grams(self, fs):
        gs = list()
        for f in fs:
            bs, h, w, c = f.shape.as_list()
            bs = -1 if bs is None else bs
            f = tf.reshape(f, [bs, h * w, c])
            ft = tf.transpose(f, [0, 2, 1])
            g = tf.matmul(ft, f)
            g = g / (4.0 * h * w)
            gs.append(g)
        return gs

[docs]    def make_loss_op(self, x, y):
        """x, y should be rgb tensors in [-1,1]. Uses l1 and spatial average."""
        if self.original_scale:
            xy = tf.concat([x, y], axis=0)
            xy = tf.image.resize_bilinear(xy, [256, 256])
            bs = tf.shape(xy)[0]
            xy = tf.random_crop(xy, [bs, 224, 224, 3])
            x, y = tf.split(xy, 2, 0)

        x = preprocess_input(x)
        x_features = self.model(x)

        y = preprocess_input(y)
        y_features = self.model(y)

        x_grams = self.grams(x_features)
        y_grams = self.grams(y_features)

        losses = [
            tf.reduce_mean(tf.abs(xf - yf)) for xf, yf in zip(x_features, y_features)
        ]
        gram_losses = [
            tf.reduce_mean(tf.abs(xg - yg)) for xg, yg in zip(x_grams, y_grams)
        ]

        for i in range(len(losses)):
            losses[i] = self.feature_weights[i] * losses[i]
            gram_losses[i] = self.gram_weights[i] * gram_losses[i]
        loss = tf.add_n(losses)
        if self.use_gram:
            loss = loss + tf.add_n(gram_losses)

        self.losses = losses
        self.gram_losses = gram_losses

        return loss

[docs]    def make_nll_op(self, x, y, log_variances, gram_log_variances=None, calibrate=True):
        """x, y should be rgb tensors in [-1,1]. This version treats every
        layer independently."""
        use_gram = gram_log_variances is not None
        if self.original_scale:
            xy = tf.concat([x, y], axis=0)
            xy = tf.image.resize_bilinear(xy, [256, 256])
            bs = tf.shape(xy)[0]
            xy = tf.random_crop(xy, [bs, 224, 224, 3])
            x, y = tf.split(xy, 2, 0)

        x = preprocess_input(x)
        x_features = self.model(x)

        y = preprocess_input(y)
        y_features = self.model(y)

        if use_gram:
            x_grams = self.grams(x_features)
            y_grams = self.grams(y_features)

        if len(log_variances) == 1:
            log_variances = len(x_features) * [log_variances[0]]

        feature_ops = [
            _ll_loss(xf, yf, logvar, calibrate=calibrate)
            for xf, yf, logvar in zip(x_features, y_features, log_variances)
        ]
        losses = [f[0] for f in feature_ops]
        self.losses = losses
        calibrations = [f[1] for f in feature_ops]
        self.calibrations = calibrations
        if use_gram:
            gram_ops = [
                _ll_loss(xg, yg, glogvar)
                for xg, yg, glogvar in zip(x_grams, y_grams, gram_log_variances)
            ]
            gram_losses = [g[0] for g in gram_ops]
            self.gram_losses = gram_losses
            gram_calibrations = [g[1] for g in gram_ops]
            self.gram_calibrations = gram_calibrations

        loss = tf.add_n(losses)
        if use_gram:
            loss = loss + tf.add_n(gram_losses)

        return loss

[docs]    def make_l1_nll_op(self, x, y, log_variance):
        """x, y should be rgb tensors in [-1,1]. Uses make_loss_op to compute
        version compatible with previous experiments."""

        rec_loss = 1e-3 * self.make_loss_op(x, y)
        dim = np.prod(x.shape.as_list()[1:])
        log_gamma = log_variance
        gamma = tf.exp(log_gamma)
        log2pi = np.log(2.0 * np.pi)
        likelihood = 0.5 * dim * (rec_loss / gamma + log_gamma + log2pi)

        return likelihood

[docs]    def make_style_op(self, x, y):
        __feature_weights = self.feature_weights
        __gram_weights = self.gram_weights
        self.feature_weights = [0.01 for _ in __feature_weights]
        self.gram_weights = [1.0 for _ in __gram_weights]
        loss = self.make_loss_op(x, y)
        self.feature_weights = __feature_weights
        self.gram_weights = __gram_weights
        return loss