Source code for impressionismcat.paint.style

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
author:     Ewen Wang
email:      wolfgangwong2012@gmail.com
license:    Apache License 2.0
"""

import time
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

# Module-wide matplotlib defaults applied on import: every figure created
# afterwards is 10x10 inches and axes are drawn without a grid, so that
# images are shown unobstructed.
mpl.rcParams.update({
    'figure.figsize': (10, 10),
    'axes.grid': False,
})



class StyleTransfer(object):
    """Transfer the art style of one image onto the content of another.

    The class loads a content image and a style image, extracts their
    feature representations with a frozen VGG19 network and then optimizes
    a copy of the content image (``input_vgg_init``) so that its content
    features stay close to the content image while the Gram matrices of its
    style features approach those of the style image.

    Typical use::

        st = StyleTransfer(path_content='c.jpg', path_style='s.jpg')
        st.optimize(iterations=200, display=False)
        st.save_pic('out.jpg')
    """

    def __init__(self, path_content=None, path_style=None, iterations=1000,
                 display=True, display_interval=1,
                 weight_content=1e3, weight_style=1e-2):
        """Initialize a StyleTransfer instance.

        The initialization involves:

        1. Parameters initialization
        2. Images loading
        3. Images initialization for TensorFlow computing
        4. Model setup
        5. Features representation
        6. Optimization initialization

        Parameters:
            path_content (str): A content image path. Required, although it
                defaults to None for signature compatibility.
            path_style (str): A style image path. Required, although it
                defaults to None for signature compatibility.
            iterations (int): The number of iterations to render the content
                image, default 1000.
            display (bool): If display the results in progress, default True.
            display_interval (int): The interval to display the progress,
                default 1.
            weight_content (float): A weight for content image, default 1e3.
            weight_style (float): A weight for style image, default 1e-2.

        Raises:
            ValueError: If ``path_content`` or ``path_style`` is None.

        Note:
            ``iterations``, ``display`` and ``display_interval`` are only
            stored here; ``optimize`` overwrites them with its own arguments
            each time it is called.
        """
        # Fail early with a clear message instead of an opaque error from
        # inside Image.open(None).
        if path_content is None or path_style is None:
            raise ValueError(
                'Both path_content and path_style must be provided.')

        super(StyleTransfer, self).__init__()
        self.path_content = path_content
        self.path_style = path_style
        self.iterations = iterations
        self.display = display
        self.display_interval = display_interval
        self.weight_content = weight_content
        self.weight_style = weight_style
        self.loss_weights = (self.weight_style, self.weight_content)

        # load images
        self.content = self.load_resize_img(self.path_content)
        self.style = self.load_resize_img(self.path_style)

        # initialize: the image being optimized starts as the content image
        self.input_vgg_content = self.process_img_as_vgg_input(self.content)
        self.input_vgg_style = self.process_img_as_vgg_input(self.style)
        self.input_vgg_init = self.process_img_as_vgg_input(self.content)
        self.input_vgg_init = tf.Variable(self.input_vgg_init, dtype=tf.float32)

        # model
        self.model = self.setup_model()

        # represent
        self.features_style, self.features_content = self.represent_features()
        self.features_style_gram = [self.gram_matrix(feature_style)
                                    for feature_style in self.features_style]

        # optimization initialize
        self.cfg = {
            'model': self.model,
            'loss_weights': self.loss_weights,
            'input_vgg_init': self.input_vgg_init,
            'features_style_gram': self.features_style_gram,
            'features_content': self.features_content
        }
        self.best_loss, self.best_img = float('inf'), None
        self.imgs = []

    ### input
    def load_resize_img(self, path, max_dim=512):
        """Load an image and resize it so its longest side equals max_dim.

        Args:
            path (str): The path of the image.
            max_dim (int): The size of maximum dimension, default 512.

        Returns:
            img_array: The resized image as an array with a leading batch
            dimension, i.e. 4 dimensions (1, height, width, 3).
        """
        # The context manager closes the underlying file; convert() yields an
        # independent image and guarantees the 3 channels VGG19 expects even
        # for grayscale, palette or RGBA files.
        with Image.open(path) as raw:
            rgb = raw.convert('RGB')

        scale = max_dim / max(rgb.size)
        new_size = (round(rgb.size[0] * scale), round(rgb.size[1] * scale))
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in
        # Pillow 10; LANCZOS is the same filter.
        resized = rgb.resize(new_size, Image.LANCZOS)

        # Add the batch dimension. The pixel data is taken directly from the
        # PIL image, exactly as before (the former img_to_array result was
        # computed but never used).
        img_array = np.expand_dims(np.asarray(resized), axis=0)
        return img_array

    def display_img(self, img_array, title=None):
        """Display an image on the current matplotlib axes.

        Args:
            img_array (numpy.array): The image array generated by
                load_resize_img (with a leading batch dimension of size 1).
            title (str): The image title, default None.
        """
        # Remove the batch dimension
        out = np.squeeze(img_array, axis=0)
        # Normalize for display
        out = out.astype('uint8')
        plt.imshow(out)
        if title:
            plt.title(title)
        return None

    def process_img_as_vgg_input(self, img_array):
        """Process an image to a VGG19 model input.

        Uses tf.keras.applications.vgg19.preprocess_input.

        Args:
            img_array (numpy.array): An image array generated by
                load_resize_img.

        Returns:
            img_vgg: A processed image array.

        Note:
            Each Keras Application expects a specific kind of input
            preprocessing. For VGG19, call
            tf.keras.applications.vgg19.preprocess_input on your inputs
            before passing them to the model. vgg19.preprocess_input will
            convert the input images from RGB to BGR, then will zero-center
            each color channel with respect to the ImageNet dataset, without
            scaling.
        """
        # preprocess_input may write its result over a floating point NumPy
        # input. Work on a copy so the caller's array (e.g. self.content,
        # which is processed twice in __init__) is never modified.
        if (isinstance(img_array, np.ndarray)
                and np.issubdtype(img_array.dtype, np.floating)):
            img_array = img_array.copy()
        img_vgg = tf.keras.applications.vgg19.preprocess_input(img_array)
        return img_vgg

    def deprocess_img(self, img_processed):
        """Convert an array in VGG19 input format back to a displayable image.

        Args:
            img_processed (numpy.array): A processed image array of shape
                [1, height, width, channel] or [height, width, channel].
                It is not modified.

        Returns:
            img_array (numpy.array): A uint8 array of shape
            [height, width, channel] with the channel order reversed back
            and values clipped to [0, 255].

        Raises:
            ValueError: If the input does not have 3 dimensions after the
                optional batch dimension is removed.
        """
        x = img_processed.copy()
        if x.ndim == 4:
            x = np.squeeze(x, 0)
        # Explicit raise instead of assert: asserts are stripped under -O.
        if x.ndim != 3:
            raise ValueError("Input to deprocess image must be an image of "
                             "dimension [1, height, width, channel] or "
                             "[height, width, channel]")

        # perform the inverse of the preprocessing step: add the per-channel
        # means back, then reverse the channel order
        x[:, :, 0] += 103.939
        x[:, :, 1] += 116.779
        x[:, :, 2] += 123.68
        x = x[:, :, ::-1]

        x = np.clip(x, 0, 255).astype('uint8')
        return x

    # model
    def setup_model(self):
        """Setup a VGG19 model with selected layers.

        Builds a frozen model whose outputs are the five style layers
        followed by the single content layer, and records the layer counts
        in ``num_layers_style`` / ``num_layers_content``.

        Returns:
            The tf.keras.Model, also stored as ``self.model``.

        Note:
            https://keras.io/api/applications/vgg/#vgg19-function
        """
        vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        vgg.trainable = False

        # Content layer where will pull our feature maps
        layers_content = ['block5_conv2']

        # Style layer we are interested in
        layers_style = ['block1_conv1',
                        'block2_conv1',
                        'block3_conv1',
                        'block4_conv1',
                        'block5_conv1']

        self.num_layers_content = len(layers_content)
        self.num_layers_style = len(layers_style)

        # Get output layers corresponding to style and content layers
        outputs_style = [vgg.get_layer(name).output for name in layers_style]
        outputs_content = [vgg.get_layer(name).output for name in layers_content]
        outputs_model = outputs_style + outputs_content

        self.model = tf.keras.Model(inputs=vgg.input, outputs=outputs_model,
                                    name='style transfer')
        for layer in self.model.layers:
            layer.trainable = False
        return self.model

    def represent_features(self):
        """Get the style and content feature representations from the model.

        Returns:
            A tuple ``(features_style, features_content)``: the style-layer
            outputs for the style image and the content-layer outputs for the
            content image, each with the batch dimension dropped.
        """
        # batch compute content and style features
        outputs_style = self.model(self.input_vgg_style)
        outputs_content = self.model(self.input_vgg_content)

        # Model outputs are ordered style layers first, then content layers.
        self.features_style = [style_layer[0] for style_layer
                               in outputs_style[:self.num_layers_style]]
        self.features_content = [content_layer[0] for content_layer
                                 in outputs_content[self.num_layers_style:]]
        return self.features_style, self.features_content

    ### loss function
    def gram_matrix(self, input_tensor):
        """Calculate the Gram Matrix of input tensor.

        A Gram matrix (often referred to as a Gramian matrix) is a matrix
        created by multiplying a matrix with its own transpose. Here it is
        used to measure the correlation between the channels (convolutional
        filters) of a feature map.

        Args:
            input_tensor (tensorflow.tensor): The input tensor; its last
                dimension is treated as the channel dimension.

        Returns:
            gram_matrix: A (channels, channels) tensor, normalized by the
            number of rows of the flattened input.
        """
        channels = int(input_tensor.shape[-1])
        # Flatten every leading dimension so each row is one position.
        a = tf.reshape(input_tensor, [-1, channels])
        n = tf.shape(a)[0]
        gram = tf.matmul(a, a, transpose_a=True)
        return gram / tf.cast(n, tf.float32)

    def get_loss_content(self, base_content, target):
        """Calculate the loss of the content.

        Args:
            base_content (tensorflow.tensor): The processed content tensor.
            target (tensorflow.tensor): The target content tensor.

        Returns:
            Mean squared error.
        """
        return tf.reduce_mean(tf.square(base_content - target))

    def get_loss_style(self, base_style, gram_target):
        """Calculate the loss of the style.

        Args:
            base_style (tensorflow.tensor): The processed style tensor.
            gram_target (tensorflow.tensor): The target style gram tensor.

        Returns:
            Mean squared error between the Gram matrix of ``base_style`` and
            ``gram_target``.
        """
        gram_style = self.gram_matrix(base_style)
        return tf.reduce_mean(tf.square(gram_style - gram_target))

    def compute_loss(self):
        """Compute the total loss of the style and content from all layers.

        Returns:
            A tuple ``(loss_total, score_style, score_content)`` where the
            two scores are already multiplied by their weights. The values
            are also stored on the instance under the same names.
        """
        # Feed the image being optimized through the model to get its
        # content and style representations at the selected layers.
        outputs_model = self.model(self.input_vgg_init)

        style_output_features = outputs_model[:self.num_layers_style]
        content_output_features = outputs_model[self.num_layers_style:]

        self.score_style = 0
        self.score_content = 0

        # Accumulate style losses from all layers
        # Here, we equally weight each contribution of each loss layer
        weight_per_style_layer = 1.0 / float(self.num_layers_style)
        for target_style, comb_style in zip(self.features_style_gram,
                                            style_output_features):
            self.score_style += weight_per_style_layer * self.get_loss_style(
                comb_style[0], target_style)

        # Accumulate content losses from all layers
        weight_per_content_layer = 1.0 / float(self.num_layers_content)
        for target_content, comb_content in zip(self.features_content,
                                                content_output_features):
            self.score_content += weight_per_content_layer * self.get_loss_content(
                comb_content[0], target_content)

        self.score_style *= self.weight_style
        self.score_content *= self.weight_content

        # Get total loss
        self.loss_total = self.score_style + self.score_content
        return self.loss_total, self.score_style, self.score_content

    ## optimization
    def compute_grads(self):
        """Calculate the gradients of the total loss w.r.t. the image.

        Returns:
            A tuple ``(grads, losses)`` where ``losses`` is the tuple
            returned by compute_loss.
        """
        with tf.GradientTape() as tape:
            self.losses = self.compute_loss()
        # Compute gradients wrt input image
        self.loss_total = self.losses[0]
        return tape.gradient(self.loss_total, self.cfg['input_vgg_init']), self.losses

    def optimize(self, iterations=1000, learning_rate=5, beta_1=0.99,
                 epsilon=1e-1, display=True, display_interval=1,
                 cache_interval=50, clear_cache=False):
        """Optimize the transfer process.

        Parameters:
            iterations (int): The number of iterations for optimizing,
                default 1000.
            learning_rate (int): The learning rate of tf.optimizers.Adam,
                default 5.
            beta_1 (float): The beta 1 of tf.optimizers.Adam, default 0.99.
            epsilon (float): The epsilon of tf.optimizers.Adam, default 1e-1.
            display (bool): Control whether display the progress during
                learning, default True. Requires IPython when True.
            display_interval (int): The interval to display the progress,
                default 1.
            cache_interval (int): The interval to save the progress,
                default 50.
            clear_cache (bool): Control whether clear the cache,
                default False.

        Note:
            The arguments given here replace the ``iterations``, ``display``
            and ``display_interval`` values passed to the constructor. The
            best image seen so far is kept in ``self.best_img`` and cached
            frames in ``self.imgs``.
        """
        self.iterations = iterations
        self.display = display
        self.display_interval = display_interval
        self.cache_interval = cache_interval

        if clear_cache:
            self.imgs = []

        # Cache the starting image as the first frame.
        # Use the .numpy() method to get the concrete numpy array
        self.imgs.append(self.deprocess_img(self.input_vgg_init.numpy()))

        # Create our optimizer
        opt = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=beta_1,
                                 epsilon=epsilon)

        # Valid range of a VGG-preprocessed pixel: [0, 255] shifted by the
        # per-channel means.
        norm_means = np.array([103.939, 116.779, 123.68])
        min_vals = -norm_means
        max_vals = 255 - norm_means

        if self.display:
            # Imported lazily (and only once) so IPython is needed only when
            # progress display is requested.
            import IPython.display as ipy_display

        for i in range(self.iterations):
            step_start = time.time()
            self.grads, self.losses = self.compute_grads()
            self.loss, self.score_style, self.score_content = self.losses
            opt.apply_gradients([(self.grads, self.input_vgg_init)])
            clipped = tf.clip_by_value(self.input_vgg_init, min_vals, max_vals)
            self.input_vgg_init.assign(clipped)
            # Duration of the optimization step itself (reported below).
            step_time = time.time() - step_start

            improved = bool(self.loss < self.best_loss)
            cache_now = i % self.cache_interval == 0
            show_now = self.display and i % self.display_interval == 0

            # Converting the variable back to an image is only needed when
            # the frame is actually used; do it at most once per iteration.
            if not (improved or cache_now or show_now):
                continue
            frame = self.deprocess_img(self.input_vgg_init.numpy())

            if improved:
                # Update best loss and best image from total loss.
                self.best_loss = self.loss
                self.best_img = frame

            if cache_now:
                self.imgs.append(frame)

            if show_now:
                ipy_display.clear_output(wait=True)
                ipy_display.display_png(Image.fromarray(frame))
                print('Iteration: {}'.format(i+1))
                print('Total loss: {:.2e}, '
                      'style loss: {:.2e}, '
                      'content loss: {:.2e}, '
                      'time: {:.4f}s'.format(self.loss, self.score_style,
                                             self.score_content, step_time))
        return None

    def show_results(self):
        """Display the content image, the style image and the output image."""
        plt.figure(figsize=(10, 5))

        plt.subplot(1, 2, 1)
        self.display_img(self.content, 'Content Image')

        plt.subplot(1, 2, 2)
        self.display_img(self.style, 'Style Image')

        plt.figure(figsize=(10, 10))

        plt.imshow(self.best_img)
        plt.title('Output Image')
        plt.show()
        return None

    def show_inputs(self):
        """Display the content image and the style image side by side."""
        plt.figure(figsize=(15, 15))

        plt.subplot(1, 2, 1)
        self.display_img(self.content, 'Content Image')

        plt.subplot(1, 2, 2)
        self.display_img(self.style, 'Style Image')
        plt.show()
        return None

    def save_gif(self, path='style_transfer.gif'):
        """Save the transferring progress as GIF.

        Args:
            path (str): The path to save the GIF, default
                'style_transfer.gif'.

        Raises:
            ValueError: If no frames have been cached yet.
        """
        if not self.imgs:
            raise ValueError('No cached frames to save; run optimize() first.')
        frames = [Image.fromarray(frame) for frame in self.imgs]
        frames[0].save(path,
                       save_all=True,
                       append_images=frames[1:],
                       optimize=False,
                       duration=10,
                       loop=0)
        return None

    def save_pic(self, path='style_transfer.jpg'):
        """Save the results as JPG.

        Args:
            path (str): The path to save the JPG, default
                'style_transfer.jpg'.

        Raises:
            ValueError: If there is no result image yet.
        """
        if self.best_img is None:
            raise ValueError('No result image to save; run optimize() first.')
        Image.fromarray(self.best_img).save(path)
        return None