Hemant Vishwakarma: image reconstruction from predicted array (normalize

I have two images, E1 and E3, and I am training a CNN model.

In order to train the model, I use E1 as train and E3 as y_train.

I extract tiles from these images in order to train the model on tiles.

The model, does not have an activation layer, so the output can take any value.

So, the predictions for example, preds , have values around preds.max() = 2.35 and preds.min() = -1.77.

My problem is that I can't reconstruct the image at the end using preds and I think the problem is the scaling-unscaling of the preds values.

If I just do np.uint8(preds) its is almost full of zeros since preds has small values.

The image should look like as close as possible to E2 image.

import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, \
    Input, Add
from tensorflow.keras.models import Model
from PIL import Image

CHANNELS = 1
HEIGHT = 32
WIDTH = 32
INIT_SIZE = ((1429, 1416))

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data) + 1e-6)

def extract_image_tiles(size, im):
    im = im[:, :, :CHANNELS]
    w = h = size
    idxs = [(i, (i + h), j, (j + w)) for i in range(0, im.shape[0], h) for j in range(0, im.shape[1], w)]
    tiles_asarrays = []
    count = 0
    for k, (i_start, i_end, j_start, j_end) in enumerate(idxs):
        tile = im[i_start:i_end, j_start:j_end, ...]
        if tile.shape[:2] != (h, w):
            tile_ = tile
            tile_size = (h, w) if tile.ndim == 2 else (h, w, tile.shape[2])
            tile = np.zeros(tile_size, dtype=tile.dtype)
            tile[:tile_.shape[0], :tile_.shape[1], ...] = tile_
        
        count += 1
        tiles_asarrays.append(tile)
    return np.array(idxs), np.array(tiles_asarrays)


def build_model(height, width, channels):
    inputs = Input((height, width, channels))

    f1 = Conv2D(32, 3, padding='same')(inputs)
    f1 = BatchNormalization()(f1)
    f1 = Activation('relu')(f1)
    
    f2 = Conv2D(16, 3, padding='same')(f1)
    f2 = BatchNormalization()(f2)
    f2 = Activation('relu')(f2)
    
    f3 = Conv2D(16, 3, padding='same')(f2)
    f3 = BatchNormalization()(f3)
    f3 = Activation('relu')(f3)

    addition = Add()([f2, f3])
    
    f4 = Conv2D(32, 3, padding='same')(addition)
    
    f5 = Conv2D(16, 3, padding='same')(f4)
    f5 = BatchNormalization()(f5)
    f5 = Activation('relu')(f5)
   
    f6 = Conv2D(16, 3, padding='same')(f5)
    f6 = BatchNormalization()(f6)
    f6 = Activation('relu')(f6)
   
    output = Conv2D(1, 1, padding='same')(f6)

    model = Model(inputs, output)

    return model

# Load data
img = cv2.imread('E1.tif', cv2.IMREAD_UNCHANGED)
img = cv2.resize(img, (1408, 1408), interpolation=cv2.INTER_AREA)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.array(img, np.uint8)
#plt.imshow(img)
img3 = cv2.imread('E3.tif', cv2.IMREAD_UNCHANGED)
img3 = cv2.resize(img3, (1408, 1408), interpolation=cv2.INTER_AREA)
img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
img3 = np.array(img3, np.uint8)

# extract tiles from images
idxs, tiles = extract_image_tiles(WIDTH, img)
idxs2, tiles3 = extract_image_tiles(WIDTH, img3)

# split to train and test data
split_idx = int(tiles.shape[0] * 0.9)

train = tiles[:split_idx]
val = tiles[split_idx:]

y_train = tiles3[:split_idx]
y_val = tiles3[split_idx:]

# build model
model = build_model(HEIGHT, WIDTH, CHANNELS)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss = tf.keras.losses.Huber(),
              metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

# scale data before training
train  = train / 255.
val = val / 255.

y_train = y_train / 255.
y_val = y_val / 255.

# train
history = model.fit(train, 
                    y_train, 
                    validation_data=(val, y_val),
                    epochs=50)

# predict on E2
img2 = cv2.imread('E2.tif', cv2.IMREAD_UNCHANGED)
img2 = cv2.resize(img2, (1408, 1408), interpolation=cv2.INTER_AREA)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
img2 = np.array(img2, np.uint8)

# extract tiles from images
idxs, tiles2 = extract_image_tiles(WIDTH, img2)

#scale data
tiles2 = tiles2 / 255.

preds = model.predict(tiles2)
#preds = NormalizeData(preds)
#preds = np.uint8(preds)
# reconstruct predictions
reconstructed = np.zeros((img.shape[0],
                          img.shape[1]),
                          dtype=np.uint8)

# reconstruction process
for tile, (y_start, y_end, x_start, x_end) in zip(preds[:, :, -1], idxs):
    y_end = min(y_end, img.shape[0])
    x_end = min(x_end, img.shape[1])
    reconstructed[y_start:y_end, x_start:x_end] = tile[:(y_end - y_start), :(x_end - x_start)]


im = Image.fromarray(reconstructed)
im = im.resize(INIT_SIZE)
im.show()

You can find the data here

If I use :

def normalize_arr_to_uint8(arr):
  the_min = arr.min()
  the_max = arr.max()
  the_max -= the_min
  arr = ((arr - the_min) / the_max) * 255.
  return arr.astype(np.uint8)


preds = model.predict(tiles2)
preds = normalize_arr_to_uint8(preds)

then, I receive an image which seems right, but with lines all over.

from image reconstruction from predicted array (normalize - unnormalize array?)

Hemant Vishwakarma

Monday, 29 January 2024

image reconstruction from predicted array (normalize - unnormalize array?)

No comments:

Post a Comment