I have two images, E1 and E3, and I am training a CNN model.
To train the model, I use E1 as train (the input) and E3 as y_train (the target).
I extract tiles from these images in order to train the model on tiles.
The model does not have an activation on its output layer, so the output can take any value.
So the predictions, preds, have values ranging from about preds.min() = -1.77 to preds.max() = 2.35.
My problem is that I can't reconstruct the image at the end using preds, and I think the problem is the scaling/unscaling of the preds values.
If I just do np.uint8(preds), the result is almost full of zeros, since preds has small values.
The reconstructed image should look as close as possible to the E2 image.
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, \
Input, Add
from tensorflow.keras.models import Model
from PIL import Image
# Number of leading image channels kept per tile; also the model's input depth.
CHANNELS = 1
# Spatial size of the model input; WIDTH doubles as the tile edge length.
HEIGHT = WIDTH = 32
# Size the reconstructed image is resized to before being shown.
# NOTE(review): PIL's resize expects (width, height) - confirm the order.
INIT_SIZE = (1429, 1416)
def NormalizeData(data):
    """Min-max scale ``data`` into the half-open range [0, 1).

    Args:
        data: Array-like of real numbers.

    Returns:
        A float array of the same shape where the minimum maps to 0 and the
        maximum maps to just under 1 (a 1e-6 term in the denominator keeps
        constant inputs from dividing by zero; they map to all zeros).

    Raises:
        ValueError: If ``data`` is empty (``np.min`` has no identity).
    """
    arr = np.asarray(data)
    # Integer arithmetic in a narrow dtype can wrap around (e.g. int8
    # 127 - (-128)), so do the maths in float. Float inputs keep their dtype.
    if not np.issubdtype(arr.dtype, np.inexact):
        arr = arr.astype(np.float64)
    low = np.min(arr)
    span = np.max(arr) - low
    return (arr - low) / (span + 1e-6)
def extract_image_tiles(size, im, channels=None):
    """Cut an image into square, non-overlapping tiles.

    Tiles that run past the bottom or right edge are zero-padded up to the
    full ``size`` x ``size`` extent.

    Args:
        size: Edge length of each tile, in pixels.
        im: Image array, either 2-D ``(rows, cols)`` or with trailing channel
            axes ``(rows, cols, channels, ...)``.
        channels: How many leading channels to keep when ``im`` has a channel
            axis. Defaults to the module-level ``CHANNELS``.

    Returns:
        A pair ``(idxs, tiles)``: ``idxs`` is an array of
        ``(row_start, row_end, col_start, col_end)`` per tile (the end values
        are not clipped to the image bounds) and ``tiles`` is the stacked
        tile array, in row-major tile order.
    """
    # Only slice the channel axis when there is one; a 2-D image has none
    # and indexing it with three subscripts would raise IndexError.
    if im.ndim >= 3:
        keep = CHANNELS if channels is None else channels
        im = im[:, :, :keep]

    h = w = size
    idxs = [
        (i, i + h, j, j + w)
        for i in range(0, im.shape[0], h)
        for j in range(0, im.shape[1], w)
    ]

    tiles = []
    for i_start, i_end, j_start, j_end in idxs:
        tile = im[i_start:i_end, j_start:j_end, ...]
        if tile.shape[:2] != (h, w):
            # Edge tile: embed the partial tile in the top-left of a zero tile.
            padded = np.zeros((h, w) + tile.shape[2:], dtype=tile.dtype)
            padded[:tile.shape[0], :tile.shape[1], ...] = tile
            tile = padded
        tiles.append(tile)

    return np.array(idxs), np.array(tiles)
def build_model(height, width, channels):
    """Assemble the fully-convolutional Keras model used for tile regression.

    The network is a stack of 3x3 'same'-padded convolutions with batch
    normalisation and ReLU, one additive skip connection between the second
    and third stages, and a final 1x1 convolution producing a single output
    channel with no activation.

    Args:
        height: Input tile height.
        width: Input tile width.
        channels: Number of input channels.

    Returns:
        An uncompiled ``Model`` mapping ``(height, width, channels)`` inputs
        to a one-channel output.
    """
    def conv_bn_relu(x, filters):
        # One 3x3 convolution stage followed by batch norm and ReLU.
        x = Conv2D(filters, 3, padding='same')(x)
        x = BatchNormalization()(x)
        return Activation('relu')(x)

    inputs = Input((height, width, channels))

    stage1 = conv_bn_relu(inputs, 32)
    stage2 = conv_bn_relu(stage1, 16)
    stage3 = conv_bn_relu(stage2, 16)

    # Skip connection: sum the outputs of the second and third stages.
    merged = Add()([stage2, stage3])

    # This convolution is deliberately bare (no batch norm / activation).
    widened = Conv2D(32, 3, padding='same')(merged)

    stage5 = conv_bn_relu(widened, 16)
    stage6 = conv_bn_relu(stage5, 16)

    # Linear 1x1 projection down to a single output channel.
    output = Conv2D(1, 1, padding='same')(stage6)
    return Model(inputs, output)
# Load data
def _load_rgb(path, size=(1408, 1408)):
    """Read ``path`` with OpenCV, resize it to ``size`` and return RGB uint8."""
    raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if raw is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("could not read image: " + path)
    raw = cv2.resize(raw, size, interpolation=cv2.INTER_AREA)
    raw = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
    return np.array(raw, np.uint8)


img = _load_rgb('E1.tif')
img3 = _load_rgb('E3.tif')

# extract tiles from images
idxs, tiles = extract_image_tiles(WIDTH, img)
idxs2, tiles3 = extract_image_tiles(WIDTH, img3)

# split to train and test data
split_idx = int(tiles.shape[0] * 0.9)
train = tiles[:split_idx]
val = tiles[split_idx:]
y_train = tiles3[:split_idx]
y_val = tiles3[split_idx:]

# build model
model = build_model(HEIGHT, WIDTH, CHANNELS)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss=tf.keras.losses.Huber(),
              metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

# scale data before training
train = train / 255.
val = val / 255.
y_train = y_train / 255.
y_val = y_val / 255.

# train
history = model.fit(train,
                    y_train,
                    validation_data=(val, y_val),
                    epochs=50)

# predict on E2
img2 = _load_rgb('E2.tif')

# extract tiles from images
idxs, tiles2 = extract_image_tiles(WIDTH, img2)

# scale data
tiles2 = tiles2 / 255.
preds = model.predict(tiles2)

# The targets were divided by 255 for training, so the predictions are on the
# same 0..1 scale. Undo that scaling, and clip because the linear output layer
# can overshoot the valid range in either direction.
preds_u8 = np.clip(np.rint(preds * 255.), 0, 255).astype(np.uint8)

# reconstruct predictions
reconstructed = np.zeros((img.shape[0],
                          img.shape[1]),
                         dtype=np.uint8)

# reconstruction process
# preds has shape (n_tiles, HEIGHT, WIDTH, 1): index the *channel* axis with
# an ellipsis. Indexing with [:, :, -1] would pick the last column instead,
# yielding (HEIGHT, 1) tiles that broadcast into stripes across every tile.
for tile, (y_start, y_end, x_start, x_end) in zip(preds_u8[..., -1], idxs):
    y_end = min(y_end, img.shape[0])
    x_end = min(x_end, img.shape[1])
    reconstructed[y_start:y_end, x_start:x_end] = tile[:(y_end - y_start), :(x_end - x_start)]

im = Image.fromarray(reconstructed)
im = im.resize(INIT_SIZE)
im.show()
You can find the data here
If I use:
def normalize_arr_to_uint8(arr):
    """Min-max stretch ``arr`` onto the full 0..255 range as ``uint8``.

    Args:
        arr: Array-like of real numbers.

    Returns:
        A ``uint8`` array of the same shape: the minimum maps to 0, the
        maximum to 255, and values in between are truncated toward zero.
        A constant input has no range to stretch and maps to all zeros; an
        empty input is returned as an empty ``uint8`` array.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        return arr.astype(np.uint8)
    # Narrow integer dtypes can wrap around when the minimum is subtracted,
    # so do the maths in float. Float inputs keep their own precision.
    if not np.issubdtype(arr.dtype, np.inexact):
        arr = arr.astype(np.float64)

    the_min = arr.min()
    span = arr.max() - the_min
    if span == 0:
        # Avoid 0/0 -> NaN, whose cast to uint8 is undefined.
        return np.zeros(arr.shape, dtype=np.uint8)

    return ((arr - the_min) / span * 255.).astype(np.uint8)
# Predict on the scaled tiles and min-max stretch the result to uint8 in one go.
preds = normalize_arr_to_uint8(model.predict(tiles2))
then I get an image which seems right, but with lines all over it.
from image reconstruction from predicted array (normalize - unnormalize array?)
No comments:
Post a Comment