Tuesday 16 March 2021

Use both CPU and GPU in parallel on Google Colab, using TensorFlow

I have a single file containing, say, 10 million floating-point numbers. I would like to sort this file using merge sort (custom code below). Here, I read 1 million numbers at a time, sort them, and write them to a separate file to be merged later.

Custom merge sort code below, using the TensorFlow 2.0 API.

import timeit

import numpy as np
import tensorflow as tf
def split_list(input_list):
    """Cut a sequence in two at its midpoint.

    Args:
        input_list: Any sliceable sequence that supports ``len``.

    Returns:
        A ``(left, right)`` tuple of slices. For an odd length the extra
        element ends up in ``right``.
    """
    half = len(input_list) // 2
    left_half = input_list[:half]
    right_half = input_list[half:]
    return left_half, right_half

def merge_sorted_lists(list_left, list_right):
    """Merge two ascending sequences into a single ascending list.

    Comparisons are done with ``tf.math`` ops (presumably so that they run
    under whatever ``tf.device`` scope the caller has entered -- confirm).

    Args:
        list_left: Ascending, indexable sequence supporting ``len`` and
            slicing.
        list_right: Ascending sequence of the same kind as ``list_left``.

    Returns:
        A new Python list holding every element of both inputs in ascending
        order. If one input is empty, the other input object is returned
        as-is (not copied, not converted to a list).
    """
    if tf.math.equal(len(list_left), 0):
        return list_right
    elif tf.math.equal(len(list_right), 0):
        return list_left

    index_left = index_right = 0
    list_merged = []  # list to build and return
    list_len_target = len(list_left) + len(list_right)
    while tf.math.less(len(list_merged), list_len_target):
        if tf.math.less_equal(list_left[index_left], list_right[index_right]):
            # Value on the left list is smaller (or equal so it should be
            # selected first, which keeps the merge stable).
            list_merged.append(list_left[index_left])
            index_left += 1
        else:
            # Right value is strictly smaller.
            list_merged.append(list_right[index_right])
            index_right += 1

        # Once one side is exhausted, the remainder of the other side is
        # already sorted: append it wholesale, which also ends the loop
        # because the merged length then reaches the target.
        if tf.math.equal(index_right, len(list_right)):
            list_merged.extend(list_left[index_left:])
        elif tf.math.equal(index_left, len(list_left)):
            list_merged.extend(list_right[index_right:])
    return list_merged

def merge_sort(input_list):
    """Sort a sequence in ascending order with a recursive merge sort.

    Args:
        input_list: Sliceable sequence supporting ``len``.

    Returns:
        ``input_list`` itself when it has zero or one element; otherwise the
        result of ``merge_sorted_lists`` applied to the two recursively
        sorted halves.
    """
    if tf.math.less_equal(len(input_list), 1):
        return input_list
    # The recursive step must sit outside the base-case branch; when nested
    # under the early return it is unreachable and the function yields None.
    left, right = split_list(input_list)
    return merge_sorted_lists(merge_sort(left), merge_sort(right))

The code below calls the merge_sort function above on the required host/device.

def cpu(input_list):
  """Run ``merge_sort`` inside a ``/cpu:0`` device scope.

  Returns a list with one string per sorted value, each being ``str(value)``
  followed by a newline.
  """
  with tf.device('/cpu:0'):
    sorted_values = merge_sort(input_list)
    text_rows = []
    for value in sorted_values:
      text_rows.append(str(value) + "\n")
  return text_rows

def GPU(input_list):
  """Run ``merge_sort`` inside a ``/device:GPU:0`` device scope.

  Returns a list with one string per sorted value, each being ``str(value)``
  followed by a newline.
  """
  with tf.device('/device:GPU:0'):
    sorted_values = merge_sort(input_list)
    text_rows = []
    for value in sorted_values:
      text_rows.append(str(value) + "\n")
  return text_rows

The above CPU or GPU functions are invoked as below, using an iterator to read 1 million numbers at a time:

# Stream the input file as batches of one million parsed float32 values.
dataset = tf.data.TextLineDataset("numbers.txt") #10 million rows in single file
dataset = dataset.batch(1_000_000)  # divide into 10 batches of 1million each
dataset = dataset.map(lambda x: tf.strings.to_number(tf.strings.strip(x), tf.float32))
iterator = iter(dataset)

# First batch is sorted under the CPU device scope. `.numpy()` converts the
# whole batch tensor in one step instead of building one eager tensor per
# element, so the timing below is not dominated by the conversion.
start_time = timeit.default_timer()
lines = cpu(iterator.get_next().numpy())
print("cpu_time ", timeit.default_timer() - start_time)

# Second batch is sorted under the GPU device scope.
start_time = timeit.default_timer()
lines = GPU(iterator.get_next().numpy())
print("gpu_time ", timeit.default_timer() - start_time)

fid = 1
# NOTE(review): the file used to be opened but never written or closed.
# Writing the most recent sorted chunk here follows the stated goal of saving
# each sorted chunk to its own file -- confirm which chunk belongs in which file.
with open('chunk_{}.txt'.format(fid), 'w') as f_out:
    f_out.writelines(lines)

The above code is run on a Google Colab GPU instance: the first 1 million records are sorted on the CPU and the second million on the GPU. However, I would like to use both the CPU and the GPU in parallel to run merge sort on different sets of a million numbers. Ignore merging for now. I would appreciate sample code (if only a few lines are needed) or guidance (if a lot of work is involved) to achieve this.

