I want to convert a tab-delimited text into a 2D tensor object so that I can feed the data into a CNN.
What is the proper way to do this?
I wrote the following:
from typing import List, Union, cast
import tensorflow as tf
CellType = Union[str, float, int, bool]
RowType = List[CellType]
# Mapping Python types to TensorFlow data types
TF_DATA_TYPES = {
str: tf.string,
float: tf.float32,
int: tf.int32,
bool: tf.bool
}
def convert_string_to_tensorflow_object(data_string):
# Split the string into lines
linesStringList1d: List[str] = data_string.strip().split('\n')
# Split each line into columns
dataStringList2d: List[List[str]] = []
for line in linesStringList1d:
rowItem: List[str] = line.split(' ')
dataStringList2d.append(rowItem)
# Convert the data to TensorFlow tensors
listOfRows: List[RowType] = []
for rowItem in dataStringList2d:
oneRow: RowType = []
for stringItem in rowItem:
oneRow.append(cast(CellType, stringItem))
listOfRows.append(oneRow)
# Get the TensorFlow data type based on the Python type of CellType
tf_data_type = TF_DATA_TYPES[type(CellType)]
listOfRows = tf.constant(listOfRows, dtype=tf_data_type)
# Create a TensorFlow dataset
return listOfRows
if __name__ == "__main__":
# Example usage
data_string: str = """
1 ASN C 7.042 9.118 0.000 1 1 1 1 1 0
2 LEU H 5.781 5.488 7.470 0 0 0 0 1 0
3 THR H 5.399 5.166 6.452 0 0 0 0 0 0
4 GLU H 5.373 4.852 6.069 0 0 0 0 1 0
5 LEU H 5.423 5.164 6.197 0 0 0 0 2 0
"""
tensorflow_dataset = convert_string_to_tensorflow_object(data_string)
print(tensorflow_dataset)
Output:
C:\Users\pc\AppData\Local\Programs\Python\Python311\python.exe C:/git/heca_v2~~2/src/cnn_lib/convert_string_to_tensorflow_object.py
Traceback (most recent call last):
File "C:\git\heca_v2~~2\src\cnn_lib\convert_string_to_tensorflow_object.py", line 51, in <module>
tensorflow_dataset = convert_string_to_tensorflow_object(data_string)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\git\heca_v2~~2\src\cnn_lib\convert_string_to_tensorflow_object.py", line 34, in convert_string_to_tensorflow_object
tf_data_type = TF_DATA_TYPES[type(CellType)]
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^
KeyError: <class 'typing._UnionGenericAlias'>
Process finished with exit code 1
Can I resolve the error?
from How can I convert a Union type into a tensor type?
No comments:
Post a Comment