I have written a function to get a sequence for LSTM/GRU sequence model based on group ID. I am not getting expected output.
Python Function:
def windowGeneratorByID(data, target, id_col_index, lookback, offset, batch_size=16):
min_index=0
max_index = data.shape[0]-offset
i = min_index + lookback
while 1:
if i + batch_size >= max_index:
i = min_index + lookback
rows = np.arange(i, min(i + batch_size, max_index))
i += len(rows)
samples = np.zeros((len(rows), lookback, data.shape[-1]))
targets = np.zeros((len(rows), target.shape[-1]))
for j, row in enumerate(rows):
indices = range(rows[j] - lookback, rows[j])
if data[rows[j] + offset][id_col_index] in set(data[indices][:, id_col_index]):
if len(set(data[indices][:, id_col_index])) == 1:
samples[j] = data[indices]
targets[j] = target[rows[j] + offset]
yield np.delete(samples,id_col_index,axis=2) , targets
Input:
df=np.array([[1,1,0.1,11],[1,2,0.2,12], [1,3,0.3,13], [1,4,0.4,14], [2,5,0.5,15], [2,6,0.6,16], [2,7,0.7,17],[3,8,0.8,18],[3,9,0.9,19],[3,10,0.7,20]])
Output Code:
lookback=2
batch_size=2
offset = 0
windows = windowGeneratorByID(data=df, target=df[:,2:4],id_col_index=0 , offset=offset, lookback=lookback,batch_size=batch_size)
#The number of total batches are equal to the number of (training examples - lookback-offset)/batch_size
no_batches=int((df.shape[0]-lookback-offset)/batch_size)
# #print the batches
for i in range(no_batches):
#get the next batch from the windowGenerator
input,output=next(windows)
print("{}th batch: \ninput is:\n{}\n and \ntarget is:\n{}\n".format(i+1, input, output))
Expected Output:
1th batch:
input is:
[[[ 1. 0.1 11. ]
[ 2. 0.2 12. ]]
[[ 2. 0.2 12. ]
[ 3. 0.3 13. ]]]
and
target is:
[[ 0.3 13. ]
[ 0.4 14. ]]
2nd batch:
input is:
[[[ 5. 0.5 15. ]
[ 6. 0.6 16. ]]
[[[ 8. 0.8 18. ]
[ 9. 0.9 19. ]]
and
target is:
[[ 0.7 17. ]
[ 0.7 20. ]]
from Create Sequences by Group ID using numpy array
No comments:
Post a Comment