Thursday 22 July 2021

KeyError: "['month' 'day'] not in index"

I have the following code which:

Splits the data into two dataframes: one for all years between 2005 and 2014, and another for 2015 alone. Then I get the min and max per day across all weather stations. Finally, I need to add the min and max of df2015 to df so I can check whether, in 2015, the min or max for a given day broke the min or max of the previous 10 years.

# Compute the per-(month, day) min and max of Data_Value, once for years before
# 2015 and once for 2015 alone, then try to merge the two results on month/day.
# NOTE: the path string contains no '{}' placeholder, so .format(binsize) leaves it unchanged.
df = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
    df['Date'] =  pd.to_datetime(df['Date'], infer_datetime_format=True)
    df = df.sort_values(['Date', 'ID'], ascending=[True, False])
    # Split the date into parts so rows can be grouped by calendar day regardless of year.
    df['day'] = df['Date'].dt.day
    df['month'] = df['Date'].dt.month
    df['year'] = df['Date'].dt.year
    # Keep only the rows from before 2015.
    df = df[df['year'] < 2015]
    # After this groupby, 'month' and 'day' are index levels rather than columns
    # (see the printed dataframe further down in the post).
    df = df.groupby(['month','day'])['Data_Value'].agg(['min', 'max']) 
    print(df)
    
    # Same pipeline again on a fresh read of the same file, this time keeping 2015 only.
    df2015 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
    df2015['Date'] =  pd.to_datetime(df2015['Date'], infer_datetime_format=True)
    df2015 = df2015.sort_values(['Date', 'ID'], ascending=[True, False])
    df2015['day'] = df2015['Date'].dt.day
    df2015['month'] = df2015['Date'].dt.month
    df2015['year'] = df2015['Date'].dt.year
    df2015 = df2015[df2015['year'] == 2015]
    df2015 = df2015.groupby(['month','day'])['Data_Value'].agg(['min', 'max']) 
    # Rename the aggregate columns so they do not clash with df's 'min'/'max'.
    df2015.rename(columns = {'min':'min2015'}, inplace = True)
    df2015.rename(columns = {'max':'max2015'}, inplace = True)
    print(df2015)
    
    # The traceback points at the column selection below: 'month' and 'day' are
    # index levels of df2015 here, not columns, hence
    # KeyError: "['month' 'day'] not in index".
    df_2_cols_to_use = ['month', 'day', 'min2015', 'max2015']
    new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
    # NOTE(review): dffinal is not assigned anywhere in this snippet; the merge
    # result is stored in new_df.
    print(dffinal)
    

So basically, I need to add the min2015 and max2015 columns to df, matched on the month and day columns.

each dataframe looks like this:

 min  max
month day          
1     1   -160  156
      2   -267  139
      3   -267  133
      4   -261  106






 min2015  max2015
month day                  
1     1       -133       11
      2       -122       39
      3        -67       39

However, I get this error:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-26-ccd285c88c17> in <module>()
     42 
     43 
---> 44 leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')

<ipython-input-26-ccd285c88c17> in leaflet_plot_stations(binsize, hashid)
     28 
     29     df_2_cols_to_use = ['month', 'day', 'min2015', 'max2015']
---> 30     new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
     31     print(dffinal)
     32 

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2051         if isinstance(key, (Series, np.ndarray, Index, list)):
   2052             # either boolean or fancy integer index
-> 2053             return self._getitem_array(key)
   2054         elif isinstance(key, DataFrame):
   2055             return self._getitem_frame(key)

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_array(self, key)
   2095             return self.take(indexer, axis=0, convert=False)
   2096         else:
-> 2097             indexer = self.ix._convert_to_indexer(key, axis=1)
   2098             return self.take(indexer, axis=1, convert=True)
   2099 

/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
   1228                 mask = check == -1
   1229                 if mask.any():
-> 1230                     raise KeyError('%s not in index' % objarr[mask])
   1231 
   1232                 return _values_from_object(indexer)

KeyError: "['month' 'day'] not in index"

Edit:

  1. I reset the indexes.
  2. I printed df and df2015 columns.

My output is:

Index(['month', 'day', 'min', 'max'], dtype='object')
Index(['month', 'day', 'min2015', 'max2015'], dtype='object')

My updated code:

 # Updated attempt: same per-(month, day) min/max aggregation as before, but
 # reset_index() is applied afterwards so 'month' and 'day' become ordinary columns
 # (confirmed by the printed column lists shown above in the post).
 df = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
    df['Date'] =  pd.to_datetime(df['Date'], infer_datetime_format=True)
    df = df.sort_values(['Date', 'ID'], ascending=[True, False])
    df['day'] = df['Date'].dt.day
    df['month'] = df['Date'].dt.month
    df['year'] = df['Date'].dt.year
    # Keep only the rows from before 2015.
    df = df[df['year'] < 2015]
    #df = df.groupby(df['Date'].dt.strftime('%m-%d'))['Data_Value'].agg(['min', 'max'])
    df = df.groupby(['month','day'])['Data_Value'].agg(['min', 'max']) 
    # Move the 'month'/'day' index levels back into columns.
    df=df.reset_index()
    print(df.columns)
    
    # Same pipeline on a fresh read of the same file, keeping 2015 only.
    df2015 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
    df2015['Date'] =  pd.to_datetime(df2015['Date'], infer_datetime_format=True)
    df2015 = df2015.sort_values(['Date', 'ID'], ascending=[True, False])
    df2015['day'] = df2015['Date'].dt.day
    df2015['month'] = df2015['Date'].dt.month
    df2015['year'] = df2015['Date'].dt.year
    df2015 = df2015[df2015['year'] == 2015]
    df2015 = df2015.groupby(['month','day'])['Data_Value'].agg(['min', 'max']) 
    # Rename the aggregate columns so they do not clash with df's 'min'/'max'.
    df2015.rename(columns = {'min':'min2015'}, inplace = True)
    df2015.rename(columns = {'max':'max2015'}, inplace = True)
    df2015=df2015.reset_index()
    print(df2015.columns)
    # NOTE(review): only 'min2015' and 'max2015' are selected here, so the
    # right-hand frame passed to merge no longer has the 'month'/'day' columns
    # named in on=[...]; this matches the KeyError: 'month' raised from
    # right[rk] in the traceback below.
    df_2_cols_to_use = ['min2015', 'max2015']   
    #Finally:
    new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')

Full stack trace of the error:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2133             try:
-> 2134                 return self._engine.get_loc(key)
   2135             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'month'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-19-b9dfd4ccf2f2> in <module>()
     43 
     44 
---> 45 leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')

<ipython-input-19-b9dfd4ccf2f2> in leaflet_plot_stations(binsize, hashid)
     31     df_2_cols_to_use = ['min2015', 'max2015']
     32     #Finally:
---> 33     new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
     34 
     35 

/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
     59                          right_on=right_on, left_index=left_index,
     60                          right_index=right_index, sort=sort, suffixes=suffixes,
---> 61                          copy=copy, indicator=indicator)
     62     return op.get_result()
     63 if __debug__:

/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator)
    541         (self.left_join_keys,
    542          self.right_join_keys,
--> 543          self.join_names) = self._get_merge_keys()
    544 
    545     def get_result(self):

/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in _get_merge_keys(self)
    808                     if not is_rkey(rk):
    809                         if rk is not None:
--> 810                             right_keys.append(right[rk]._values)
    811                         else:
    812                             # work-around for merge_asof(right_index=True)

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3541 
   3542             if not isnull(item):
-> 3543                 loc = self.items.get_loc(item)
   3544             else:
   3545                 indexer = np.arange(len(self.items))[isnull(self.items)]

/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'month'


from KeyError: "['month' 'day'] not in index"

No comments:

Post a Comment