I have the following code, which:
Splits the data into two dataframes: one for all years between 2005 and 2014, and another for 2015 alone. I then get the min and max per day across all weather stations. Finally, I need to add the min and max of df2015 to df so I can check whether, in 2015, the daily min or max broke the min or max of the previous 10 years.
# First attempt: build per-(month, day) min/max of Data_Value for the years
# before 2015 and for 2015 alone, then try to merge the two on month/day.
# NOTE(review): the traceback shows this code running inside
# leaflet_plot_stations(binsize, hashid), so binsize is presumably that
# function's parameter -- confirm against the full notebook cell.

# The path literal contains no '{}' placeholder, so .format(binsize) returns
# the string unchanged.
df = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
# Parse the Date column so the .dt accessors below can be used.
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True)
# Order rows by Date ascending, then ID descending.
df = df.sort_values(['Date', 'ID'], ascending=[True, False])
# Split the date into separate day / month / year columns.
df['day'] = df['Date'].dt.day
df['month'] = df['Date'].dt.month
df['year'] = df['Date'].dt.year
# Keep only rows from years before 2015.
df = df[df['year'] < 2015]
# Collapse to one row per (month, day) holding the min and max of Data_Value.
# The grouping keys become the index of the result (the printed frame shows
# 'month' and 'day' as index levels); they are no longer ordinary columns.
df = df.groupby(['month','day'])['Data_Value'].agg(['min', 'max'])
print(df)
# The same file is read again and prepared the same way; only the year filter
# differs.
df2015 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
df2015['Date'] = pd.to_datetime(df2015['Date'], infer_datetime_format=True)
df2015 = df2015.sort_values(['Date', 'ID'], ascending=[True, False])
df2015['day'] = df2015['Date'].dt.day
df2015['month'] = df2015['Date'].dt.month
df2015['year'] = df2015['Date'].dt.year
# Keep only rows from 2015.
df2015 = df2015[df2015['year'] == 2015]
# As above, 'month' and 'day' end up in the index of the aggregated frame.
df2015 = df2015.groupby(['month','day'])['Data_Value'].agg(['min', 'max'])
# Rename the aggregates to min2015 / max2015, the column names wanted in the
# merged result.
df2015.rename(columns = {'min':'min2015'}, inplace = True)
df2015.rename(columns = {'max':'max2015'}, inplace = True)
print(df2015)
# This is where the reported KeyError is raised: df2015[...] looks up column
# labels, and 'month' / 'day' are index levels at this point, hence
# "['month' 'day'] not in index".
# NOTE(review): calling reset_index() on both frames first, or merging with
# left_index=True / right_index=True, would presumably avoid this -- confirm.
df_2_cols_to_use = ['month', 'day', 'min2015', 'max2015']
new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
# NOTE(review): dffinal is not assigned anywhere in the visible code (the
# merge result is named new_df); this would raise NameError unless dffinal is
# defined elsewhere -- confirm.
print(dffinal)
So basically, I need to add the min2015 and max2015 columns to df, matched on the month and day columns.
Each dataframe looks like this:
min max
month day
1 1 -160 156
2 -267 139
3 -267 133
4 -261 106
min2015 max2015
month day
1 1 -133 11
2 -122 39
3 -67 39
However, I get this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-26-ccd285c88c17> in <module>()
42
43
---> 44 leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')
<ipython-input-26-ccd285c88c17> in leaflet_plot_stations(binsize, hashid)
28
29 df_2_cols_to_use = ['month', 'day', 'min2015', 'max2015']
---> 30 new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
31 print(dffinal)
32
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2051 if isinstance(key, (Series, np.ndarray, Index, list)):
2052 # either boolean or fancy integer index
-> 2053 return self._getitem_array(key)
2054 elif isinstance(key, DataFrame):
2055 return self._getitem_frame(key)
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_array(self, key)
2095 return self.take(indexer, axis=0, convert=False)
2096 else:
-> 2097 indexer = self.ix._convert_to_indexer(key, axis=1)
2098 return self.take(indexer, axis=1, convert=True)
2099
/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1228 mask = check == -1
1229 if mask.any():
-> 1230 raise KeyError('%s not in index' % objarr[mask])
1231
1232 return _values_from_object(indexer)
KeyError: "['month' 'day'] not in index"
Edit:
- I reset the indexes
- I printed df and df2015 columns.
My output is:
Index(['month', 'day', 'min', 'max'], dtype='object')
Index(['month', 'day', 'min2015', 'max2015'], dtype='object')
My updated code:
# Second attempt: same per-(month, day) min/max aggregation as before, but
# each aggregated frame is passed through reset_index() before the merge.
# NOTE(review): the traceback shows this code running inside
# leaflet_plot_stations(binsize, hashid), so binsize is presumably that
# function's parameter -- confirm against the full notebook cell.

# The path literal contains no '{}' placeholder, so .format(binsize) returns
# the string unchanged.
df = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
# Parse the Date column so the .dt accessors below can be used.
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True)
# Order rows by Date ascending, then ID descending.
df = df.sort_values(['Date', 'ID'], ascending=[True, False])
# Split the date into separate day / month / year columns.
df['day'] = df['Date'].dt.day
df['month'] = df['Date'].dt.month
df['year'] = df['Date'].dt.year
# Keep only rows from years before 2015.
df = df[df['year'] < 2015]
#df = df.groupby(df['Date'].dt.strftime('%m-%d'))['Data_Value'].agg(['min', 'max'])
# One row per (month, day) with the min and max of Data_Value.
df = df.groupby(['month','day'])['Data_Value'].agg(['min', 'max'])
# Turn the 'month' / 'day' index levels back into ordinary columns; the
# printed columns are then ['month', 'day', 'min', 'max'].
df=df.reset_index()
print(df.columns)
# The same file is read again and prepared the same way; only the year filter
# differs.
df2015 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'.format(binsize))
df2015['Date'] = pd.to_datetime(df2015['Date'], infer_datetime_format=True)
df2015 = df2015.sort_values(['Date', 'ID'], ascending=[True, False])
df2015['day'] = df2015['Date'].dt.day
df2015['month'] = df2015['Date'].dt.month
df2015['year'] = df2015['Date'].dt.year
# Keep only rows from 2015.
df2015 = df2015[df2015['year'] == 2015]
df2015 = df2015.groupby(['month','day'])['Data_Value'].agg(['min', 'max'])
# Rename the aggregates to min2015 / max2015, the column names wanted in the
# merged result.
df2015.rename(columns = {'min':'min2015'}, inplace = True)
df2015.rename(columns = {'max':'max2015'}, inplace = True)
# After this, the printed columns are ['month', 'day', 'min2015', 'max2015'].
df2015=df2015.reset_index()
print(df2015.columns)
# Only the two value columns are listed here, so df2015[df_2_cols_to_use]
# no longer contains 'month' or 'day'.
df_2_cols_to_use = ['min2015', 'max2015']
#Finally:
# This is where the reported KeyError: 'month' is raised: the right-hand
# frame passed to merge has had its key columns dropped by the selection
# above, so the 'month' key cannot be looked up on it.
# NOTE(review): passing df2015 itself, or including 'month' and 'day' in
# df_2_cols_to_use, would presumably keep the keys available -- confirm.
new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
Full stack trace of the error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'month'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-19-b9dfd4ccf2f2> in <module>()
43
44
---> 45 leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')
<ipython-input-19-b9dfd4ccf2f2> in leaflet_plot_stations(binsize, hashid)
31 df_2_cols_to_use = ['min2015', 'max2015']
32 #Finally:
---> 33 new_df = pd.merge(df, df2015[df_2_cols_to_use], on=['month', 'day'], how='inner')
34
35
/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
59 right_on=right_on, left_index=left_index,
60 right_index=right_index, sort=sort, suffixes=suffixes,
---> 61 copy=copy, indicator=indicator)
62 return op.get_result()
63 if __debug__:
/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator)
541 (self.left_join_keys,
542 self.right_join_keys,
--> 543 self.join_names) = self._get_merge_keys()
544
545 def get_result(self):
/opt/conda/lib/python3.6/site-packages/pandas/tools/merge.py in _get_merge_keys(self)
808 if not is_rkey(rk):
809 if rk is not None:
--> 810 right_keys.append(right[rk]._values)
811 else:
812 # work-around for merge_asof(right_index=True)
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'month'
from KeyError: "['month' 'day'] not in index"
No comments:
Post a Comment