As part of a larger analysis task, where I calculate the RMS of frequency bands from a number of spectra, I get an odd KeyError when trying to do a groupby-apply operation on a slice of a dataframe with a float index. I have reduced it to the following minimal example:
--------------------------------------------------------------------------- KeyError Traceback (most recent call
last)
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in get_loc(self, key, method, tolerance) 3079 try:
-> 3080 return self._engine.get_loc(casted_key) 3081 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.Float64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in
pandas._libs.hashtable.Float64HashTable.get_item()
KeyError: 0.0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call
last) in
3 [f'Mean from {int(50*i)}' for i in df_test[50:].index // 50]
4 ).apply(
----> 5 np.mean
6 )
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\groupby\groupby.py
in apply(self, func, *args, **kwargs)
892 with option_context("mode.chained_assignment", None):
893 try:
--> 894 result = self._python_apply_general(f, self._selected_obj)
895 except TypeError:
896 # gh-20949
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\groupby\groupby.py
in _python_apply_general(self, f, data)
926 data after applying f
927 """
--> 928 keys, values, mutated = self.grouper.apply(f, data, self.axis)
929
930 return self._wrap_applied_output(
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\groupby\ops.py
in apply(self, f, data, axis)
201 ):
202 try:
--> 203 result_values, mutated = splitter.fast_apply(f, sdata, group_keys)
204
205 except libreduction.InvalidApply as err:
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\groupby\ops.py
in fast_apply(self, f, sdata, names)
991 # must return keys::list, values::list, mutated::bool
992 starts, ends = lib.generate_slices(self.slabels, self.ngroups)
--> 993 return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
994
995 def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
pandas\_libs\reduction.pyx in
pandas._libs.reduction.apply_frame_axis0()
pandas\_libs\reduction.pyx in
pandas._libs.reduction.BlockSlider.__init__()
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\frame.py
in __getitem__(self, key) 2997 2998 # Do we have a
slicer (on rows)?
-> 2999 indexer = convert_to_index_sliceable(self, key) 3000 if indexer is not None: 3001 if
isinstance(indexer, np.ndarray):
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexing.py
in convert_to_index_sliceable(obj, key) 2205 idx = obj.index
2206 if isinstance(key, slice):
-> 2207 return idx._convert_slice_indexer(key, kind="getitem") 2208 2209 elif isinstance(key, str):
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\numeric.py
in _convert_slice_indexer(self, key, kind)
377 # We always treat getitem slicing as label-based
378 # translate to locations
--> 379 return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
380
381 @doc(Index.get_loc)
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in slice_indexer(self, start, end, step, kind) 5275
slice(1, 3, None) 5276 """
-> 5277 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) 5278 5279 # return a slice
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in slice_locs(self, start, end, step, kind) 5480 end_slice
= None 5481 if end is not None:
-> 5482 end_slice = self.get_slice_bound(end, "right", kind) 5483 if end_slice is None: 5484
end_slice = len(self)
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in get_slice_bound(self, label, side, kind) 5394 except
ValueError: 5395 # raise the original KeyError
-> 5396 raise err 5397 5398 if isinstance(slc, np.ndarray):
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in get_slice_bound(self, label, side, kind) 5388 # we need
to look up the label 5389 try:
-> 5390 slc = self.get_loc(label) 5391 except KeyError as err: 5392 try:
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\numeric.py
in get_loc(self, key, method, tolerance)
393 return nan_idxs
394
--> 395 return super().get_loc(key, method=method, tolerance=tolerance)
396
397 # ----------------------------------------------------------------
~\Miniconda3\envs\sens_test_v2_env\lib\site-packages\pandas\core\indexes\base.py
in get_loc(self, key, method, tolerance) 3080
return self._engine.get_loc(casted_key) 3081 except
KeyError as err:
-> 3082 raise KeyError(key) from err 3083 3084 if tolerance is not None:
KeyError: 0
There are a number of ways to make this work. First of all, if I use groupby-aggregate instead, it behaves as I expect:
If I do not slice the dataframe at first, it also works (except that I get the result for the entire dataframe):
Finally, had I used an integer index (but this does not make sense in my case), it would also have worked:
Any clues that would help me understand this are much appreciated; I must admit that I do not follow the error message.