Skip to content

add dimension check to PointWiseDownscaler inputs #38

@dgergel

Description

@dgergel

Currently, if `member_id` is present as an additional dimension (which can easily happen if you grab multiple ensemble members from CMIP6 when searching the catalog), the `PointWiseDownscaler` fails with `ValueError: dimensions {'member_id'} do not exist. Expected one or more of ('time', 'lat', 'lon')`, which appears to come from a dask backend issue. I believe this also occurs with any other extra dimensions present in the input data (e.g. `height`). We should probably add a check on the inputs that raises a more informative error message naming the extra dimensions that need to be removed. The full traceback is included below for reference.

ValueError                                Traceback (most recent call last)
<ipython-input-81-1210dae294dd> in <module>
----> 1 predicted = model.predict(holdout_subset).load()

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in load(self, **kwargs)
    812         dask.array.compute
    813         """
--> 814         ds = self._to_temp_dataset().load(**kwargs)
    815         new = self._from_temp_dataset(ds)
    816         self._variable = new._variable

/opt/conda/lib/python3.7/site-packages/xarray/core/dataset.py in load(self, **kwargs)
    656 
    657             # evaluate all the dask arrays simultaneously
--> 658             evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    659 
    660             for k, data in zip(lazy_data, evaluated_data):

/opt/conda/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    434     keys = [x.__dask_keys__() for x in collections]
    435     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 436     results = schedule(dsk, keys, **kwargs)
    437     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    438 

/opt/conda/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   2570                     should_rejoin = False
   2571             try:
-> 2572                 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   2573             finally:
   2574                 for f in futures.values():

/opt/conda/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
   1870                 direct=direct,
   1871                 local_worker=local_worker,
-> 1872                 asynchronous=asynchronous,
   1873             )
   1874 

/opt/conda/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    765         else:
    766             return sync(
--> 767                 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    768             )
    769 

/opt/conda/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    332     if error[0]:
    333         typ, exc, tb = error[0]
--> 334         raise exc.with_traceback(tb)
    335     else:
    336         return result[0]

/opt/conda/lib/python3.7/site-packages/distributed/utils.py in f()
    316             if callback_timeout is not None:
    317                 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 318             result[0] = yield future
    319         except Exception as exc:
    320             error[0] = sys.exc_info()

/opt/conda/lib/python3.7/site-packages/tornado/gen.py in run(self)
    733 
    734                     try:
--> 735                         value = future.result()
    736                     except Exception:
    737                         exc_info = sys.exc_info()

/opt/conda/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1726                             exc = CancelledError(key)
   1727                         else:
-> 1728                             raise exception.with_traceback(traceback)
   1729                         raise exc
   1730                     if errors == "skip":

/opt/conda/lib/python3.7/site-packages/xarray/core/parallel.py in _wrapper()
    285         ]
    286 
--> 287         result = func(*converted_args, **kwargs)
    288 
    289         # check all dims are present

/opt/conda/lib/python3.7/site-packages/skdownscale/pointwise_models/core.py in _fit_wrapper()

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in __getitem__()
    641         else:
    642             # xarray-style array indexing
--> 643             return self.isel(indexers=self._item_key_to_dict(key))
    644 
    645     def __setitem__(self, key: Any, value: Any) -> None:

/opt/conda/lib/python3.7/site-packages/xarray/core/dataarray.py in isel()
   1051         # lists, or zero or one-dimensional np.ndarray's
   1052 
-> 1053         variable = self._variable.isel(indexers, missing_dims=missing_dims)
   1054 
   1055         coords = {}

/opt/conda/lib/python3.7/site-packages/xarray/core/variable.py in isel()
   1069         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
   1070 
-> 1071         indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
   1072 
   1073         key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)

/opt/conda/lib/python3.7/site-packages/xarray/core/utils.py in drop_dims_from_indexers()
    765         if invalid:
    766             raise ValueError(
--> 767                 f"dimensions {invalid} do not exist. Expected one or more of {dims}"
    768             )
    769 

ValueError: dimensions {'member_id'} do not exist. Expected one or more of ('time', 'lat', 'lon')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions