Source code for ochanticipy.datasources.datasource
"""Base class for ochanticipy data source."""
from abc import ABC, abstractmethod
from pathlib import Path
from ochanticipy.config.countryconfig import CountryConfig
from ochanticipy.config.pathconfig import PathConfig
_GLOBAL_DIR = "glb"
[docs]
class DataSource(ABC):
"""
Base abstract class object that contains path convenience functions.
Cannot itself be instantiated. ``__init__``, ``download()``,
``load()``, and ``process()`` methods required for subclass to be
instantiated.
Parameters
----------
country_config: CountryConfig
Country configuration
datasource_base_dir : str
Module directory name (usually correspond to data source)
is_public: bool, default = False
Whether the dataset is public or private. Determines top-level
directory structure.
is_global_raw: bool, default = False
Whether the raw dataset should be saved in the `glb` folder. This is
normally done when it has global or regional coverage.
is_global_processed: bool, default = False
Whether the processed dataset should be saved in the `glb` folder.
This is normally done when it has global or regional coverage.
config_datasource_name: str = None
The name of the attribute in the config file
"""
@abstractmethod
def __init__(
self,
country_config: CountryConfig,
datasource_base_dir: str,
is_public: bool = False,
is_global_raw: bool = False,
is_global_processed: bool = False,
config_datasource_name: str = None,
):
if config_datasource_name is not None:
self._datasource_config = self._config_attribute_name_validator(
config_datasource_name=config_datasource_name,
country_config=country_config,
)
self._country_config = country_config
self._datasource_base_dir = datasource_base_dir
self._path_config = PathConfig()
self._raw_base_dir = self._get_base_dir(
is_public=is_public, is_raw=True, is_global=is_global_raw
)
self._processed_base_dir = self._get_base_dir(
is_public=is_public, is_raw=False, is_global=is_global_processed
)
@staticmethod
def _config_attribute_name_validator(
config_datasource_name: str, country_config: CountryConfig
):
try:
datasource_config = getattr(country_config, config_datasource_name)
except AttributeError:
datasource_config = None
# If the datasource is one of the defaults, it's set to None and
# thus an attribute error won't be raised. So also need to check for
# the case when it is None.
if datasource_config is None:
raise AttributeError(
f"{config_datasource_name} needs to be added to the "
f"config file. See the documentation for more details."
)
return datasource_config
def _get_base_dir(
self,
is_public: bool,
is_raw: bool,
is_global: bool = False,
) -> Path:
"""
Define the base_dir.
Parameters
----------
is_public: bool
Whether the dataset is public or private. Determines top-level
directory structure.
is_raw: bool
Whether the dataset is raw or processed
is_global: bool
Whether the dataset is global (or regional) or specific to the
iso3
"""
permission_dir = (
self._path_config.public
if is_public
else self._path_config.private
)
state_dir = (
self._path_config.raw if is_raw else self._path_config.processed
)
region_dir = (
self._country_config.iso3 if not is_global else _GLOBAL_DIR
)
return (
self._path_config.base_path
/ permission_dir
/ state_dir
/ region_dir
/ self._datasource_base_dir
)
[docs]
@abstractmethod
def download(self, clobber: bool = False):
"""Abstract method for downloading."""
pass
[docs]
@abstractmethod
def process(self, clobber: bool = False):
"""Abstract method for processing."""
pass
[docs]
@abstractmethod
def load(self):
"""Abstract method for loading."""
pass