Source code for tables_io.types

"""Type defintions for tables_io"""

import os

from collections import OrderedDict
from collections.abc import Mapping, Iterable

from .lazy_modules import apTable, pd
from .arrayUtils import arrayLength

# Tabular data formats
# Integer codes identifying the supported in-memory table representations.
AP_TABLE = 0
NUMPY_DICT = 1
PD_DATAFRAME = 2

# Human-readable name -> tabular format code
TABULAR_FORMAT_NAMES = OrderedDict([
    ('astropyTable', AP_TABLE),
    ('numpyDict', NUMPY_DICT),
    ('pandasDataFrame', PD_DATAFRAME)])

# Reverse lookup: tabular format code -> human-readable name
TABULAR_FORMATS = OrderedDict([(val, key) for key, val in TABULAR_FORMAT_NAMES.items()])


# File Formats
# Integer codes identifying the supported on-disk formats.
ASTROPY_FITS = 0
ASTROPY_HDF5 = 1
NUMPY_HDF5 = 2
PANDAS_HDF5 = 3
PANDAS_PARQUET = 4

# Human-readable name -> file format code
# NOTE(review): 'pandaParquet' lacks the 's' used by 'pandasHdf5'; the key is
# left as-is because callers may look formats up by this exact string.
FILE_FORMAT_NAMES = OrderedDict([
    ('astropyFits', ASTROPY_FITS),
    ('astropyHdf5', ASTROPY_HDF5),
    ('numpyHdf5', NUMPY_HDF5),
    ('pandasHdf5', PANDAS_HDF5),
    ('pandaParquet', PANDAS_PARQUET)])

# Default suffixes for various file formats
# The three HDF5 flavours are told apart purely by suffix: 'hf5', 'hdf5', 'h5'.
FILE_FORMAT_SUFFIXS = OrderedDict([
    ('fits', ASTROPY_FITS),
    ('hf5', ASTROPY_HDF5),
    ('hdf5', NUMPY_HDF5),
    ('h5', PANDAS_HDF5),
    ('pq', PANDAS_PARQUET)])

# Default table key for each file suffix
# NOTE(review): presumably the key a table is stored under when none is given,
# with None meaning "no key" -- confirm against the I/O code.
DEFAULT_TABLE_KEY = OrderedDict([
    ('fits', ''),
    ('hf5', None),
    ('hdf5', None),
    ('h5', 'data'),
    ('pq', '')])

# Reverse lookup: file format code -> human-readable name
FILE_FORMATS = OrderedDict([(val, key) for key, val in FILE_FORMAT_NAMES.items()])

# Reverse lookup: file format code -> default file suffix
FILE_FORMAT_SUFFIX_MAP = OrderedDict([(val, key) for key, val in FILE_FORMAT_SUFFIXS.items()])

# Default format to write various table types
NATIVE_FORMAT = OrderedDict([
    (AP_TABLE, ASTROPY_HDF5),
    (NUMPY_DICT, NUMPY_HDF5),
    (PD_DATAFRAME, PANDAS_PARQUET)])

# Reverse lookup: native file format code -> tabular format code
NATIVE_TABLE_TYPE = OrderedDict([(val, key) for key, val in NATIVE_FORMAT.items()])

# Allowed formats to write various table types
# Every value is a list (native format first) so that membership tests such as
# `fmt in ALLOWED_FORMATS[tType]` work uniformly for all table types.
ALLOWED_FORMATS = OrderedDict([
    (AP_TABLE, [ASTROPY_HDF5, ASTROPY_FITS]),
    (NUMPY_DICT, [NUMPY_HDF5]),
    (PD_DATAFRAME, [PANDAS_PARQUET, PANDAS_HDF5])])


def tableType(obj):
    """Work out which supported tabular format an object is

    Parameters
    ----------
    obj : `object`
        The object to classify

    Returns
    -------
    otype : `int`
        The table type code, one of `TABULAR_FORMATS.keys()`

    Raises
    ------
    TypeError : The object is not a supported type, or one of the
        values of a Mapping is itself table-like or is not iterable
    IndexError : One of the columns in a Mapping is the wrong length
    """
    if isinstance(obj, apTable.Table):
        return AP_TABLE
    if isinstance(obj, pd.DataFrame):
        return PD_DATAFRAME
    if not isinstance(obj, Mapping):
        raise TypeError("Object of type %s is not one of the supported types %s" %
                        (type(obj), list(TABULAR_FORMAT_NAMES.keys())))

    # A plain Mapping only counts as a table when every value is a flat,
    # iterable column and all of the columns share a single length.
    nestedTypes = (Mapping, apTable.Table, pd.DataFrame)
    expected = None
    for name, column in obj.items():
        if isinstance(column, nestedTypes):
            raise TypeError("Column %s of type a Mapping %s" % (name, type(column)))
        if not isinstance(column, Iterable):  #pragma: no cover
            raise TypeError("Column %s of type %s is not iterable" % (name, type(column)))
        length = arrayLength(column)
        if expected is None:
            # The first column fixes the row count the others must match
            expected = length
        elif length != expected:
            raise IndexError("Column %s length %i != %i" % (name, length, expected))  #pylint: disable=bad-string-format-type
    return NUMPY_DICT
def istablelike(obj):
    """Check whether an object is one of the supported table types

    Parameters
    ----------
    obj : `object`
        The object to check

    Returns
    -------
    tablelike : `bool`
        True if `tableType` accepts the object, False if it raises
        `TypeError` or `IndexError`
    """
    try:
        tableType(obj)
    except (TypeError, IndexError):
        return False
    else:
        return True
def istabledictlike(obj):
    """Check whether an object is a `Mapping` whose values are all `Tablelike`

    Parameters
    ----------
    obj : `object`
        The object to check

    Returns
    -------
    tabledict : `bool`
        True if the object is a `Mapping` and every one of its values
        is `Tablelike` (an empty `Mapping` qualifies), False otherwise
    """
    if isinstance(obj, Mapping):
        # all() stops at the first value that is not table-like
        return all(istablelike(table) for table in obj.values())
    return False
def fileType(filepath, fmt=None):
    """Work out which supported file format a path refers to

    Parameters
    ----------
    filepath : `str`
        The path to the file
    fmt : `str` or `None`
        If given, used in place of the file extension

    Returns
    -------
    otype : `int`
        The file type code, looked up in `FILE_FORMAT_SUFFIXS`

    Raises
    ------
    KeyError : The suffix (or `fmt`) is not a supported value
    """
    if fmt is None:
        # Take the extension and drop its leading '.'
        _, extension = os.path.splitext(filepath)
        fmt = extension[1:]
    try:
        return FILE_FORMAT_SUFFIXS[fmt]
    except KeyError as msg:
        supported = list(FILE_FORMAT_SUFFIXS.keys())
        raise KeyError("Unknown file format %s, supported types are %s" % (fmt, supported)) from msg