Source code for tables_io.conv.conv_table

"""Single-Table Conversion Functions for tables_io"""

from collections import OrderedDict
from typing import Union, Mapping, Optional
from io import StringIO

import numpy as np

from ..utils.array_utils import force_to_pandables
from ..lazy_modules import apTable, fits, json, pd, pa
from ..types import (
    AP_TABLE,
    JSON_STRING,
    NUMPY_DICT,
    NUMPY_RECARRAY,
    PD_DATAFRAME,
    PA_TABLE,
    TABULAR_FORMAT_NAMES,
    TABULAR_FORMATS,
    is_table_like,
    table_type,
    tType_to_int,
)

### I. Single `Table-like` conversions

# I A. Generic `convert`


def convert_table(obj, tType: Union[str, int]):
    """
    Convert a `Table-like` object to a specific tabular format.

    Accepted table formats:

    ==================  ===============
    Format string       Format integer
    ==================  ===============
    "astropyTable"      0
    "numpyDict"         1
    "numpyRecarray"     2
    "pandasDataFrame"   3
    "pyarrowTable"      4
    "jsonString"        5
    ==================  ===============

    Parameters
    ----------
    obj : `Table-like`
        The object being converted
    tType : `int` or `str`
        The type of object to convert to

    Returns
    -------
    out : `Table-like`
        The converted object

    Raises
    ------
    RuntimeError
        If the selected converter raises; the original exception is chained
        as the cause and the offending object is included in the message.
    TypeError
        If `tType` resolves to a format for which there is no converter.
    """
    # Convert tType to an int if necessary
    int_tType = tType_to_int(tType)

    assert obj is not None

    # One converter per target format; the lookup replaces a chain of
    # otherwise identical try/except branches.
    converters = {
        AP_TABLE: convert_to_ap_table,
        NUMPY_DICT: convert_to_dict,
        NUMPY_RECARRAY: convert_to_recarray,
        PA_TABLE: convert_to_pa_table,
        PD_DATAFRAME: convert_to_dataframe,
        JSON_STRING: convert_to_json,
    }

    converter = converters.get(int_tType)
    if converter is None:
        # Report both what the caller asked for and what it resolved to.
        raise TypeError(
            f"Cannot convert to unsupported tableType {tType} ({int_tType})"
        )

    try:
        return converter(obj)
    except Exception as e:
        raise RuntimeError(
            f"Could not convert object to {TABULAR_FORMATS[int_tType]} because of error: \n {e}. \n \n Object to convert: {obj}"
        ) from e
### I B. Converting to `astropy.table.Table`
def pa_table_to_ap_table(table):
    """
    Build an `astropy.table.Table` from a `pyarrow.Table`.

    The conversion goes through pandas: the arrow table is first turned into
    a `pandas.DataFrame`, which is then handed to `data_frame_to_ap_table`.

    Parameters
    ----------
    table : `pyarrow.Table`
        The table

    Returns
    -------
    tab : `astropy.table.Table`
        The table
    """
    return data_frame_to_ap_table(table.to_pandas())
def data_frame_to_ap_table(df):
    """
    Build an `astropy.table.Table` from a `pandas.DataFrame`.

    Columns with `object` dtype are stacked row-wise with `numpy.vstack`;
    all other columns are passed through as plain numpy arrays. Entries of
    `df.attrs` are copied into the table's `meta`.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The dataframe

    Returns
    -------
    tab : `astropy.table.Table`
        The table
    """
    columns = OrderedDict()
    for name in df.columns:
        series = df[name]
        values = series.to_numpy()
        # Object columns hold one array-like per row; stack them into a
        # single 2-d array.
        columns[name] = np.vstack(values) if series.dtype.name == "object" else values

    result = apTable.Table(columns)
    for meta_key, meta_val in df.attrs.items():
        result.meta[meta_key] = meta_val
    return result
def json_to_ap_table(json_data):
    """
    Build an `astropy.table.Table` from a json string.

    The string is decoded with `json.loads` and the decoded object is passed
    directly to the `astropy.table.Table` constructor.

    Parameters
    ----------
    json_data : `str`
        The json

    Returns
    -------
    tab : `astropy.table.Table`
        The table
    """
    return apTable.Table(json.loads(json_data))
def convert_to_ap_table(obj):
    """
    Convert any supported `Table-like` object to an `astropy.table.Table`.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    tab : `astropy.table.Table`
        The table. An input that already is an astropy table is returned
        unchanged (same object).

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)

    if tType == AP_TABLE:
        return obj
    # Dicts of arrays and recarrays are both accepted by the Table constructor.
    if tType in (NUMPY_DICT, NUMPY_RECARRAY):
        return apTable.Table(obj)
    if tType == PA_TABLE:
        return pa_table_to_ap_table(obj)
    if tType == PD_DATAFRAME:
        # try this: apTable.from_pandas(obj)
        return data_frame_to_ap_table(obj)
    if tType == JSON_STRING:
        return json_to_ap_table(obj)

    raise TypeError(
        f"Table is an unsupported Table Type {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
    )
### I C. Converting to `OrderedDict`, (`str`, `numpy.array`)
def ap_table_to_dict(tab) -> Mapping:
    """
    Turn an `astropy.table.Table` into an `OrderedDict` of `str` : `numpy.array`

    Column names come from `tab.colnames` and are paired, in order, with the
    columns yielded by `tab.itercols()`; each column is copied into a new
    array with `numpy.array`.

    Parameters
    ----------
    tab : `astropy.table.Table`
        The table

    Returns
    --------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    return OrderedDict(
        (name, np.array(column))
        for name, column in zip(tab.colnames, tab.itercols())
    )
def recarray_to_dict(rec: np.recarray) -> Mapping:
    """
    Turn an `np.recarray` into an `OrderedDict` of `str` : `numpy.array`

    One entry is produced per field in `rec.dtype.names`, in that order; the
    value is whatever indexing the recarray by the field name returns.

    Parameters
    ----------
    rec : `np.recarray`
        The input recarray

    Returns
    --------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    fields = OrderedDict()
    for field_name in rec.dtype.names:
        fields[field_name] = rec[field_name]
    return fields
def dataframe_to_dict(df) -> Mapping:
    """
    Turn a `pandas.DataFrame` into an `OrderedDict` of `str` : `numpy.array`

    Columns with `object` dtype are stacked row-wise with `numpy.vstack`;
    every other column is copied into a new array with `numpy.array`.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The dataframe

    Returns
    --------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    arrays = OrderedDict()
    for name in df.keys():
        series = df[name]
        if series.dtype.name != "object":
            arrays[name] = np.array(series)
            continue
        # Object columns hold one array-like per row; stack them into 2-d.
        arrays[name] = np.vstack(series.to_numpy())
    return arrays
def pa_table_to_dict(rec) -> Mapping:
    """
    Turn a `pa.Table` into an `OrderedDict` of `str` : `numpy.array`

    Columns are visited in the order of `rec.schema.names` and each one is
    converted with its `to_numpy()` method.

    Parameters
    ----------
    rec : `pa.Table`
        The input table

    Returns
    --------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    columns = OrderedDict()
    for name in rec.schema.names:
        columns[name] = rec[name].to_numpy()
    return columns
def hdf5_group_to_dict(hg):
    """
    Turn a `hdf5` object into an `OrderedDict`, (`str`, `numpy.array`)

    Every key returned by `hg.keys()` is looked up on `hg` and the value is
    copied into a new array with `numpy.array`.

    Parameters
    ----------
    hg : `h5py.File` or `h5py.Group`
        The hdf5 object

    Returns
    --------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    return OrderedDict((name, np.array(hg[name])) for name in hg.keys())
def json_to_dict(json_data):
    """
    Convert a json string to an `OrderedDict` of numpy arrays

    The string is decoded with `json.loads`; each top-level value is then
    wrapped with `numpy.array`. Key order follows the decoded object.

    Parameters
    ----------
    json_data : `str`
        The json

    Returns
    -------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata
    """
    decoded = json.loads(json_data)
    # Return an OrderedDict, as documented and as the sibling *_to_dict
    # helpers do; OrderedDict is a dict subclass so existing callers that
    # expect a dict keep working.
    return OrderedDict((name, np.array(values)) for name, values in decoded.items())
def convert_to_dict(obj):
    """
    Convert any supported `Table-like` object to a dict of numpy arrays.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    data : `OrderedDict`, (`str` : `numpy.array`)
        The tabledata. An input that already is a numpy dict is returned
        unchanged (same object).

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)

    # Already in the target format: hand the input straight back.
    if tType == NUMPY_DICT:
        return obj

    to_dict = {
        AP_TABLE: ap_table_to_dict,
        PA_TABLE: pa_table_to_dict,
        NUMPY_RECARRAY: recarray_to_dict,
        PD_DATAFRAME: dataframe_to_dict,
        JSON_STRING: json_to_dict,
    }.get(tType)

    if to_dict is None:
        raise TypeError(
            f"Could not convert table because it is an unsupported TableType {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
        )
    return to_dict(obj)
### I D. Converting to `np.recarray`
def pa_table_to_recarray(tab):
    """
    Convert an `pyarrow.Table` to an `numpy.recarray`

    The conversion uses the same route `convert_to_recarray` takes for
    pyarrow input: the table is first converted to an `astropy.table.Table`
    and that table is then converted to a recarray.

    Parameters
    ----------
    tab : `pyarrow.Table`
        The table

    Returns
    --------
    rec : `numpy.recarray`
        The output rec array
    """
    return ap_table_to_recarray(pa_table_to_ap_table(tab))
def ap_table_to_recarray(tab):
    """
    Turn an `astropy.table.Table` into an `numpy.recarray`

    The table is wrapped in a FITS HDU via `fits.table_to_hdu` and the HDU's
    `data` attribute is returned.

    Parameters
    ----------
    tab : `astropy.table.Table`
        The table

    Returns
    --------
    rec : `numpy.recarray`
        The output rec array
    """
    hdu = fits.table_to_hdu(tab)
    return hdu.data
def convert_to_recarray(obj):
    """
    Convert any supported `Table-like` object to an `numpy.recarray`.

    Every input other than a recarray is first brought to an
    `astropy.table.Table` and then converted with `ap_table_to_recarray`.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    rec : `numpy.recarray`
        The output recarray. An input that already is a recarray is returned
        unchanged (same object).

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)

    if tType == NUMPY_RECARRAY:
        return obj

    # Build the intermediate astropy table for the remaining formats.
    if tType == AP_TABLE:
        ap_tab = obj
    elif tType == NUMPY_DICT:
        ap_tab = apTable.Table(obj)
    elif tType == PD_DATAFRAME:
        ap_tab = data_frame_to_ap_table(obj)
    elif tType == PA_TABLE:
        ap_tab = pa_table_to_ap_table(obj)
    elif tType == JSON_STRING:
        ap_tab = json_to_ap_table(obj)
    else:
        raise TypeError(
            f"Could not convert table because it is an unsupported TableType {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
        )

    return ap_table_to_recarray(ap_tab)
### I E. Converting to `pandas.DataFrame`
def ap_table_to_dataframe(tab):
    """
    Turn an `astropy.table.Table` into a `pandas.DataFrame`

    Each column's `data` is passed through `force_to_pandables` before the
    dataframe is built. Entries of `tab.meta` are copied into `df.attrs`.

    Parameters
    ----------
    tab : `astropy.table.Table`
        The table

    Returns
    -------
    df : `pandas.DataFrame`
        The dataframe
    """
    columns = OrderedDict(
        (name, force_to_pandables(tab[name].data)) for name in tab.columns
    )
    frame = pd.DataFrame(columns)
    for meta_key, meta_val in tab.meta.items():
        frame.attrs[meta_key] = meta_val
    return frame
def pa_table_to_dataframe(table):
    """
    Turn a `pyarrow.Table` into a `pandas.DataFrame`.

    This simply returns the result of the table's own `to_pandas()` method.

    Parameters
    ----------
    table : `pyarrow.Table`
        Input table

    Returns
    -------
    df : `pandas.DataFrame`
        The converted dataframe.
    """
    return table.to_pandas()
def dict_to_dataframe(odict: Mapping, meta: Optional[Mapping] = None):
    """
    Turn an `OrderedDict`, (`str`, `numpy.array`) into a `pandas.DataFrame`

    Every value is passed through `force_to_pandables` before the dataframe
    is built. When `meta` is given, its entries are copied into `df.attrs`.

    Parameters
    ----------
    odict : `OrderedDict`, (`str`, `numpy.array`)
        The dict
    meta : `dict` or `None`
        Optional dictionary of metadata

    Returns
    -------
    df : `pandas.DataFrame`
        The dataframe
    """
    columns = OrderedDict(
        (name, force_to_pandables(values)) for name, values in odict.items()
    )
    frame = pd.DataFrame(columns)
    if meta is None:
        return frame
    for meta_key, meta_val in meta.items():
        frame.attrs[meta_key] = meta_val
    return frame
def convert_to_dataframe(obj):
    """
    Convert any supported `Table-like` object to a `pandas.DataFrame`.

    Recarrays and json strings are first turned into a dict of arrays and
    then converted with `dict_to_dataframe`.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    df : `pandas.DataFrame`
        The dataframe. An input that already is a dataframe is returned
        unchanged (same object).

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)

    if tType == PD_DATAFRAME:
        return obj
    if tType == AP_TABLE:
        return ap_table_to_dataframe(obj)
    if tType == PA_TABLE:
        return pa_table_to_dataframe(obj)
    if tType == NUMPY_DICT:
        return dict_to_dataframe(obj)
    if tType == NUMPY_RECARRAY:
        return dict_to_dataframe(recarray_to_dict(obj))
    if tType == JSON_STRING:
        return dict_to_dataframe(json_to_dict(obj))

    raise TypeError(
        f"Could not convert table because it is an unsupported tableType {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
    )
### I F. Converting to `pa.Table`
def ap_table_to_pa_table(tab):
    """
    Turn an `astropy.table.Table` into a `pa.Table`

    One-dimensional column data is passed through unchanged; data with more
    than one dimension goes through `force_to_pandables` first. Table
    metadata values are stringified with `str` before being attached.

    Parameters
    ----------
    tab : `astropy.table.Table`
        The table

    Returns
    -------
    table : `pa.Table`
        The output table
    """
    columns = OrderedDict()
    for name in tab.columns:
        values = tab[name].data
        n_dims = len(values.shape)
        if n_dims < 1:
            # No branch in the original handles this case; the column is
            # left out of the output.
            continue
        columns[name] = values if n_dims == 1 else force_to_pandables(values)

    str_meta = {key: str(val) for key, val in tab.meta.items()}
    return pa.Table.from_pydict(columns, metadata=str_meta)
def dataframe_to_pa_table(df):
    """
    Turn a `pandas.DataFrame` into a `pa.Table`

    This delegates directly to `pa.Table.from_pandas`.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The dataframe

    Returns
    -------
    table : `pa.Table`
        The table
    """
    return pa.Table.from_pandas(df)
def dict_to_pa_table(odict: Mapping, meta: Optional[Mapping] = None):
    """
    Turn an `OrderedDict`, (`str`, `numpy.array`) into a `pa.Table`

    Every value is passed through `force_to_pandables` before the table is
    built. When `meta` is given, its values are stringified with `str` and
    attached as table metadata; otherwise no metadata is attached.

    Parameters
    ----------
    odict : `OrderedDict`, (`str`, `numpy.array`)
        The dict
    meta : `dict` or `None`
        Optional dictionary of metadata

    Returns
    -------
    table : `pa.Table`
        The table
    """
    columns = {}
    for name, values in odict.items():
        columns[name] = force_to_pandables(values)

    str_meta = None if meta is None else {key: str(val) for key, val in meta.items()}
    return pa.Table.from_pydict(columns, metadata=str_meta)
def convert_to_pa_table(obj):
    """
    Convert an object to a `pa.Table`

    Recarrays and json strings are first turned into a dict of arrays and
    then converted with `dict_to_pa_table`.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    table : `pa.Table`
        The table. An input that already is a `pa.Table` is returned
        unchanged (same object).

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)
    if tType == AP_TABLE:
        return ap_table_to_pa_table(obj)
    if tType == NUMPY_DICT:
        return dict_to_pa_table(obj)
    if tType == NUMPY_RECARRAY:
        # This branch previously called dict_to_dataframe and therefore
        # returned a pandas DataFrame instead of a pyarrow table.
        return dict_to_pa_table(recarray_to_dict(obj))
    if tType == PD_DATAFRAME:
        return dataframe_to_pa_table(obj)
    if tType == PA_TABLE:
        return obj
    if tType == JSON_STRING:
        return dict_to_pa_table(json_to_dict(obj))

    raise TypeError(
        f"Could not convert table because it is an unsupported tableType {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
    )
### I G. Converting to `json`
def dict_to_json(the_dict):
    """
    Convert a numpy dict to a json string

    Values the json encoder cannot handle natively are converted as follows:
    numpy arrays become nested lists (`tolist`), numpy scalars become the
    matching Python scalar (`item`), and anything else is written as `null`.

    Parameters
    ----------
    the_dict : `dict[str, np.ndarray]`
        The dict being converted

    Returns
    -------
    json_string : `str`
        The json string
    """

    def _to_serializable(value):
        """Fallback used by `json.dumps` for objects it cannot encode."""
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            # numpy scalars (e.g. np.int64, np.bool_) would otherwise be
            # silently written as null.
            unwrapped = value.item()
            # Some scalar types may hand back a numpy scalar again; treat
            # those like any other unsupported object to avoid re-entering
            # this hook with the same value.
            return None if isinstance(unwrapped, np.generic) else unwrapped
        # Best-effort behavior kept from the original: unknown objects
        # serialize as null rather than raising.
        return None

    return json.dumps(the_dict, default=_to_serializable)
def convert_to_json(obj):
    """
    Convert any supported `Table-like` object to a json string.

    Every input other than a json string is first turned into a dict of
    arrays and then serialized with `dict_to_json`.

    Parameters
    ----------
    obj : `object`
        The object being converted

    Returns
    -------
    json_string : `str`
        The json string. An input that already is a json string is returned
        unchanged.

    Raises
    ------
    TypeError
        If `table_type` reports a type that is not handled here.
    """
    tType = table_type(obj)

    if tType == JSON_STRING:
        return obj

    # Bring the input to a dict of arrays, then serialize once at the end.
    if tType == NUMPY_DICT:
        as_dict = obj
    elif tType == AP_TABLE:
        as_dict = ap_table_to_dict(obj)
    elif tType == NUMPY_RECARRAY:
        as_dict = recarray_to_dict(obj)
    elif tType == PD_DATAFRAME:
        as_dict = dataframe_to_dict(obj)
    elif tType == PA_TABLE:
        as_dict = pa_table_to_dict(obj)
    else:
        raise TypeError(
            f"Could not convert table because it is an unsupported tableType {tType}. Must be one of {TABULAR_FORMAT_NAMES.keys()}"
        )

    return dict_to_json(as_dict)