Source code for cellarr_frame.frame
from typing import Any, List, Optional
import pandas as pd
import tiledb
from .base import CellArrayBaseFrame
__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
[docs]
class CellArrayFrame(CellArrayBaseFrame):
"""Implementation for TileDB DataFrames."""
def _read_slice(self, rows: Any, cols: Optional[List[str]]) -> pd.DataFrame:
"""Read data using direct slicing.
Args:
rows:
Slice object, specific index (int/str), or list of indices.
cols:
List of column names to retrieve.
"""
if isinstance(rows, slice):
start = rows.start
stop = rows.stop
step = rows.step
if stop is not None and isinstance(stop, int):
if stop == 0 and start is None:
stop = -1
else:
stop -= 1
rows = slice(start, stop, step)
with self.open_array(mode="r") as array:
attrs = cols if cols is not None else self.column_names
query = array.query(attrs=attrs)
return query.df[rows]
def _read_query(self, condition: str, columns: Optional[List[str]]) -> pd.DataFrame:
"""Read data using a string query condition.
Args:
condition:
TileDB query string (e.g. "val > 5.0").
columns:
List of column names to retrieve.
"""
with self.open_array(mode="r") as array:
attrs = columns if columns is not None else self.column_names
return array.query(cond=condition, attrs=attrs).df[:]
[docs]
def write_batch(self, data: pd.DataFrame, append: bool = True, **kwargs) -> None:
"""Write a batch of data to the frame.
Args:
data:
Pandas DataFrame to write.
append:
If True, appends to existing array. If False, might overwrite/schema_only
depending on lower-level tiledb.from_pandas behavior, but mostly used for appending.
"""
mode = "append" if append else "ingest"
tiledb.from_pandas(uri=self.uri, dataframe=data, mode=mode, ctx=self._ctx, **kwargs)
self._shape = None
self._index = None
[docs]
@classmethod
def create(
cls, uri: str, data: pd.DataFrame, index_dims: Optional[List[str]] = None, full_domain: bool = True, **kwargs
):
"""Helper to create a new CellFrame from a dataframe.
Args:
uri:
Path to create array.
data:
Initial dataframe (can be empty schema if used with mode='schema_only').
index_dims:
Columns to use as dimensions (indices).
full_domain:
Whether to allow the domain to extend to the full range of the dtype (default True).
"""
tiledb.from_pandas(uri, data, index_dims=index_dims, full_domain=full_domain, **kwargs)
return cls(uri=uri)