@lru_cache
def get_schema_names_frame(tiledb_obj: tiledb.Array) -> List[str]:
    """List the attribute names declared in a TileDB object's schema.

    Results are memoized with ``lru_cache``, keyed on ``tiledb_obj``.

    Args:
        tiledb_obj:
            A TileDB object.

    Returns:
        Attribute names, in schema order (index ``0`` to ``nattr - 1``).
    """
    schema = tiledb_obj.schema
    return [schema.attr(position).name for position in range(schema.nattr)]
def subset_frame(
    tiledb_obj: tiledb.Array,
    subset: Union[slice, tiledb.QueryCondition],
    columns: list,
) -> pd.DataFrame:
    """Extract a subset of rows and columns from a TileDB object.

    Args:
        tiledb_obj:
            TileDB object to subset.

        subset:
            A :py:class:`slice` used to index the object. If a string is
            given instead, it is forwarded as the ``cond`` of a TileDB
            query and a ``UserWarning`` is emitted. Any non-string value
            is used to index ``tiledb_obj.df`` directly.

        columns:
            List specifying the attributes from the schema to extract.

    Returns:
        A `DataFrame` with the requested subset. Cells matching the NUL
        character (``"\\x00"``) are replaced with NaN, and every row that
        contains a missing value is dropped.
    """
    if not isinstance(subset, str):
        frame = tiledb_obj.df[subset][columns]
    else:
        warn(
            "provided subset is string, its expected to be a 'query_condition'",
            UserWarning,
        )
        frame = tiledb_obj.query(cond=subset, attrs=columns).df[:]

    # Null strings come back as "\x00"; turn them into NaN so that
    # dropna() removes the affected rows.
    null_pattern = re.compile(pattern="\x00")
    return frame.replace(regex=null_pattern, value=np.nan).dropna()
def subset_array(
    tiledb_obj: tiledb.Array,
    row_subset: Union[slice, list, tuple],
    column_subset: Union[slice, list, tuple],
    shape: tuple,
) -> np.ndarray:
    """Slice a TileDB object that stores array data.

    Indexing goes through ``multi_index``.

    Args:
        tiledb_obj:
            A TileDB object.

        row_subset:
            Subset along the row axis.

        column_subset:
            Subset along the column axis.

        shape:
            Shape of the entire matrix. Currently not used by this function.

    Returns:
        The ``"data"`` entry of the ``multi_index`` result.
    """
    return tiledb_obj.multi_index[row_subset, column_subset]["data"]
def get_a_column(
    tiledb_obj: tiledb.Array, column_name: Union[str, List[str]]
) -> pd.DataFrame:
    """Access column(s) from the TileDB object.

    Args:
        tiledb_obj:
            A TileDB object.

        column_name:
            Name of the column to access, or a list of column names.

    Raises:
        ValueError:
            If no column name is given, or if any requested column is not
            an attribute of the object's schema.

    Returns:
        The result of querying the requested attributes over the full
        range (``query(attrs=...).df[:]``).
    """
    # Normalize first so that validation works for both a single name and
    # a list of names; testing a list for membership in a list of strings
    # would always fail.
    if isinstance(column_name, str):
        requested = [column_name]
    else:
        requested = list(column_name)

    if not requested:
        raise ValueError(f"Column '{column_name}' does not exist.")

    available = get_schema_names_frame(tiledb_obj)
    for name in requested:
        if name not in available:
            raise ValueError(f"Column '{name}' does not exist.")

    return tiledb_obj.query(attrs=requested).df[:]
@lru_cache
def get_index(tiledb_obj: tiledb.Array) -> list:
    """Return the row index of the TileDB object.

    Reads the unique values of the ``"__tiledb_rows"`` dimension and
    decodes each one with its ``decode()`` method. Results are memoized
    with ``lru_cache``, keyed on ``tiledb_obj``.

    Args:
        tiledb_obj:
            A TileDB object.

    Returns:
        A list containing the decoded index values.
    """
    return [
        raw_value.decode()
        for raw_value in tiledb_obj.unique_dim_values("__tiledb_rows")
    ]