Core

`tablite.core`

Attributes

`tablite.core.log = logging.getLogger(__name__)` `module-attribute`

Classes

`tablite.core.Table(columns=None, headers=None, rows=None, _path=None)`

Bases: BaseTable

creates Table

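PARAMETER	DESCRIPTION
`columns`	(dict, optional) dict with column names as keys, values as lists. Example: t = Table(columns={"a": [1, 2], "b": [3, 4]})
`headers`	(list of strings, optional) list of column names. Used together with `rows` as the alternative to `columns`.
`rows`	(list of tuples or lists, optional) values for columns. Example: t = Table(headers=["a", "b"], rows=[[1,3], [2,4]])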

Source code in tablite/core.py

def __init__(self, columns=None, headers=None, rows=None, _path=None) -> None:
    """creates Table

    All arguments are forwarded unchanged to the base class initialiser.

    Args:
        EITHER:
            columns (dict, optional): dict with column names as keys, values as lists.
            Example: t = Table(columns={"a": [1, 2], "b": [3, 4]})
        OR
            headers (list of strings, optional): list of column names.
            rows (list of tuples or lists, optional): values for columns
            Example: t = Table(headers=["a", "b"], rows=[[1,3], [2,4]])
        _path: forwarded to the base class.
            NOTE(review): presumably a storage location for the table - confirm in BaseTable.
    """
    super().__init__(columns, headers, rows, _path)

Attributes

`tablite.core.Table.path = _path` `instance-attribute`

`tablite.core.Table.columns = {}` `instance-attribute`

`tablite.core.Table.rows` `property`

enables row based iteration in python types.

Example:

for row in Table.rows:
    print(row)

Yields: tuple: values in the same order as the columns.

Functions

`tablite.core.Table.from_file(path, columns=None, first_row_has_headers=True, header_row_index=0, encoding=None, start=0, limit=sys.maxsize, sheet=None, guess_datatypes=True, newline='\n', text_qualifier=None, delimiter=None, strip_leading_and_tailing_whitespace=True, text_escape_openings='', text_escape_closures='', skip_empty: ValidSkipEmpty = 'NONE', tqdm=_tqdm) -> Table` `classmethod`

    reads path and imports 1 or more tables

    REQUIRED
    --------
    path: pathlib.Path or str
        selection of filereader uses path.suffix.
        See `filereaders`.

    OPTIONAL
    --------
    columns:
        None: (default) All columns will be imported.
        List: only column names from list will be imported (if present in file)
              e.g. ['A', 'B', 'C', 'D']

              datatype is detected using Datatypes.guess(...)
              You can try it out with:
              >> from tablite.datatypes import DataTypes
              >> DataTypes.guess(['001','100'])
              [1,100]

              if the format cannot be achieved the read type is kept.
        Excess column names are ignored.

        HINT: To get the head of file use:
        >>> from tablite.tools import head
        >>> head = head(path)

    first_row_has_headers: boolean
        True: (default) first row is used as column names.
        False: integers are used as column names.

    encoding: str. Defaults to None (autodetect using n bytes).
        n is declared in filereader_utils as ENCODING_GUESS_BYTES

    start: the first line to be read (default: 0)

    limit: the number of lines to be read from start (default sys.maxsize ~ 2**63)

    OPTIONAL FOR EXCEL AND ODS READERS
    ----------------------------------

    sheet: sheet name to import  (applicable to excel- and ods-reader only)
        e.g. 'sheet_1'
        sheet names that are not found are ignored.

    OPTIONAL FOR TEXT READERS
    -------------------------
    guess_datatypes: bool
        True: (default) datatypes are guessed using DataTypes.guess(...)
        False: all data is imported as strings.

    newline: newline character (applicable to text_reader only)
        str: '\n' (default) or '\r\n'

    text_qualifier: character (applicable to text_reader only)
        None: No text qualifier is used.
        str: " or '

    delimiter: character (applicable to text_reader only)
        None: file suffix is used to determine field delimiter:
            .txt: "|"
            .csv: ",",
            .ssv: ";"
            .tsv: "\t" (tab)

    strip_leading_and_tailing_whitespace: bool:
        True: default

    text_escape_openings: (applicable to text_reader only)
        '' (empty string): default
        str: list of characters such as ([{

    text_escape_closures: (applicable to text_reader only)
        '' (empty string): default
        str: list of characters such as }])

Source code in tablite/core.py

@classmethod
def from_file(
    cls,
    path,
    columns=None,
    first_row_has_headers=True,
    header_row_index=0,
    encoding=None,
    start=0,
    limit=sys.maxsize,
    sheet=None,
    guess_datatypes=True,
    newline="\n",
    text_qualifier=None,
    delimiter=None,
    strip_leading_and_tailing_whitespace=True,
    text_escape_openings="",
    text_escape_closures="",
    skip_empty: ValidSkipEmpty="NONE",
    tqdm=_tqdm,
) -> "Table":
    r"""
    reads path and imports 1 or more tables

    REQUIRED
    --------
    path: pathlib.Path or str
        selection of filereader uses path.suffix.
        See `filereaders`.

    OPTIONAL
    --------
    columns:
        None: (default) All columns will be imported.
        List: only column names from list will be imported (if present in file)
              e.g. ['A', 'B', 'C', 'D']

              datatype is detected using Datatypes.guess(...)
              You can try it out with:
              >>> from tablite.datatypes import DataTypes
              >>> DataTypes.guess(['001','100'])
              [1,100]

              if the format cannot be achieved the read type is kept.
        Excess column names are ignored.

        HINT: To get the head of file use:
        >>> from tablite.tools import head
        >>> head = head(path)

    first_row_has_headers: boolean
        True: (default) first row is used as column names.
        False: integers are used as column names.

    header_row_index: int (default: 0)
        forwarded to the text, excel and ods readers.
        NOTE(review): presumably the index of the row that holds the
        column names - confirm against the readers.

    encoding: str. Defaults to None (autodetect using n bytes).
        n is declared in filereader_utils as ENCODING_GUESS_BYTES

    start: the first line to be read (default: 0)

    limit: the number of lines to be read from start (default sys.maxsize ~ 2**63)

    skip_empty: ValidSkipEmpty (default: "NONE")
        forwarded to the text, excel and ods readers.
        See ValidSkipEmpty for the accepted values.

    tqdm: progress bar constructor, passed to the reader as keyword `tqdm`.

    OPTIONAL FOR EXCEL AND ODS READERS
    ----------------------------------

    sheet: sheet name to import  (applicable to excel- and ods-reader only)
        e.g. 'sheet_1'
        sheet names that are not found are ignored.

    OPTIONAL FOR TEXT READERS
    -------------------------
    guess_datatypes: bool
        True: (default) datatypes are guessed using DataTypes.guess(...)
        False: all data is imported as strings.

    newline: newline character (applicable to text_reader only)
        str: '\n' (default) or '\r\n'

    text_qualifier: character (applicable to text_reader only)
        None: No text qualifier is used.
        str: " or '

    delimiter: character (applicable to text_reader only)
        None: file suffix is used to determine field delimiter:
            .txt: "|"
            .csv: ",",
            .ssv: ";"
            .tsv: "\t" (tab)

    strip_leading_and_tailing_whitespace: bool:
        True: default

    text_escape_openings: (applicable to text_reader only)
        '' (empty string): default
        str: list of characters such as ([{

    text_escape_closures: (applicable to text_reader only)
        '' (empty string): default
        str: list of characters such as }])

    RAISES
    ------
    FileNotFoundError: path does not exist.
    ValueError: start or limit is not an int within range, or the file
        suffix has no registered reader.
    TypeError: first_row_has_headers is not a bool.
    NotImplementedError: a reader is registered for the suffix, but this
        method does not know how to build its arguments.
    """
    if isinstance(path, str):
        path = Path(path)
    type_check(path, Path)

    if not path.exists():
        raise FileNotFoundError(f"file not found: {path}")

    if not isinstance(start, int) or not 0 <= start <= sys.maxsize:
        raise ValueError(f"start {start} not in range(0,{sys.maxsize})")

    if not isinstance(limit, int) or not 0 < limit <= sys.maxsize:
        raise ValueError(f"limit {limit} not in range(0,{sys.maxsize})")

    if not isinstance(first_row_has_headers, bool):
        raise TypeError("first_row_has_headers is not bool")

    # The reader is selected by the file suffix without its leading dot.
    import_as = path.suffix
    if import_as.startswith("."):
        import_as = import_as[1:]

    reader = import_utils.file_readers.get(import_as, None)
    if reader is None:
        raise ValueError(f"{import_as} is not in supported format: {import_utils.valid_readers}")

    # tqdm is handed to every reader as a keyword argument.
    additional_configs = {"tqdm": tqdm}

    # Each reader takes its own positional argument layout.
    if reader == import_utils.text_reader:
        # fmt:off
        config = (path, columns, first_row_has_headers, header_row_index, encoding, start, limit, newline,
                  guess_datatypes, text_qualifier, strip_leading_and_tailing_whitespace, skip_empty,
                  delimiter, text_escape_openings, text_escape_closures)
        # fmt:on
    elif reader == import_utils.from_html:
        config = (path,)
    elif reader == import_utils.from_hdf5:
        config = (path,)
    elif reader == import_utils.excel_reader:
        config = (
            path,
            first_row_has_headers,
            header_row_index,
            sheet,
            columns,
            skip_empty,
            start,
            limit,
        )  # if file length changes - re-import.
    elif reader == import_utils.ods_reader:
        # the ods reader receives the path as a string, not as a Path.
        config = (
            str(path),
            first_row_has_headers,
            header_row_index,
            sheet,
            columns,
            skip_empty,
            start,
            limit,
        )  # if file length changes - re-import.
    else:
        # Without this branch `config` would be unbound below and the caller
        # would see an UnboundLocalError instead of a meaningful message.
        raise NotImplementedError(f"no import configuration for reader of '{import_as}' files")

    return reader(cls, *config, **additional_configs)

`tablite.core.Table.from_pandas(df)` `classmethod`

Creates Table using pd.to_dict('list')

similar to:

>>> import pandas as pd
>>> df = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
>>> df
    a  b
    0  1  4
    1  2  5
    2  3  6
>>> df.to_dict('list')
{'a': [1, 2, 3], 'b': [4, 5, 6]}
>>> t = Table.from_dict(df.to_dict('list'))
>>> t.show()
    +===+===+===+
    | # | a | b |
    |row|int|int|
    +---+---+---+
    | 0 |  1|  4|
    | 1 |  2|  5|
    | 2 |  3|  6|
    +===+===+===+

Source code in tablite/core.py

@classmethod
def from_pandas(cls, df):
    """
    Builds a Table from a pandas DataFrame.

    Delegates to `import_utils.from_pandas`. The outcome is similar to
    going through `df.to_dict('list')`:
    ```
    >>> import pandas as pd
    >>> df = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
    >>> df
        a  b
        0  1  4
        1  2  5
        2  3  6
    >>> df.to_dict('list')
    {'a': [1, 2, 3], 'b': [4, 5, 6]}
    >>> t = Table.from_dict(df.to_dict('list'))
    >>> t.show()
        +===+===+===+
        | # | a | b |
        |row|int|int|
        +---+---+---+
        | 0 |  1|  4|
        | 1 |  2|  5|
        | 2 |  3|  6|
        +===+===+===+
    ```
    """
    table = import_utils.from_pandas(cls, df)
    return table

`tablite.core.Table.from_hdf5(path)` `classmethod`

imports an exported hdf5 table.

Source code in tablite/core.py

@classmethod
def from_hdf5(cls, path):
    """
    Loads a table that was previously exported as hdf5.

    path: location of the hdf5 file, handed to `import_utils.from_hdf5`.
    """
    table = import_utils.from_hdf5(cls, path)
    return table

`tablite.core.Table.from_json(jsn)` `classmethod`

Imports table exported using .to_json

Source code in tablite/core.py

@classmethod
def from_json(cls, jsn):
    """
    Loads a table that was exported using .to_json

    jsn: the exported JSON, handed to `import_utils.from_json`.
    """
    table = import_utils.from_json(cls, jsn)
    return table

`tablite.core.Table.to_hdf5(path)`

creates a copy of the table as hdf5

Source code in tablite/core.py

def to_hdf5(self, path):
    """
    Writes a copy of the table to `path` as hdf5.

    Delegates to `export_utils.to_hdf5`; nothing is returned.
    """
    write_hdf5 = export_utils.to_hdf5
    write_hdf5(self, path)

`tablite.core.Table.to_pandas()`

returns pandas.DataFrame

Source code in tablite/core.py

def to_pandas(self):
    """
    Converts the table to a pandas.DataFrame.

    Delegates to `export_utils.to_pandas` and returns its result.
    """
    dataframe = export_utils.to_pandas(self)
    return dataframe

`tablite.core.Table.to_sql(name)`

generates ANSI-92 compliant SQL.

Source code in tablite/core.py

def to_sql(self, name):
    """
    Generates ANSI-92 compliant SQL for the table.

    name: handed to `export_utils.to_sql`.
        NOTE(review): presumably the SQL table name - confirm in export_utils.
    """
    # remove after update to test suite.
    sql = export_utils.to_sql(self, name)
    return sql

`tablite.core.Table.to_json()`

returns JSON

Source code in tablite/core.py

def to_json(self):
    """
    Serialises the table to JSON.

    Delegates to `export_utils.to_json` and returns its result.
    """
    jsn = export_utils.to_json(self)
    return jsn

`tablite.core.Table.to_xlsx(path)`

exports table to path

Source code in tablite/core.py

def to_xlsx(self, path):
    """
    Exports the table to `path` as an .xlsx file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".xlsx" suffix before `export_utils.excel_writer` is called.
    """
    expected_suffix = ".xlsx"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.excel_writer(self, path)

`tablite.core.Table.to_ods(path)`

exports table to path

Source code in tablite/core.py

def to_ods(self, path):
    """
    Exports the table to `path` as an .ods file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".ods" suffix before `export_utils.excel_writer` is called.
    """
    expected_suffix = ".ods"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.excel_writer(self, path)

`tablite.core.Table.to_csv(path)`

exports table to path

Source code in tablite/core.py

def to_csv(self, path):
    """
    Exports the table to `path` as a .csv file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".csv" suffix before `export_utils.text_writer` is called.
    """
    expected_suffix = ".csv"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.text_writer(self, path)

`tablite.core.Table.to_tsv(path)`

exports table to path

Source code in tablite/core.py

def to_tsv(self, path):
    """
    Exports the table to `path` as a .tsv file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".tsv" suffix before `export_utils.text_writer` is called.
    """
    expected_suffix = ".tsv"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.text_writer(self, path)

`tablite.core.Table.to_text(path)`

exports table to path

Source code in tablite/core.py

def to_text(self, path):
    """
    Exports the table to `path` as a .txt file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".txt" suffix before `export_utils.text_writer` is called.
    """
    expected_suffix = ".txt"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.text_writer(self, path)

`tablite.core.Table.to_html(path)`

exports table to path

Source code in tablite/core.py

def to_html(self, path):
    """
    Exports the table to `path` as an .html file.

    The path is passed through `export_utils.path_suffix_check` with the
    ".html" suffix before `export_utils.to_html` is called.
    """
    expected_suffix = ".html"
    export_utils.path_suffix_check(path, expected_suffix)
    export_utils.to_html(self, path)

`tablite.core.Table.expression(expression)`

filters based on an expression, such as:

"all((A==B, C!=4, 200<D))"

which is interpreted using python's compiler to:

def _f(A,B,C,D):
    return all((A==B, C!=4, 200<D))

Source code in tablite/core.py

def expression(self, expression):
    """
    Filters the table using a single expression over column names, such as:

        "all((A==B, C!=4, 200<D))"

    which is interpreted using python's compiler to:

        def _f(A,B,C,D):
            return all((A==B, C!=4, 200<D))

    NOTE(review): since the expression is compiled as python code, it
    should presumably never come from untrusted input - confirm in redux.
    """
    filtered = redux._filter_using_expression(self, expression)
    return filtered

`tablite.core.Table.filter(expressions, filter_type='all', tqdm=_tqdm)`

enables filtering across columns for multiple criteria.

expressions:

str: Expression that can be compiled and executed row by row.
    example: "all((A==B and C!=4 and 200<D))"

list of dicts: (example):

    L = [
        {'column1':'A', 'criteria': "==", 'column2': 'B'},
        {'column1':'C', 'criteria': "!=", "value2": '4'},
        {'value1': 200, 'criteria': "<", 'column2': 'D' }
    ]

accepted dictionary keys: 'column1', 'column2', 'criteria', 'value1', 'value2'

filter_type: 'all' or 'any'

Source code in tablite/core.py

def filter(self, expressions, filter_type="all", tqdm=_tqdm):
    """
    Filters rows across columns using one or more criteria.

    expressions: either

        str: Expression that can be compiled and executed row by row.
            example: "all((A==B and C!=4 and 200<D))"

        list of dicts: (example):

            L = [
                {'column1':'A', 'criteria': "==", 'column2': 'B'},
                {'column1':'C', 'criteria': "!=", "value2": '4'},
                {'value1': 200, 'criteria': "<", 'column2': 'D' }
            ]

        accepted dictionary keys: 'column1', 'column2', 'criteria', 'value1', 'value2'

    filter_type: 'all' or 'any'

    tqdm: progress bar constructor, handed to `redux.filter`.

    returns: the result of `redux.filter`.
    """
    outcome = redux.filter(self, expressions, filter_type, tqdm)
    return outcome

`tablite.core.Table.sort_index(sort_mode='excel', tqdm=_tqdm, pbar=None, **kwargs)`

helper for methods sort and is_sorted

param: sort_mode: str: "alphanumeric", "unix", or, "excel" (default) param: **kwargs: sort criteria. See Table.sort()

Source code in tablite/core.py

def sort_index(self, sort_mode="excel", tqdm=_tqdm, pbar=None, **kwargs):
    """
    Helper shared by the methods `sort` and `is_sorted`.

    param: sort_mode: str: "alphanumeric", "unix", or, "excel" (default)
    param: tqdm: progress bar constructor, forwarded to `sortation.sort_index`
    param: pbar: optional progress bar, forwarded to `sortation.sort_index`
    param: **kwargs: sort criteria. See Table.sort()
    """
    index = sortation.sort_index(self, sort_mode, tqdm=tqdm, pbar=pbar, **kwargs)
    return index

`tablite.core.Table.reindex(index)`

index: list of integers that declare sort order.

Examples:

Table:  ['a','b','c','d','e','f','g','h']
index:  [0,2,4,6]
result: ['a','c','e','g']

Table:  ['a','b','c','d','e','f','g','h']
index:  [0,2,4,6,1,3,5,7]
result: ['a','c','e','g','b','d','f','h']

Source code in tablite/core.py

def reindex(self, index):
    """
    Returns the result of `_reindex.reindex` for the given index.

    index: list of integers that declare sort order.
        A list is converted to a numpy array first; any other type is
        passed on unchanged.

    Examples (positions are 0-based):

        Table:  ['a','b','c','d','e','f','g','h']
        index:  [0,2,4,6]
        result: ['a','c','e','g']

        Table:  ['a','b','c','d','e','f','g','h']
        index:  [0,2,4,6,1,3,5,7]
        result: ['a','c','e','g','b','d','f','h']

    """
    if isinstance(index, list):
        index = np.array(index)
    return _reindex.reindex(self, index)

`tablite.core.Table.drop_duplicates(*args)`

removes duplicate rows based on column names

args: (optional) column names. If no args are given, all columns are used.

Source code in tablite/core.py

def drop_duplicates(self, *args):
    """
    Removes duplicate rows, judged on the named columns.

    args: (optional) column names.
    Without args, every column of the table is used.

    returns: the result of `self.reindex` on the unique index.
    """
    names = args or self.columns
    return self.reindex(self.unique_index(*names))

`tablite.core.Table.sort(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)`

Perform multi-pass sorting with precedence given order of column names.

PARAMETER	DESCRIPTION
`mapping`	keys as columns, values as boolean for 'reverse' TYPE: `dict`
`sort_mode`	str: "alphanumeric", "unix", or, "excel" DEFAULT: `'excel'`

RETURNS	DESCRIPTION
`None`	Table.sort is sorted inplace

Examples: Table.sort(mapping={'A':False}) means sort by 'A' in ascending order. Table.sort(mapping={'A':True, 'B':False}) means sort 'A' in descending order, then (2nd priority) sort 'B' in ascending order.

Source code in tablite/core.py

def sort(self, mapping, sort_mode="excel", tqdm=_tqdm, pbar: _tqdm = None):
    """Sort the table in place, in multiple passes, with precedence following the order of the column names.

    Args:
        mapping (dict): keys as columns,
                        values as boolean for 'reverse'
        sort_mode: str: "alphanumeric", "unix", or, "excel"
        tqdm: progress bar constructor, forwarded to sortation.sort
        pbar: optional progress bar, forwarded to sortation.sort

    Returns:
        None: the columns of this table are replaced by the sorted columns.

    Examples:
    Table.sort(mapping={'A':False}) means sort by 'A' in ascending order.
    Table.sort(mapping={'A':True, 'B':False}) means sort 'A' in descending order, then (2nd priority)
    sort 'B' in ascending order.
    """
    # sortation.sort returns a new table; adopt its columns to make the sort in-place.
    self.columns = sortation.sort(self, mapping, sort_mode, tqdm=tqdm, pbar=pbar).columns

`tablite.core.Table.sorted(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)`

See sort. Sorted returns a new table in contrast to "sort", which is in-place.

RETURNS	DESCRIPTION
	Table.

Source code in tablite/core.py

def sorted(self, mapping, sort_mode="excel", tqdm=_tqdm, pbar: _tqdm = None):
    """Return a sorted copy of the table.

    Takes the same arguments as "sort", but leaves this table untouched:
    "sort" is in-place, "sorted" returns a new table.

    Returns:
        Table.
    """
    sorted_table = sortation.sort(self, mapping, sort_mode, tqdm=tqdm, pbar=pbar)
    return sorted_table

`tablite.core.Table.is_sorted(mapping, sort_mode='excel')`

Performs multi-pass sorting check with precedence given order of column names. mapping: sort criteria, see Table.sort(). sort_mode: see Table.sort(). :return bool

Source code in tablite/core.py

def is_sorted(self, mapping, sort_mode="excel"):
    """Checks, in multiple passes, whether the table is sorted, with precedence following the order of column names.

    mapping: sort criteria. See Table.sort()
    sort_mode: see Table.sort()
    :return bool
    """
    verdict = sortation.is_sorted(self, mapping, sort_mode)
    return verdict

`tablite.core.Table.any(**kwargs)`

returns Table for rows where ANY kwargs match :param kwargs: dictionary with headers and values / boolean callable

Source code in tablite/core.py

def any(self, **kwargs):
    """
    Returns a Table with the rows where ANY of the kwargs match.

    :param kwargs: dictionary with headers and values / boolean callable
    """
    matches = redux.filter_any(self, **kwargs)
    return matches

`tablite.core.Table.all(**kwargs)`

returns Table for rows where ALL kwargs match :param kwargs: dictionary with headers and values / boolean callable

Examples:

t = Table()
t['a'] = [1,2,3,4]
t['b'] = [10,20,30,40]

def f(x):
    return x == 4
def g(x):
    return x < 20

t2 = t.any( **{"a":f, "b":g})
assert [r for r in t2.rows] == [[1, 10], [4, 40]]

t2 = t.any(a=f,b=g)
assert [r for r in t2.rows] == [[1, 10], [4, 40]]

def h(x):
    return x>=2

def i(x):
    return x<=30

t2 = t.all(a=h,b=i)
assert [r for r in t2.rows] == [[2,20], [3, 30]]

Source code in tablite/core.py

def all(self, **kwargs):
    """
    Returns a Table with the rows where ALL of the kwargs match.

    :param kwargs: dictionary with headers and values / boolean callable

    Examples (contrasting `any` with `all`):

        t = Table()
        t['a'] = [1,2,3,4]
        t['b'] = [10,20,30,40]

        def f(x):
            return x == 4
        def g(x):
            return x < 20

        t2 = t.any( **{"a":f, "b":g})
        assert [r for r in t2.rows] == [[1, 10], [4, 40]]

        t2 = t.any(a=f,b=g)
        assert [r for r in t2.rows] == [[1, 10], [4, 40]]

        def h(x):
            return x>=2

        def i(x):
            return x<=30

        t2 = t.all(a=h,b=i)
        assert [r for r in t2.rows] == [[2,20], [3, 30]]

    """
    matches = redux.filter_all(self, **kwargs)
    return matches

`tablite.core.Table.drop(*args)`

removes all rows where args are present.

Example:

>>> t = Table()
>>> t['A'] = [1,2,3,None]
>>> t['B'] = [None,2,3,4]
>>> t2 = t.drop(None)
>>> t2['A'][:], t2['B'][:]
([2,3], [2,3])

Source code in tablite/core.py

def drop(self, *args):
    """
    Removes every row in which any of the given args occurs.

    At least one value must be given, otherwise ValueError is raised.

    Example:
    >>> t = Table()
    >>> t['A'] = [1,2,3,None]
    >>> t['B'] = [None,2,3,4]
    >>> t2 = t.drop(None)
    >>> t2['A'][:], t2['B'][:]
    ([2,3], [2,3])

    """
    if args:
        return redux.drop(self, *args)
    raise ValueError("What to drop? None? np.nan? ")

`tablite.core.Table.replace(mapping, columns=None, tqdm=_tqdm, pbar=None)`

replaces all mapped keys with values from named columns

PARAMETER	DESCRIPTION
`mapping`	keys are targets for replacement, values are replacements. TYPE: `dict`
`columns`	target columns. Defaults to None (all columns) TYPE: `list or str` DEFAULT: `None`

RAISES	DESCRIPTION
`ValueError`	if a name in `columns` is not a column of the table.

Source code in tablite/core.py

def replace(self, mapping, columns=None, tqdm=_tqdm, pbar=None):
    """Replace, in place, all mapped keys with their values in the named columns.

    Args:
        mapping (dict): keys are targets for replacement,
                        values are replacements.
        columns (list or str, optional): target columns.
            Defaults to None (all columns). A single column name is
            accepted and treated as a one-element list.
        tqdm: progress bar constructor, used only when no pbar is given.
        pbar: optional progress bar; it is advanced by one per column.

    Raises:
        ValueError: if a requested column is not in the table.
    """
    if columns is None:
        targets = list(self.columns)
    elif not isinstance(columns, list) and columns in self.columns:
        targets = [columns]
    else:
        targets = columns
    type_check(targets, list)

    # validate every name before any column is modified.
    for candidate in targets:
        if candidate in self.columns:
            continue
        raise ValueError(f"column not found: {candidate}")

    if pbar is None:
        pbar = tqdm(total=len(targets), desc="replace", disable=Config.TQDM_DISABLE)

    for name in targets:
        self.columns[name].replace(mapping)
        pbar.update(1)

`tablite.core.Table.groupby(keys, functions, tqdm=_tqdm, pbar=None)`

keys: column names for grouping. functions: list of column names and group functions (see GroupBy class); may be an empty list. returns: table

Example:

t = Table()
t.add_column('A', data=[1, 1, 2, 2, 3, 3] * 2)
t.add_column('B', data=[1, 2, 3, 4, 5, 6] * 2)
t.add_column('C', data=[6, 5, 4, 3, 2, 1] * 2)

t.show()
+=====+=====+=====+
|  A  |  B  |  C  |
| int | int | int |
+-----+-----+-----+
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
+=====+=====+=====+

g = t.groupby(keys=['A', 'C'], functions=[('B', gb.sum)])
g.show()
+===+===+===+======+
| # | A | C |Sum(B)|
|row|int|int| int  |
+---+---+---+------+
|0  |  1|  6|     2|
|1  |  1|  5|     4|
|2  |  2|  4|     6|
|3  |  2|  3|     8|
|4  |  3|  2|    10|
|5  |  3|  1|    12|
+===+===+===+======+

Cheat sheet:

list of unique values

>>> g1 = t.groupby(keys=['A'], functions=[])
>>> g1['A'][:]
[1,2,3]

alternatively:

>>> t['A'].unique()
[1,2,3]

list of unique values, grouped by longest combination.

>>> g2 = t.groupby(keys=['A', 'B'], functions=[])
>>> g2['A'][:], g2['B'][:]
([1,1,2,2,3,3], [1,2,3,4,5,6])

alternatively:

>>> list(zip(*t.index('A', 'B').keys()))
[(1,1,2,2,3,3), (1,2,3,4,5,6)]

A key (unique values) and count hereof.

>>> g3 = t.groupby(keys=['A'], functions=[('A', gb.count)])
>>> g3['A'][:], g3['Count(A)'][:]
([1,2,3], [4,4,4])

alternatively:

>>> t['A'].histogram()
([1,2,3], [4,4,4])

for more examples see: https://github.com/root-11/tablite/blob/master/tests/test_groupby.py

Source code in tablite/core.py

def groupby(self, keys, functions, tqdm=_tqdm, pbar=None):
    """
    Groups the table by `keys` and aggregates with `functions`.

    keys: column names for grouping.
    functions: list of column names and group functions (See GroupBy class);
        may be an empty list, as in the cheat sheet below.
    tqdm: progress bar constructor, forwarded to the groupby implementation.
    pbar: optional progress bar, forwarded to the groupby implementation.
    returns: table

    Example:
    ```
    t = Table()
    t.add_column('A', data=[1, 1, 2, 2, 3, 3] * 2)
    t.add_column('B', data=[1, 2, 3, 4, 5, 6] * 2)
    t.add_column('C', data=[6, 5, 4, 3, 2, 1] * 2)

    t.show()
    +=====+=====+=====+
    |  A  |  B  |  C  |
    | int | int | int |
    +-----+-----+-----+
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    +=====+=====+=====+

    g = t.groupby(keys=['A', 'C'], functions=[('B', gb.sum)])
    g.show()
    +===+===+===+======+
    | # | A | C |Sum(B)|
    |row|int|int| int  |
    +---+---+---+------+
    |0  |  1|  6|     2|
    |1  |  1|  5|     4|
    |2  |  2|  4|     6|
    |3  |  2|  3|     8|
    |4  |  3|  2|    10|
    |5  |  3|  1|    12|
    +===+===+===+======+
    ```
    Cheat sheet:

    list of unique values
    ```
    >>> g1 = t.groupby(keys=['A'], functions=[])
    >>> g1['A'][:]
    [1,2,3]
    ```
    alternatively:
    ```
    >>> t['A'].unique()
    [1,2,3]
    ```
    list of unique values, grouped by longest combination.
    ```
    >>> g2 = t.groupby(keys=['A', 'B'], functions=[])
    >>> g2['A'][:], g2['B'][:]
    ([1,1,2,2,3,3], [1,2,3,4,5,6])
    ```
    alternatively:
    ```
    >>> list(zip(*t.index('A', 'B').keys()))
    [(1,1,2,2,3,3), (1,2,3,4,5,6)]
    ```
    A key (unique values) and count hereof.
    ```
    >>> g3 = t.groupby(keys=['A'], functions=[('A', gb.count)])
    >>> g3['A'][:], g3['Count(A)'][:]
    ([1,2,3], [4,4,4])
    ```
    alternatively:
    ```
    >>> t['A'].histogram()
    ([1,2,3], [4,4,4])
    ```
    for more examples see:
        https://github.com/root-11/tablite/blob/master/tests/test_groupby.py

    """
    grouped = _groupby(self, keys, functions, tqdm, pbar)
    return grouped

`tablite.core.Table.pivot(rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None)`

param: rows: column names to keep as rows

param: columns: column names to keep as columns

param: functions: aggregation functions from the Groupby class, given as (column name, function) pairs as in the example below.

example:

t.show()
+=====+=====+=====+
|  A  |  B  |  C  |
| int | int | int |
+-----+-----+-----+
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
+=====+=====+=====+

t2 = t.pivot(rows=['C'], columns=['A'], functions=[('B', gb.sum)])
t2.show()
+===+===+========+=====+=====+=====+
| # | C |function|(A=1)|(A=2)|(A=3)|
|row|int|  str   |mixed|mixed|mixed|
+---+---+--------+-----+-----+-----+
|0  |  6|Sum(B)  |    2|None |None |
|1  |  5|Sum(B)  |    4|None |None |
|2  |  4|Sum(B)  |None |    6|None |
|3  |  3|Sum(B)  |None |    8|None |
|4  |  2|Sum(B)  |None |None |   10|
|5  |  1|Sum(B)  |None |None |   12|
+===+===+========+=====+=====+=====+

Source code in tablite/core.py

def pivot(self, rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None):
    """
    Builds a pivot table.

    param: rows: column names to keep as rows
    param: columns: column names to keep as columns
    param: functions: aggregation functions from the Groupby class, given as
        (column name, function) pairs as in the example below.
    param: values_as_rows: forwarded to `pivots.pivot` (default True).
        NOTE(review): presumably selects whether each function gets its own
        row (as in the example) or its own column - confirm in pivots.
    param: tqdm: progress bar constructor, forwarded to `pivots.pivot`
    param: pbar: optional progress bar, forwarded to `pivots.pivot`

    example:
    ```
    t.show()
    +=====+=====+=====+
    |  A  |  B  |  C  |
    | int | int | int |
    +-----+-----+-----+
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    +=====+=====+=====+

    t2 = t.pivot(rows=['C'], columns=['A'], functions=[('B', gb.sum)])
    t2.show()
    +===+===+========+=====+=====+=====+
    | # | C |function|(A=1)|(A=2)|(A=3)|
    |row|int|  str   |mixed|mixed|mixed|
    +---+---+--------+-----+-----+-----+
    |0  |  6|Sum(B)  |    2|None |None |
    |1  |  5|Sum(B)  |    4|None |None |
    |2  |  4|Sum(B)  |None |    6|None |
    |3  |  3|Sum(B)  |None |    8|None |
    |4  |  2|Sum(B)  |None |None |   10|
    |5  |  1|Sum(B)  |None |None |   12|
    +===+===+========+=====+=====+=====+
    ```
    """
    pivoted = pivots.pivot(self, rows, columns, functions, values_as_rows, tqdm=tqdm, pbar=pbar)
    return pivoted

`tablite.core.Table.merge(left, right, new, criteria)`

takes from LEFT where criteria is True else RIGHT.

:param: T: Table

:param: criteria: np.array(bool): if True take left column else take right column

:param left: (str) column name

:param right: (str) column name

:param new: (str) new name

:returns: T

Example:

>>> c.show()
+==+====+====+====+====+
| #| A  | B  | C  | D  |
+--+----+----+----+----+
| 0|   1|  10|   1|  11|
| 1|   2|  20|   2|  12|
| 2|   3|None|   3|  13|
| 3|None|  40|None|None|
| 4|   5|  50|None|None|
| 5|None|None|   6|  16|
| 6|None|None|   7|  17|
+==+====+====+====+====+

>>> c.merge("A", "C", new="E", criteria=[v != None for v in c['A']])
>>> c.show()
+==+====+====+====+
| #| B  | D  | E  |
+--+----+----+----+
| 0|  10|  11|   1|
| 1|  20|  12|   2|
| 2|None|  13|   3|
| 3|  40|None|None|
| 4|  50|None|   5|
| 5|None|  16|   6|
| 6|None|  17|   7|
+==+====+====+====+

Source code in tablite/core.py

def merge(self, left, right, new, criteria):
    """Combines two columns into one: takes from LEFT where criteria is True else RIGHT.

    :param: T: Table
    :param left: (str) column name
    :param right: (str) column name
    :param new: (str) new name
    :param: criteria: np.array(bool):
            if True take left column
            else take right column

    :returns: T

    Example:
    ```
    >>> c.show()
    +==+====+====+====+====+
    | #| A  | B  | C  | D  |
    +--+----+----+----+----+
    | 0|   1|  10|   1|  11|
    | 1|   2|  20|   2|  12|
    | 2|   3|None|   3|  13|
    | 3|None|  40|None|None|
    | 4|   5|  50|None|None|
    | 5|None|None|   6|  16|
    | 6|None|None|   7|  17|
    +==+====+====+====+====+

    >>> c.merge("A", "C", new="E", criteria=[v != None for v in c['A']])
    >>> c.show()
    +==+====+====+====+
    | #| B  | D  | E  |
    +--+----+----+----+
    | 0|  10|  11|   1|
    | 1|  20|  12|   2|
    | 2|None|  13|   3|
    | 3|  40|None|None|
    | 4|  50|None|   5|
    | 5|None|  16|   6|
    | 6|None|  17|   7|
    +==+====+====+====+
    ```
    """
    # note the argument order of merge.where: criteria comes first.
    merged = merge.where(self, criteria, left, right, new)
    return merged

`tablite.core.Table.column_select(cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=_TaskManager)`

type-casts columns from a given table to specified type(s)

cols

list of dicts: (example):

cols = [
    {'column':'A', 'type': 'bool'},
    {'column':'B', 'type': 'int', 'allow_empty': True},
    {'column':'B', 'type': 'float', 'allow_empty': False, 'rename': 'C'},
]

'column' : column name of the input table that we want to type-cast

'type' : type that we want to type-cast the specified column to

'allow_empty': should we allow empty values (None, str('')) through (Default: False)

'rename' : new name of the column; if None, the original name is kept. In case of duplicates a suffix is added (Default: None)

supported types: 'bool', 'int', 'float', 'str', 'date', 'time', 'datetime'

if any of the columns is rejected, entire row is rejected

tqdm: progressbar constructor TaskManager: TaskManager constructor

(TABLE, TABLE)	DESCRIPTION
	first table contains the rows that were successfully cast to desired types
	second table contains rows that failed to cast + rejection reason

Source code in tablite/core.py

def column_select(self, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=_TaskManager):
    """
    type-casts columns from a given table to specified type(s)

    This method is a thin wrapper: it forwards `self` and all arguments
    to `_column_select`.

    cols:
        list of dicts: (example):

            cols = [
                {'column':'A', 'type': 'bool'},
                {'column':'B', 'type': 'int', 'allow_empty': True},
                {'column':'B', 'type': 'float', 'allow_empty': False, 'rename': 'C'},
            ]

        'column'     : column name of the input table that we want to type-cast
        'type'       : type that we want to type-cast the specified column to
        'allow_empty': should we allow empty values (None, str('')) through (Default: False)
        'rename'     : new name of the column, if None will keep the original name,
                       in case of duplicates suffix will be added (Default: None)

        supported types: 'bool', 'int', 'float', 'str', 'date', 'time', 'datetime'

        if any of the columns is rejected, entire row is rejected

    tqdm: progressbar constructor
    TaskManager: TaskManager constructor

    returns: (Table, Table)
        first table contains the rows that were successfully cast to desired types
        second table contains rows that failed to cast + rejection reason
    """
    return _column_select(self, cols, tqdm=tqdm, TaskManager=TaskManager)

`tablite.core.Table.join(other, left_keys, right_keys, left_columns=None, right_columns=None, kind='inner', merge_keys=False, tqdm=_tqdm, pbar=None)`

short-cut for all join functions. kind: 'inner', 'left', 'outer', 'cross'

Source code in tablite/core.py

def join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, kind="inner", merge_keys=False, tqdm=_tqdm, pbar=None):
    """
    Single entry point for all join flavours.

    `kind` selects the bound method that performs the work:
    'inner' -> inner_join, 'left' -> left_join, 'outer' -> outer_join,
    'cross' -> cross_join. All remaining arguments are handed to that
    method unchanged and its result is returned.

    Raises:
        ValueError: if `kind` is not one of the four names above.
    """
    dispatch = {
        "inner": self.inner_join,
        "left": self.left_join,
        "outer": self.outer_join,
        "cross": self.cross_join,
    }
    if kind in dispatch:
        join_method = dispatch[kind]
        return join_method(other, left_keys, right_keys, left_columns, right_columns, merge_keys=merge_keys, tqdm=tqdm, pbar=pbar)
    raise ValueError(f"join type unknown: {kind}")

`tablite.core.Table.left_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

:param other: self, other = (left, right) :param left_keys: list of keys for the join :param right_keys: list of keys for the join :param left_columns: list of left columns to retain, if None, all are retained. :param right_columns: list of right columns to retain, if None, all are retained. :return: new Table Example:

SQL:   SELECT number, letter FROM numbers LEFT JOIN letters ON numbers.colour == letters.color
Tablite: left_join = numbers.left_join(
    letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
)

Source code in tablite/core.py

def left_join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None):
    """
    Left join of `self` (left) with `other` (right).

    Thin wrapper: all arguments are forwarded to `joins.left_join`.

    :param other: self, other = (left, right)
    :param left_keys: list of keys for the join
    :param right_keys: list of keys for the join
    :param left_columns: list of left columns to retain, if None, all are retained.
    :param right_columns: list of right columns to retain, if None, all are retained.
    :param merge_keys: forwarded unchanged to `joins.left_join` (default False).
    :param tqdm: progressbar constructor, forwarded unchanged.
    :param pbar: forwarded unchanged (default None); presumably an existing
        progress bar to reuse - TODO confirm against `joins.left_join`.
    :return: new Table
    Example:
    ```
    SQL:   SELECT number, letter FROM numbers LEFT JOIN letters ON numbers.colour == letters.color
    Tablite: left_join = numbers.left_join(
        letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
    )
    ```
    """
    return joins.left_join(self, other, left_keys, right_keys, left_columns, right_columns, merge_keys=merge_keys, tqdm=tqdm, pbar=pbar)

`tablite.core.Table.inner_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

:param other: self, other = (left, right) :param left_keys: list of keys for the join :param right_keys: list of keys for the join :param left_columns: list of left columns to retain, if None, all are retained. :param right_columns: list of right columns to retain, if None, all are retained. :return: new Table Example:

SQL:   SELECT number, letter FROM numbers JOIN letters ON numbers.colour == letters.color
Tablite: inner_join = numbers.inner_join(
    letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
    )

Source code in tablite/core.py

def inner_join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None):
    """
    Inner join of `self` (left) with `other` (right).

    Thin wrapper: all arguments are forwarded to `joins.inner_join`.

    :param other: self, other = (left, right)
    :param left_keys: list of keys for the join
    :param right_keys: list of keys for the join
    :param left_columns: list of left columns to retain, if None, all are retained.
    :param right_columns: list of right columns to retain, if None, all are retained.
    :param merge_keys: forwarded unchanged to `joins.inner_join` (default False).
    :param tqdm: progressbar constructor, forwarded unchanged.
    :param pbar: forwarded unchanged (default None); presumably an existing
        progress bar to reuse - TODO confirm against `joins.inner_join`.
    :return: new Table
    Example:
    ```
    SQL:   SELECT number, letter FROM numbers JOIN letters ON numbers.colour == letters.color
    Tablite: inner_join = numbers.inner_join(
        letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
        )
    ```
    """
    return joins.inner_join(self, other, left_keys, right_keys, left_columns, right_columns, merge_keys=merge_keys, tqdm=tqdm, pbar=pbar)

`tablite.core.Table.outer_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

:param other: self, other = (left, right) :param left_keys: list of keys for the join :param right_keys: list of keys for the join :param left_columns: list of left columns to retain, if None, all are retained. :param right_columns: list of right columns to retain, if None, all are retained. :return: new Table Example:

SQL:   SELECT number, letter FROM numbers OUTER JOIN letters ON numbers.colour == letters.color
Tablite: outer_join = numbers.outer_join(
    letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
    )

Source code in tablite/core.py

def outer_join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None):
    """
    Outer join of `self` (left) with `other` (right).

    Thin wrapper: all arguments are forwarded to `joins.outer_join`.

    :param other: self, other = (left, right)
    :param left_keys: list of keys for the join
    :param right_keys: list of keys for the join
    :param left_columns: list of left columns to retain, if None, all are retained.
    :param right_columns: list of right columns to retain, if None, all are retained.
    :param merge_keys: forwarded unchanged to `joins.outer_join` (default False).
    :param tqdm: progressbar constructor, forwarded unchanged.
    :param pbar: forwarded unchanged (default None); presumably an existing
        progress bar to reuse - TODO confirm against `joins.outer_join`.
    :return: new Table
    Example:
    ```
    SQL:   SELECT number, letter FROM numbers OUTER JOIN letters ON numbers.colour == letters.color
    Tablite: outer_join = numbers.outer_join(
        letters, left_keys=['colour'], right_keys=['color'], left_columns=['number'], right_columns=['letter']
        )
    ```
    """
    return joins.outer_join(self, other, left_keys, right_keys, left_columns, right_columns, merge_keys=merge_keys, tqdm=tqdm, pbar=pbar)

`tablite.core.Table.cross_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

CROSS JOIN returns the Cartesian product of rows from tables in the join. In other words, it will produce rows which combine each row from the first table with each row from the second table

Source code in tablite/core.py

def cross_join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None):
    """
    CROSS JOIN returns the Cartesian product of rows from tables in the join.
    In other words, it will produce rows which combine each row from the first table
    with each row from the second table

    Thin wrapper: `self` is the first table, `other` the second, and every
    argument (including `left_keys` and `right_keys`) is forwarded unchanged
    to `joins.cross_join`, whose result is returned.
    """
    return joins.cross_join(self, other, left_keys, right_keys, left_columns, right_columns, merge_keys=merge_keys, tqdm=tqdm, pbar=pbar)

`tablite.core.Table.lookup(other, *criteria, all=True, tqdm=_tqdm)`

function for looking up values in other according to criteria in ascending order. :param: other: Table sorted in ascending search order. :param: criteria: Each criteria must be a tuple with value comparisons in the form: (LEFT, OPERATOR, RIGHT) :param: all: boolean: True=ALL, False=Any

OPERATOR must be a callable that returns a boolean LEFT must be a value that the OPERATOR can compare. RIGHT must be a value that the OPERATOR can compare.

Examples:

('column A', "==", 'column B')  # comparison of two columns
('Date', "<", DataTypes.date(24,12) )  # value from column 'Date' is before 24/12.
f = lambda L,R: all( ord(L) < ord(R) )  # uses custom function.
('text 1', f, 'text 2') value from column 'text 1' is compared with value from column 'text 2'

Source code in tablite/core.py

def lookup(self, other, *criteria, all=True, tqdm=_tqdm):
    """function for looking up values in `other` according to criteria in ascending order.

    Thin wrapper: forwards `self`, `other`, the criteria, `all` and `tqdm`
    to `lookup.lookup` and returns its result.

    :param: other: Table sorted in ascending search order.
    :param: criteria: Each criteria must be a tuple with value comparisons in the form:
        (LEFT, OPERATOR, RIGHT)
    :param: all: boolean: True=ALL, False=Any
    :param: tqdm: progressbar constructor

    OPERATOR must be a callable that returns a boolean
    LEFT must be a value that the OPERATOR can compare.
    RIGHT must be a value that the OPERATOR can compare.

    Examples:
    ```
    ('column A', "==", 'column B')  # comparison of two columns
    ('Date', "<", DataTypes.date(24,12) )  # value from column 'Date' is before 24/12.
    f = lambda L, R: ord(L) < ord(R)  # uses custom function that returns a boolean.
    ('text 1', f, 'text 2')  # value from column 'text 1' is compared with value from column 'text 2'
    ```
    """
    return lookup.lookup(self, other, *criteria, all=all, tqdm=tqdm)

`tablite.core.Table.match(other, *criteria, keep_left=None, keep_right=None)`

performs inner join where T matches other and removes rows that do not match.

:param: T: Table :param: other: Table :param: criteria: Each criteria must be a tuple with value comparisons in the form:

(LEFT, OPERATOR, RIGHT), where operator must be "=="

Example:
    ('column A', "==", 'column B')

This syntax follows the lookup syntax. See Lookup for details.

:param: keep_left: list of columns to keep. :param: keep_right: list of right columns to keep.

Source code in tablite/core.py

def match(self, other, *criteria, keep_left=None, keep_right=None):
    """
    performs inner join where `self` matches `other` and removes rows that do not match.

    Thin wrapper: forwards `self` (the left table) and all arguments to
    `match.match` and returns its result.

    :param: other: Table
    :param: criteria: Each criteria must be a tuple with value comparisons in the form:

        (LEFT, OPERATOR, RIGHT), where operator must be "=="

        Example:
            ('column A', "==", 'column B')

        This syntax follows the lookup syntax. See Lookup for details.

    :param: keep_left: list of columns to keep.
    :param: keep_right: list of right columns to keep.
    """
    return match.match(self, other, *criteria, keep_left=keep_left, keep_right=keep_right)

`tablite.core.Table.replace_missing_values(*args, **kwargs)`

Source code in tablite/core.py

def replace_missing_values(self, *args, **kwargs):
    """Placeholder that always fails and points the caller to `imputation`.

    Accepts any arguments, ignores them, and raises.

    Raises:
        AttributeError: always, with the message "See imputation".
    """
    hint = "See imputation"
    raise AttributeError(hint)

`tablite.core.Table.imputation(targets, missing=None, method='carry forward', sources=None, tqdm=_tqdm)`

In statistics, imputation is the process of replacing missing data with substituted values.

See more: https://en.wikipedia.org/wiki/Imputation_(statistics)

PARAMETER	DESCRIPTION
`table`	source table. TYPE: `Table`
`targets`	column names to find and replace missing values TYPE: `str or list of strings`
`missing`	values to be replaced. TYPE: `None or iterable` DEFAULT: `None`
`method`	method to be used for replacement. Options: 'carry forward': takes the previous value, and carries forward into fields where values are missing. +: quick. Realistic on time series. -: Can produce strange outliers. 'mean': calculates the column mean (exclude `missing`) and copies the mean in as replacement. +: quick -: doesn't work on text. Causes data set to drift towards the mean. 'mode': calculates the column mode (exclude `missing`) and copies the mode in as replacement. +: quick -: most frequent value becomes over-represented in the sample 'nearest neighbour': calculates normalised distance between items in source columns selects nearest neighbour and copies value as replacement. +: works for any datatype. -: computationally intensive (e.g. slow) TYPE: `str` DEFAULT: `'carry forward'`
`sources`	NEAREST NEIGHBOUR ONLY column names to be used during imputation. if None or empty, all columns will be used. TYPE: `list of strings` DEFAULT: `None`

RETURNS	DESCRIPTION
`table`	table with replaced values.

Source code in tablite/core.py

def imputation(self, targets, missing=None, method="carry forward", sources=None, tqdm=_tqdm):
    """
    In statistics, imputation is the process of replacing missing data with substituted values.

    See more: https://en.wikipedia.org/wiki/Imputation_(statistics)

    Thin wrapper: `self` is the source table; it is forwarded together with
    all arguments to `imputation.imputation`.

    Args:
        targets (str or list of strings): column names to find and
            replace missing values

        missing (None or iterable): values to be replaced.

        method (str): method to be used for replacement. Options:

            'carry forward':
                takes the previous value, and carries forward into fields
                where values are missing.
                +: quick. Realistic on time series.
                -: Can produce strange outliers.

            'mean':
                calculates the column mean (exclude `missing`) and copies
                the mean in as replacement.
                +: quick
                -: doesn't work on text. Causes data set to drift towards the mean.

            'mode':
                calculates the column mode (exclude `missing`) and copies
                the mode in as replacement.
                +: quick
                -: most frequent value becomes over-represented in the sample

            'nearest neighbour':
                calculates normalised distance between items in source columns
                selects nearest neighbour and copies value as replacement.
                +: works for any datatype.
                -: computationally intensive (e.g. slow)

        sources (list of strings): NEAREST NEIGHBOUR ONLY
            column names to be used during imputation.
            if None or empty, all columns will be used.

        tqdm: progressbar constructor.

    Returns:
        table: table with replaced values.
    """
    return imputation.imputation(self, targets, missing, method, sources, tqdm=tqdm)

`tablite.core.Table.transpose(tqdm=_tqdm)`

Source code in tablite/core.py

def transpose(self, tqdm=_tqdm):
    """Transpose the table.

    Thin wrapper: forwards `self` and `tqdm` (positionally) to
    `pivots.transpose` and returns whatever that function returns.

    Args:
        tqdm: progressbar constructor.
    """
    return pivots.transpose(self, tqdm)

`tablite.core.Table.pivot_transpose(columns, keep=None, column_name='transpose', value_name='value', tqdm=_tqdm)`

Transpose a selection of columns to rows.

PARAMETER	DESCRIPTION
`columns`	column names to transpose TYPE: `list of column names`
`keep`	column names to keep (repeat) TYPE: `list of column names` DEFAULT: `None`

RETURNS	DESCRIPTION
`Table`	with columns transposed to rows

Example

Keep columns 1, 2 and 3 and transpose the remaining columns, except sum.

Input:

| col1 | col2 | col3 | sun | mon | tue | ... | sat | sum  |
|------|------|------|-----|-----|-----|-----|-----|------|
| 1234 | 2345 | 3456 | 456 | 567 |     | ... |     | 1023 |
| 1244 | 2445 | 4456 |     |   7 |     | ... |     |    7 |
| ...  |      |      |     |     |     |     |     |      |

t.pivot_transpose(columns=[sun,mon,tue,wed,thu,fri,sat], keep=[col1, col2, col3])

Output:

|col1| col2| col3| transpose| value|
|----|-----|-----|----------|------|
|1234| 2345| 3456| sun      |   456|
|1234| 2345| 3456| mon      |   567|
|1244| 2445| 4456| mon      |     7|

Source code in tablite/core.py

def pivot_transpose(self, columns, keep=None, column_name="transpose", value_name="value", tqdm=_tqdm):
    """Transpose a selection of columns to rows.

    Thin wrapper: forwards `self` and all arguments to `pivots.pivot_transpose`.

    Args:
        columns (list of column names): column names to transpose
        keep (list of column names): column names to keep (repeat)
        column_name (str): forwarded to `pivots.pivot_transpose`; presumably the
            header of the output column holding the transposed column names
            (matches 'transpose' in the example below) - TODO confirm.
        value_name (str): forwarded to `pivots.pivot_transpose`; presumably the
            header of the output column holding the values
            (matches 'value' in the example below) - TODO confirm.
        tqdm: progressbar constructor.

    Returns:
        Table: with columns transposed to rows

    Example:
        keep columns 1, 2 and 3 and transpose the remaining columns, except `sum`.

    Input:
    ```
    | col1 | col2 | col3 | sun | mon | tue | ... | sat | sum  |
    |------|------|------|-----|-----|-----|-----|-----|------|
    | 1234 | 2345 | 3456 | 456 | 567 |     | ... |     | 1023 |
    | 1244 | 2445 | 4456 |     |   7 |     | ... |     |    7 |
    | ...  |      |      |     |     |     |     |     |      |

    t.pivot_transpose(columns=[sun,mon,tue,wed,thu,fri,sat], keep=[col1, col2, col3])

    Output:

    |col1| col2| col3| transpose| value|
    |----|-----|-----|----------|------|
    |1234| 2345| 3456| sun      |   456|
    |1234| 2345| 3456| mon      |   567|
    |1244| 2445| 4456| mon      |     7|
    ```
    """
    return pivots.pivot_transpose(self, columns, keep, column_name, value_name, tqdm=tqdm)

`tablite.core.Table.diff(other, columns=None)`

compares table self with table other

PARAMETER	DESCRIPTION
`self`	Table TYPE: `Table`
`other`	Table TYPE: `Table`
`columns`	list of column names to include in comparison. Defaults to None. TYPE: `List` DEFAULT: `None`

RETURNS	DESCRIPTION
`Table`	diff of self and other with diff in columns 1st and 2nd.

Source code in tablite/core.py

def diff(self, other, columns=None):
    """compares table self with table other

    Thin wrapper: forwards `self`, `other` and `columns` to `diff.diff`.

    Args:
        self (Table): Table
        other (Table): Table
        columns (List, optional): list of column names to include in comparison. Defaults to None.

    Returns:
        Table: diff of self and other with diff in columns 1st and 2nd.
    """
    return diff.diff(self, other, columns)

`tablite.core.Table.__str__()`

Source code in tablite/base.py

def __str__(self):  # USER FUNCTION.
    """Return a one-line summary: class name, column count and row count.

    Both counts are rendered with thousands separators,
    e.g. `Table(3 columns, 1,000 rows)`.
    """
    cls_name = self.__class__.__name__
    n_columns = len(self.columns)
    n_rows = len(self)
    return f"{cls_name}({n_columns:,} columns, {n_rows:,} rows)"

`tablite.core.Table.__repr__()`

Source code in tablite/base.py

def __repr__(self):
    """Return the same text as `__str__` (the repr is the string summary)."""
    return self.__str__()

`tablite.core.Table.nbytes()`

finds the total bytes of the table on disk

RETURNS	DESCRIPTION
`tuple`	int: real bytes used on disk int: total bytes used if flattened

Source code in tablite/base.py

def nbytes(self):  # USER FUNCTION.
    """Report how many bytes the table's pages occupy on disk.

    Each page's size is read with `page.path.stat().st_size`.

    Returns:
        tuple:
            int: real bytes used on disk (every distinct page counted once)
            int: total bytes used if flattened (every page counted as often
                 as it is referenced by a column)
    """
    size_by_page = {}
    flattened = 0
    for col in self.columns.values():
        pages = col.pages
        # stat each distinct page of this column once ...
        for pg in set(pages):
            size_by_page[pg] = pg.path.stat().st_size
        # ... then count it once per reference.
        flattened += sum(size_by_page[pg] for pg in pages)
    return sum(size_by_page.values()), flattened

`tablite.core.Table.items()`

returns table as dict

RETURNS	DESCRIPTION
`dict`	Table as dict `{column_name: [values], ...}`

Source code in tablite/base.py

def items(self):  # USER FUNCTION.
    """Return the table's contents as dict items.

    Every column is read in full (`column[:]`) and converted with `.tolist()`.

    Returns:
        dict_items: `(column_name, [values])` pairs, in column order.
    """
    as_lists = {}
    for name, column in self.columns.items():
        as_lists[name] = column[:].tolist()
    return as_lists.items()

`tablite.core.Table.__delitem__(key)`

Examples:

>>> del table['a']  # removes column 'a'
>>> del table[-3:]  # removes last 3 rows from all columns.

Source code in tablite/base.py

def __delitem__(self, key):  # USER FUNCTION.
    """
    Delete rows (integer or slice key) or a whole column (any other key).

    Examples:
    ```
    >>> del table['a']  # removes column 'a'
    >>> del table[-3:]  # removes last 3 rows from all columns.
    ```

    Raises:
        KeyError: if `key` is neither an int/slice nor an existing column name.
    """
    # Row deletion: apply the same index/slice to every column.
    if isinstance(key, (int, slice)):
        for column in self.columns.values():
            del column[key]
        return

    # Column deletion.
    if key not in self.columns:
        raise KeyError(f"Key not found: {key}")
    del self.columns[key]

`tablite.core.Table.__setitem__(key, value)`

table behaves like a dict. Args: key (str or hashable): column name value (iterable): list, tuple or nd.array with values.

As Table now accepts the keyword columns as a dict:

>>> t = Table(columns={'b':[4,5,6], 'c':[7,8,9]})

and the header/data combinations:

>>> t = Table(headers=['b','c'], rows=[[4,7],[5,8],[6,9]])

This has the side-benefit that tuples now can be used as headers.

Source code in tablite/base.py

def __setitem__(self, key, value):  # USER FUNCTION
    """Assign a column, dict-style: `table[key] = value`.

    Args:
        key (str or hashable): column name
        value: one of
            None            -> `Column(self.path, value=None)`
            list or tuple   -> converted with `list_to_np_array`, then wrapped in a Column
            numpy.ndarray   -> wrapped in a Column
            Column          -> stored as-is (not copied)

    Raises:
        TypeError: for any other type of `value`.

    Tables can also be built in one go, either from a dict of columns:
    ```
    >>> t = Table(columns={'b':[4,5,6], 'c':[7,8,9]})
    ```
    or from headers and rows:
    ```
    >>> t = Table(headers=['b','c'], rows=[[4,7],[5,8],[6,9]])
    ```
    Because keys only need to be hashable, tuples can be used as headers.
    """
    if value is None:
        new_column = Column(self.path, value=None)
    elif isinstance(value, (list, tuple)):
        new_column = Column(self.path, list_to_np_array(value))
    elif isinstance(value, np.ndarray):
        new_column = Column(self.path, value)
    elif isinstance(value, Column):
        new_column = value
    else:
        raise TypeError(f"{type(value)} not supported.")
    self.columns[key] = new_column

`tablite.core.Table.__getitem__(keys)`

Enables selection of columns and rows

PARAMETER	DESCRIPTION
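`keys`	column name(s), an integer row index and/or a slice. TYPE: `column name, integer or slice`

Examples:

>>> table['a']                        selects column 'a'
>>> table[3]                          selects row 3 as a tuple.
>>> table[:10]                        selects first 10 rows from all columns
>>> table['a','b', slice(3,20,2)]     selects a slice from columns 'a' and 'b'
>>> table['b', 'a', 'a', 'c', 2:20:3] selects column 'b' and 'c' and 'a' twice for a slice.
>>> table[('b', 'a', 'a', 'c')]       selects columns 'b', 'a', 'a', and 'c' using a tuple.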

Raises: KeyError: if key is not found. TypeError: if key is not a string, integer or slice.

RETURNS	DESCRIPTION
`Table`	returns columns in same order as selection.

Source code in tablite/base.py

def __getitem__(self, keys):  # USER FUNCTION
    """
    Enables selection of columns and rows

    Args:
        keys (column name, integer or slice): a single key, or a list/tuple
            of keys. Strings select columns, an integer selects a row and a
            slice selects a range of rows. If no column name is given, all
            columns are selected. If several slices are given only the
            first one is used.

    Examples:
    ```
    >>> table['a']                        selects column 'a'
    >>> table[3]                          selects row 3 as a tuple.
    >>> table[:10]                        selects first 10 rows from all columns
    >>> table['a','b', slice(3,20,2)]     selects a slice from columns 'a' and 'b'
    >>> table['b', 'a', 'a', 'c', 2:20:3] selects column 'b' and 'c' and 'a' twice for a slice.
    >>> table[('b', 'a', 'a', 'c')]       selects columns 'b', 'a', 'a', and 'c' using a tuple.
    ```

    Raises:
        KeyError: if a column name is not found.
        TypeError: if `Column.getpages` yields something that is neither a
            numpy array nor a SimplePage.

    Returns:
        Column: the column itself, when exactly one column is selected and
            no slice is given.
        tuple: the values of one row, when exactly one integer is given.
        Table: otherwise; columns are in the same order as the selection.

    NOTE(review): keys that are not str, int or slice are not picked up by
    any of the filters below and appear to be silently ignored - confirm
    whether a TypeError was intended.
    """

    # Normalise `keys` to a flat tuple.
    if not isinstance(keys, tuple):
        if isinstance(keys, list):
            keys = tuple(keys)
        else:
            keys = (keys,)
    if isinstance(keys[0], tuple):
        keys = tuple(list(chain(*keys)))

    integers = [i for i in keys if isinstance(i, int)]
    if len(integers) == len(keys) == 1:  # return a single tuple.
        # Swap the lone integer for a slice so that `slices` below is
        # non-empty and the single-column shortcut is skipped; the row
        # number itself is still taken from `integers`.
        keys = [slice(keys[0])]

    column_names = [i for i in keys if isinstance(i, str)]
    column_names = list(self.columns) if not column_names else column_names
    not_found = [name for name in column_names if name not in self.columns]
    if not_found:
        raise KeyError(f"keys not found: {', '.join(not_found)}")

    slices = [i for i in keys if isinstance(i, slice)]
    slc = slice(0, len(self)) if not slices else slices[0]

    if (
        len(slices) == 0 and len(column_names) == 1
    ):  # e.g. tbl['a'] or tbl['a'][:10]
        col = self.columns[column_names[0]]
        # NOTE(review): this inner `if slices` can never be true, because
        # the enclosing condition already requires len(slices) == 0.
        if slices:
            return col[slc]  # return slice from column as list of values
        else:
            return col  # return whole column

    elif len(integers) == 1:  # return a single tuple.
        row_no = integers[0]
        slc = slice(row_no, row_no + 1)
        return tuple(self.columns[name][slc].tolist()[0] for name in column_names)

    elif not slices:  # e.g. new table with N whole columns.
        # The new table refers to the same Column objects as `self`.
        return self.__class__(
            columns={name: self.columns[name] for name in column_names}
        )

    else:  # e.g. new table from selection of columns and slices.
        t = self.__class__()
        for name in column_names:
            column = self.columns[name]

            new_column = Column(t.path)  # create new Column.
            for item in column.getpages(slc):
                if isinstance(item, np.ndarray):
                    new_column.extend(item)  # extend subslice (expensive)
                elif isinstance(item, SimplePage):
                    new_column.pages.append(item)  # extend page (cheap)
                else:
                    raise TypeError(f"Bad item: {item}")

            # below:
            # set the new column directly on t.columns.
            # Do not use t[name] as that triggers __setitem__ again.
            t.columns[name] = new_column

        return t

`tablite.core.Table.__len__()`

Source code in tablite/base.py

def __len__(self):  # USER FUNCTION.
    """Return the number of rows: the length of the longest column, or 0
    when the table has no columns."""
    columns = self.columns
    return max(len(c) for c in columns.values()) if columns else 0

`tablite.core.Table.__eq__(other) -> bool`

Determines if two tables have identical content.

PARAMETER	DESCRIPTION
`other`	table for comparison TYPE: `Table`

RETURNS	DESCRIPTION
`bool`	True if tables are identical. TYPE: `bool`

Source code in tablite/base.py

def __eq__(self, other) -> bool:  # USER FUNCTION.
    """Determines if two tables have identical content.

    Args:
        other (Table or dict): object for comparison. A dict is compared
            against `self.items()`; anything that is neither a dict nor a
            BaseTable is never equal.

    Returns:
        bool: True if tables are identical. Two tables that both have zero
            rows compare equal before column names are looked at.
    """
    if isinstance(other, dict):
        return self.items() == other.items()
    if not isinstance(other, BaseTable):
        return False
    if self is other:
        return True

    n_self, n_other = len(self), len(other)
    if n_self != n_other:
        return False
    if n_self == 0:
        return True

    if self.columns.keys() != other.columns.keys():
        return False
    return all(col == other.columns[name] for name, col in self.columns.items())

`tablite.core.Table.clear()`

clears the table. Like dict().clear()

Source code in tablite/base.py

def clear(self):  # USER FUNCTION.
    """clears the table. Like dict().clear()

    Empties `self.columns` in place, so the table has no columns afterwards.
    """
    self.columns.clear()

`tablite.core.Table.save(path, compression_method=zipfile.ZIP_DEFLATED, compression_level=1)`

saves table to compressed tpz file.

PARAMETER	DESCRIPTION
`path`	file destination. TYPE: `Path`
`compression_method`	See zipfile compression methods. Defaults to ZIP_DEFLATED. DEFAULT: `ZIP_DEFLATED`
`compression_level`	See zipfile compression levels. Defaults to 1. DEFAULT: `1`

The file format is as follows: .tpz is a zip archive with table metadata captured as table.yml and the necessary set of pages saved as .npy files.

The zip contains table.yml which provides an overview of the data:

--------------------------------------
%YAML 1.2                              yaml version
columns:                               start of columns section.
    name: “列 1”                       name of column 1.
        pages: [p1b1, p1b2]            list of pages in column 1.
    name: “列 2”                       name of column 2
        pages: [p2b1, p2b2]            list of pages in column 2.
----------------------------------------

Source code in tablite/base.py

def save(
    self, path, compression_method=zipfile.ZIP_DEFLATED, compression_level=1
):  # USER FUNCTION.
    """saves table to compressed tpz file.

    Args:
        path (Path or str): file destination. If the suffix is not ".tpz",
            ".tpz" is appended to the file name.
        compression_method: See zipfile compression methods. Defaults to ZIP_DEFLATED.
        compression_level: See zipfile compression levels. Defaults to 1.
        The default settings produce 80% compression at 10% slowdown.

    Raises:
        TypeError: if `path` is an existing directory.

    The file format is as follows:
    .tpz is a zip archive with table metadata captured as table.yml
    and the necessary set of pages saved as .npy files.

    The zip contains table.yml which provides an overview of the data:
    ```
    --------------------------------------
    %YAML 1.2                              yaml version
    columns:                               start of columns section.
        name: “列 1”                       name of column 1.
            pages: [p1b1, p1b2]            list of pages in column 1.
        name: “列 2”                       name of column 2
            pages: [p2b1, p2b2]            list of pages in column 2.
    ----------------------------------------
    ```
    """
    if isinstance(path, str):
        path = Path(path)
    type_check(path, Path)
    if path.is_dir():
        raise TypeError(f"filename needed: {path}")
    if path.suffix != ".tpz":
        path = path.parent / (path.parts[-1] + ".tpz")

    # create yaml document
    _page_counter = 0
    cols = {}
    for name, col in self.columns.items():
        type_check(col, Column)
        cols[name] = {"pages": [p.path.name for p in col.pages]}
        _page_counter += len(col.pages)
    yml = yaml.safe_dump(
        {"columns": cols}, sort_keys=False, allow_unicode=True, default_flow_style=None
    )

    _file_counter = 0
    # Pages already stored in the archive. A page can be referenced many
    # times by one column (t *= 1000 repeats t 1000x) and also by several
    # columns; each page file is written once so the zip never gets
    # duplicate member names.
    _written = set()
    with zipfile.ZipFile(
        path, "w", compression=compression_method, compresslevel=compression_level
    ) as f:
        log.debug(f"writing .tpz to {path} with\n{yml}")
        f.writestr("table.yml", yml)
        for col in self.columns.values():
            for page in set(col.pages) - _written:
                _written.add(page)
                with open(page.path, "rb", buffering=0) as raw_io:
                    f.writestr(page.path.name, raw_io.read())
                _file_counter += 1
                log.debug(f"adding Page {page.path}")

        _fields = len(self) * len(self.columns)
        # A table without pages (e.g. an empty table) must still be
        # saveable: avoid dividing by zero in the statistics below.
        _avg = _fields // _page_counter if _page_counter else 0
        log.debug(
            f"Wrote {_fields:,} on {_page_counter:,} pages in {_file_counter} files: {_avg} fields/page"
        )

`tablite.core.Table.load(path, tqdm=_tqdm)` `classmethod`

loads a table from .tpz file. See also Table.save for details on the file format.

PARAMETER	DESCRIPTION
`path`	source file TYPE: `Path`

RETURNS	DESCRIPTION
`Table`	table in read-only mode.

Source code in tablite/base.py

@classmethod
def load(cls, path, tqdm=_tqdm):  # USER FUNCTION.
    """Build a table from a .tpz archive.
    See also Table.save for details on the file format.

    The archive's `table.yml` lists, per column, the page files to read;
    each page is loaded with numpy and appended to a fresh Column.
    After reading, `update_access_time(path)` is called.

    Args:
        path (Path or str): source file
        tqdm: progressbar constructor.

    Returns:
        Table: a new instance of `cls` holding the loaded columns.
    """
    path = Path(path)
    log.debug(f"loading {path}")
    with zipfile.ZipFile(path, "r") as archive:
        metadata = yaml.safe_load(archive.read("table.yml"))
        table = cls()
        column_specs = metadata["columns"]

        total_pages = sum(len(spec["pages"]) for spec in column_specs.values())

        progress = tqdm(
            total=total_pages,
            desc=f"loading '{path.name}' file",
            disable=Config.TQDM_DISABLE,
        )
        with progress as pbar:
            for name, spec in column_specs.items():
                column = Column(table.path)
                for page_name in spec["pages"]:
                    raw = io.BytesIO(archive.read(page_name))
                    # NOTE(review): allow_pickle=True lets numpy unpickle
                    # object arrays, which can execute arbitrary code.
                    # Only load .tpz files from trusted sources.
                    values = np.load(raw, allow_pickle=True, fix_imports=False)
                    column.extend(values)
                    pbar.update(1)
                table.columns[name] = column
    update_access_time(path)
    return table

`tablite.core.Table.copy()`

Source code in tablite/base.py

def copy(self):
    """Return a new table of the same class with the same columns.

    Each column gets a fresh Column object whose page list is a shallow
    copy of the original's, so the page objects themselves are shared.
    """
    duplicate = type(self)()
    for name, column in self.columns.items():
        clone = Column(duplicate.path)
        clone.pages = column.pages[:]
        duplicate.columns[name] = clone
    return duplicate

`tablite.core.Table.__imul__(other)`

Repeats instance of table N times.

Like list: t *= N

PARAMETER	DESCRIPTION
`other`	multiplier TYPE: `int`

Source code in tablite/base.py

def __imul__(self, other):
    """Repeat every column of this table in place.

    Like list: `t *= N`

    Args:
        other (int): multiplier; must be an int greater than zero.

    Raises:
        TypeError: if `other` is not an int or is not positive.

    Returns:
        The table itself, after each column has been multiplied by `other`.
    """
    is_positive_int = isinstance(other, int) and other > 0
    if not is_positive_int:
        raise TypeError(
            f"a table can be repeated an integer number of times, not {type(other)} number of times"
        )
    for column in self.columns.values():
        # relies on the column's own in-place multiplication.
        column *= other
    return self

`tablite.core.Table.__mul__(other)`

Repeat table N times. Like list: new = old * N

PARAMETER	DESCRIPTION
`other`	multiplier TYPE: `int`

RETURNS	DESCRIPTION
	Table

Source code in tablite/base.py

def __mul__(self, other):
    """Return a repeated copy of the table, leaving this table untouched.

    Like list: `new = old * N`

    The work is delegated: the table is copied with `copy()` and the copy's
    `__imul__` performs the validation and the repetition.

    Args:
        other (int): multiplier

    Returns:
        Table
    """
    return self.copy().__imul__(other)

`tablite.core.Table.__iadd__(other)`

Concatenates tables with same column names.

Like list: table_1 += table_2

RAISES	DESCRIPTION
`ValueError`	If column names don't match.

RETURNS	DESCRIPTION
`Table`	self, updated in place.

Source code in tablite/base.py

def __iadd__(self, other):
    """Append the rows of another table with identical column names.

    Like list: `table_1 += table_2`

    Both tables must have exactly the same set of column names; the check
    runs in both directions before anything is modified. The pages of each
    column in `other` are then appended to the matching column here.

    Args:
        other (Table)

    Raises:
        ValueError: If column names don't match.

    Returns:
        The table itself, after it has been extended.
    """
    type_check(other, BaseTable)

    theirs = other.columns
    for own_name in self.columns.keys():
        if own_name not in theirs:
            raise ValueError(f"{own_name} not in other")
    for their_name in theirs.keys():
        if their_name not in self.columns:
            raise ValueError(f"{their_name} missing from self")

    for name, column in self.columns.items():
        # slice-copy first so that `t += t` does not extend a list while reading it.
        incoming_pages = theirs[name].pages[:]
        column.pages.extend(incoming_pages)
    return self

`tablite.core.Table.__add__(other)`

Concatenates tables with same column names.

Like list: table_3 = table_1 + table_2

RAISES	DESCRIPTION
`ValueError`	If column names don't match.

RETURNS	DESCRIPTION
	Table

Source code in tablite/base.py

def __add__(self, other):
    """Return a new table holding this table's rows followed by `other`'s.

    Like list: `table_3 = table_1 + table_2`

    Implemented as a copy of this table followed by an in-place `+=`,
    so neither operand is modified.

    Args:
        other (Table)

    Raises:
        ValueError: If column names don't match.

    Returns:
        Table
    """
    type_check(other, BaseTable)
    combined = self.copy()
    combined += other
    return combined

`tablite.core.Table.add_rows(*args, **kwargs)`

It's more efficient to add many rows at once.

if both args and kwargs, then args are added first, followed by kwargs.

supported cases:

>>> t = Table()
>>> t.add_columns('row','A','B','C')
>>> t.add_rows(1, 1, 2, 3)                              # (1) individual values as args
>>> t.add_rows([2, 1, 2, 3])                            # (2) list of values as args
>>> t.add_rows((3, 1, 2, 3))                            # (3) tuple of values as args
>>> t.add_rows(*(4, 1, 2, 3))                           # (4) unpacked tuple becomes arg like (1)
>>> t.add_rows(row=5, A=1, B=2, C=3)                    # (5) kwargs
>>> t.add_rows(**{'row': 6, 'A': 1, 'B': 2, 'C': 3})    # (6) dict / json interpreted as kwargs
>>> t.add_rows((7, 1, 2, 3), (8, 4, 5, 6))              # (7) two (or more) tuples as args
>>> t.add_rows([9, 1, 2, 3], [10, 4, 5, 6])             # (8) two or more lists as args
>>> t.add_rows(
    {'row': 11, 'A': 1, 'B': 2, 'C': 3},
    {'row': 12, 'A': 4, 'B': 5, 'C': 6}
    )                                                   # (9) two (or more) dicts as args - roughly comma sep'd json.
>>> t.add_rows( *[
    {'row': 13, 'A': 1, 'B': 2, 'C': 3},
    {'row': 14, 'A': 1, 'B': 2, 'C': 3}
    ])                                                  # (10) list of dicts as args
>>> t.add_rows(row=[15,16], A=[1,1], B=[2,2], C=[3,3])  # (11) kwargs with lists as values

Source code in tablite/base.py

def add_rows(self, *args, **kwargs):
    """Append one or more rows to the table.

    It's more efficient to add many rows at once.

    If both args and kwargs are given, then args are added first, followed by kwargs.

    How positional arguments are interpreted:
    - if any positional argument is not a list, tuple or dict, the whole
      `args` tuple is treated as ONE row of individual values;
    - otherwise every positional argument is one row. list/tuple rows are
      matched to the columns by position, dict rows by column name.
    Every row must have exactly as many values as the table has columns.

    How keyword arguments are interpreted:
    - if every value is a list or tuple, each value is a sequence of new
      values for that column;
    - otherwise each value is appended as a single new value.

    supported cases:
    ```
    >>> t = Table()
    >>> t.add_columns('row','A','B','C')
    >>> t.add_rows(1, 1, 2, 3)                              # (1) individual values as args
    >>> t.add_rows([2, 1, 2, 3])                            # (2) list of values as args
    >>> t.add_rows((3, 1, 2, 3))                            # (3) tuple of values as args
    >>> t.add_rows(*(4, 1, 2, 3))                           # (4) unpacked tuple becomes arg like (1)
    >>> t.add_rows(row=5, A=1, B=2, C=3)                    # (5) kwargs
    >>> t.add_rows(**{'row': 6, 'A': 1, 'B': 2, 'C': 3})    # (6) dict / json interpreted as kwargs
    >>> t.add_rows((7, 1, 2, 3), (8, 4, 5, 6))              # (7) two (or more) tuples as args
    >>> t.add_rows([9, 1, 2, 3], [10, 4, 5, 6])             # (8) two or more lists as args
    >>> t.add_rows(
        {'row': 11, 'A': 1, 'B': 2, 'C': 3},
        {'row': 12, 'A': 4, 'B': 5, 'C': 6}
        )                                                   # (9) two (or more) dicts as args - roughly comma sep'd json.
    >>> t.add_rows( *[
        {'row': 13, 'A': 1, 'B': 2, 'C': 3},
        {'row': 14, 'A': 1, 'B': 2, 'C': 3}
        ])                                                  # (10) list of dicts as args
    >>> t.add_rows(row=[15,16], A=[1,1], B=[2,2], C=[3,3])  # (11) kwargs with lists as values
    ```

    Raises:
        ValueError: a positional row does not have one value per column.
        KeyError: a dict row uses a key that is not a column name.

    """
    # The slowness warning is emitted once; a class-level flag remembers that it was shown.
    if not BaseTable._add_row_slow_warning:
        warnings.warn(
            "add_rows is slow. Consider using add_columns and then assigning values to the columns directly."
        )
        BaseTable._add_row_slow_warning = True

    if args:
        # cases 1 and 4: bare values were passed, so the args tuple itself is the single row.
        if not all(isinstance(i, (list, tuple, dict)) for i in args):
            args = [args]

        # NOTE(review): after the wrapping above this condition appears to always hold.
        if all(isinstance(i, (list, tuple, dict)) for i in args):  # cases 1-4 and 7-10
            # 1. turn the data into columns:
            #    nothing is written to the table until every row has been validated.

            d = {n: [] for n in self.columns}
            for arg in args:
                if len(arg) != len(self.columns):
                    raise ValueError(
                        f"len({arg})== {len(arg)}, but there are {len(self.columns)} columns"
                    )

                if isinstance(arg, dict):
                    for k, v in arg.items():  # cases 9,10: values matched by column name
                        d[k].append(v)

                elif isinstance(arg, (list, tuple)):  # cases 1-4,7,8: values matched by position
                    for n, v in zip(self.columns, arg):
                        d[n].append(v)

                else:
                    # NOTE(review): looks unreachable given the isinstance check guarding this loop.
                    raise TypeError(f"{arg}?")
            # 2. extend the columns
            for n, values in d.items():
                col = self.columns[n]
                col.extend(list_to_np_array(values))

    if kwargs:
        # NOTE(review): **kwargs is always a dict, so the else branch below looks unreachable.
        if isinstance(kwargs, dict):
            if all(isinstance(v, (list, tuple)) for v in kwargs.values()):
                # case 11: each keyword holds a sequence of values for its column.
                for k, v in kwargs.items():
                    col = self.columns[k]
                    col.extend(list_to_np_array(v))
            else:
                # cases 5,6: each keyword holds a single value for its column.
                for k, v in kwargs.items():
                    col = self.columns[k]
                    col.extend(np.array([v]))
        else:
            raise ValueError(f"format not recognised: {kwargs}")

    return

`tablite.core.Table.add_columns(*names)`

Adds column names to table.

Source code in tablite/base.py

def add_columns(self, *names):
    """Register an empty column under each of the given names.

    A new `Column` bound to the table's path is assigned for every name.
    Assigning to a name that already exists replaces the existing entry.

    Args:
        *names: column names to add.
    """
    for column_name in names:
        empty_column = Column(self.path)
        self.columns[column_name] = empty_column

`tablite.core.Table.add_column(name, data=None)`

verbose alias for table[name] = data, that checks if name already exists

PARAMETER	DESCRIPTION
`name`	column name TYPE: `str`
`data`	values. Defaults to None. TYPE: `(list,tuple)` DEFAULT: `None`

RAISES	DESCRIPTION
`TypeError`	name isn't string
`ValueError`	name already exists

Source code in tablite/base.py

def add_column(self, name, data=None):
    """Add a new column, refusing to overwrite an existing one.

    Verbose alias for `table[name] = data` with two up-front checks.

    Args:
        name (str): column name
        data ((list,tuple), optional): values. Defaults to None.

    Raises:
        TypeError: name isn't string
        ValueError: name already exists
    """
    if isinstance(name, str):
        if name in self.columns:
            raise ValueError(f"{name} already in {self.columns}")
        self[name] = data
        return
    raise TypeError("expected name as string")

`tablite.core.Table.stack(other)`

returns the joint stack of tables with overlapping column names. Example:

| Table A|  +  | Table B| = |  Table AB |
| A| B| C|     | A| B| D|   | A| B| C| -|
                            | A| B| -| D|

Source code in tablite/base.py

def stack(self, other):
    """
    Return a new table with the rows of `self` followed by the rows of `other`,
    using the union of both tables' column names.
    Example:
    ```
    | Table A|  +  | Table B| = |  Table AB |
    | A| B| C|     | A| B| D|   | A| B| C| -|
                                | A| B| -| D|
    ```
    Columns found only in `other` are first filled with `None` for the rows
    of `self`; columns found only in `self` are padded with `None` for the
    rows of `other`.

    Raises:
        TypeError: if `other` is not a table.
    """
    if not isinstance(other, BaseTable):
        raise TypeError(f"stack only works for Table, not {type(other)}")

    stacked = self.copy()
    for name, incoming in other.columns.items():
        if name not in stacked.columns:
            # column is new to the result: blank out the rows that came from self.
            stacked[name] = [None] * len(self)
        stacked[name].pages.extend(incoming.pages[:])

    for name in self.columns:
        if name in other.columns:
            continue
        if len(stacked) > 0:
            # column is missing in other: blank out the rows that came from other.
            stacked[name].extend(np.array([None] * len(other)))
    return stacked

`tablite.core.Table.types()`

returns nested dict of data types in the form: {column name: {python type class: number of instances }, ... }

example:

>>> t.types()
{
    'A': {<class 'str'>: 7},
    'B': {<class 'int'>: 7}
}

Source code in tablite/base.py

def types(self):
    """
    Collect the per-column datatype summary, as reported by each column's
    own `types()` method, in the form:
    `{column name: {python type class: number of instances }, ... }`

    example:
    ```
    >>> t.types()
    {
        'A': {<class 'str'>: 7},
        'B': {<class 'int'>: 7}
    }
    ```
    """
    summary = {}
    for name in self.columns:
        column = self.columns[name]
        assert isinstance(column, Column)
        summary[name] = column.types()
    return summary

`tablite.core.Table.display_dict(slice_=None, blanks=None, dtype=False)`

helper for creating dict for display.

PARAMETER	DESCRIPTION
`slice_`	python slice. Defaults to None. TYPE: `slice` DEFAULT: `None`
`blanks`	fill value for `None`. Defaults to None. TYPE: `optional` DEFAULT: `None`
`dtype`	Adds datatype to each column. Defaults to False. TYPE: `bool` DEFAULT: `False`

RAISES	DESCRIPTION
`TypeError`	slice_ must be None or slice.

RETURNS	DESCRIPTION
`dict`	from Table.

Source code in tablite/base.py

def display_dict(self, slice_=None, blanks=None, dtype=False):
    """helper for creating dict for display.

    The result maps a row-number label plus every column name to a list of
    display values. Without a slice, tables of up to 20 rows are shown in
    full; longer tables are shown as the first 7 rows, "...", and the last
    7 rows. Columns that are shorter than the table are padded with `blanks`.

    Args:
        slice_ (slice, optional): python slice. Defaults to None.
        blanks (optional): fill value for `None`. Defaults to None.
        dtype (bool, optional): Adds datatype to each column. Defaults to False.

    Raises:
        TypeError: slice_ must be None or slice.

    Returns:
        dict: from Table. If the table has no columns, "Empty Table" is
        printed and None is returned instead.
    """
    if not self.columns:
        print("Empty Table")
        return

    def datatype(col):  # PRIVATE
        """creates label for column datatype."""
        types = col.types()
        if len(types) == 0:
            typ = "empty"
        elif len(types) == 1:
            dt, _ = types.popitem()
            typ = dt.__name__
        else:
            typ = "mixed"
        return typ

    # Pick a label for the row-number column that does not clash with a
    # column name: "#", "~", "*", "##", "~~", ... If every candidate is
    # taken, the last plain tag is used (as before).
    row_count_tags = ["#", "~", "*"]
    cols = set(self.columns)
    tag = row_count_tags[-1]
    for repeats, symbol in product(range(1, 6), row_count_tags):
        candidate = repeats * symbol
        if candidate not in cols:
            tag = candidate
            break

    if not isinstance(slice_, (slice, type(None))):
        raise TypeError(f"slice_ must be None or slice, not {type(slice_)}")
    if slice_ is None:
        # short tables are shown in full, long tables as head + tail.
        slc = slice(0, 20, 1) if len(self) <= 20 else None
    else:
        slc = slice_

    n = len(self)
    if slc is not None:  # either we want slc or we want everything.
        row_no = list(range(*slc.indices(n)))
        data = {tag: [f"{i:,}".rjust(2) for i in row_no]}
        for name, col in self.columns.items():
            padded = chain(iter(col), repeat(blanks, times=n - len(col)))
            data[name] = list(padded)[slc]
    else:
        # width of the row numbers, allowing for thousands separators.
        j = int(math.ceil(math.log10(n)) / 3) + len(str(n))
        row_no = (
            [f"{i:,}".rjust(j) for i in range(7)]
            + ["..."]
            + [f"{i:,}".rjust(j) for i in range(n - 7, n)]
        )
        data = {tag: row_no}

        for name, col in self.columns.items():
            if len(col) == n:
                row = col[:7].tolist() + ["..."] + col[-7:].tolist()
            else:
                empty = [blanks] * 7
                head = (col[:7].tolist() + empty)[:7]
                # The values for rows n-7.. come first and the padding
                # after them, so keep the FIRST seven entries. Taking the
                # last seven would throw the real values away and show
                # only blanks.
                tail = (col[n - 7 :].tolist() + empty)[:7]
                row = head + ["..."] + tail
            data[name] = row

    if dtype:
        for name, values in data.items():
            if name in self.columns:
                col = self.columns[name]
                values.insert(0, datatype(col))
            else:
                values.insert(0, "row")

    return data

`tablite.core.Table.to_ascii(slice_=None, blanks=None, dtype=False)`

returns ascii view of table as string.

PARAMETER	DESCRIPTION
`slice_`	slice to determine table snippet. TYPE: `slice` DEFAULT: `None`
`blanks`	value for whitespace. Defaults to None. TYPE: `str` DEFAULT: `None`
`dtype`	adds subheader with datatype for column. Defaults to False. TYPE: `bool` DEFAULT: `False`

Source code in tablite/base.py

def to_ascii(self, slice_=None, blanks=None, dtype=False):
    """returns ascii view of table as string.

    The view is built from `display_dict`: every column is padded to the
    width of its widest entry, strings are left-justified and all other
    values right-justified. A warning line is appended when the columns do
    not all have the same length.

    Args:
        slice_ (slice, optional): slice to determine table snippet.
        blanks (str, optional): value for whitespace. Defaults to None.
        dtype (bool, optional): adds subheader with datatype for column. Defaults to False.

    Returns:
        str: the rendered table, or `str(self)` if the table has no columns.
    """

    def adjust(v, length):  # PRIVATE FUNCTION
        """whitespace justifies field values based on datatype"""
        if v is None:
            return str(blanks).ljust(length)
        elif isinstance(v, str):
            return v.ljust(length)
        else:
            return str(v).rjust(length)

    if not self.columns:
        return str(self)

    d = {}
    for name, values in self.display_dict(
        slice_=slice_, blanks=blanks, dtype=dtype
    ).items():
        # Column names are not guaranteed to be strings, so convert once
        # and use the text form for both the width and the centred header.
        label = str(name)
        as_text = [str(v) for v in values] + [label]
        width = max(len(i) for i in as_text)
        if dtype:
            values[0] = values[0].center(width, " ")
        d[label.center(width, " ")] = [adjust(v, width) for v in values]

    rows = dict_to_rows(d)
    heavy_rule = "+" + "+".join(["=" * len(n) for n in rows[0]]) + "+"
    light_rule = "+" + "+".join(["-" * len(n) for n in rows[0]]) + "+"

    s = [heavy_rule]
    s.append("|" + "|".join(rows[0]) + "|")  # column names
    start = 1
    if dtype:
        s.append("|" + "|".join(rows[1]) + "|")  # datatypes
        start = 2

    s.append(light_rule)
    for row in rows[start:]:
        s.append("|" + "|".join(row) + "|")
    s.append(heavy_rule)

    if len(set(len(c) for c in self.columns.values())) != 1:
        warning = f"Warning: Columns have different lengths. {blanks} is used as fill value."
        s.append(warning)

    return "\n".join(s)

`tablite.core.Table.show(slice_=None, blanks=None, dtype=False)`

prints ascii view of table.

PARAMETER	DESCRIPTION
`slice_`	slice to determine table snippet. TYPE: `slice` DEFAULT: `None`
`blanks`	value for whitespace. Defaults to None. TYPE: `str` DEFAULT: `None`
`dtype`	adds subheader with datatype for column. Defaults to False. TYPE: `bool` DEFAULT: `False`

Source code in tablite/base.py

def show(self, slice_=None, blanks=None, dtype=False):
    """Print the ascii view produced by `to_ascii` to standard output.

    Args:
        slice_ (slice, optional): slice to determine table snippet.
        blanks (str, optional): value for whitespace. Defaults to None.
        dtype (bool, optional): adds subheader with datatype for column. Defaults to False.
    """
    rendered = self.to_ascii(slice_=slice_, blanks=blanks, dtype=dtype)
    print(rendered)

`tablite.core.Table.to_dict(columns=None, slice_=None)`

columns: list of column names. Default is None == all columns. slice_: slice. Default is None == all rows.

returns: dict with columns as keys and lists of values.

Example:

>>> t.show()
+===+===+===+
| # | a | b |
|row|int|int|
+---+---+---+
| 0 |  1|  3|
| 1 |  2|  4|
+===+===+===+
>>> t.to_dict()
{'a':[1,2], 'b':[3,4]}

Source code in tablite/base.py

def to_dict(self, columns=None, slice_=None):
    """
    Export (part of) the table as a plain dict of lists.

    columns: list of column names. Default is None == all columns.
    slice_: slice. Default is None == all rows.

    returns: dict with columns as keys and lists of values.

    raises: TypeError if columns is given but is not a list.

    Example:
    ```
    >>> t.show()
    +===+===+===+
    | # | a | b |
    |row|int|int|
    +---+---+---+
    | 0 |  1|  3|
    | 1 |  2|  4|
    +===+===+===+
    >>> t.to_dict()
    {'a':[1,2], 'b':[3,4]}
    ```

    """
    rows = slice(0, len(self)) if slice_ is None else slice_
    assert isinstance(rows, slice)

    names = list(self.columns.keys()) if columns is None else columns
    if not isinstance(names, list):
        raise TypeError("expected columns as list of strings")

    exported = {}
    for name in names:
        exported[name] = list(self.columns[name][rows])
    return exported

`tablite.core.Table.as_json_serializable(row_count='row id', start_on=1, columns=None, slice_=None)`

provides a JSON compatible format of the table.

PARAMETER	DESCRIPTION
`row_count`	Label for row counts. Defaults to "row id". TYPE: `str` DEFAULT: `'row id'`
`start_on`	row counts starts by default on 1. TYPE: `int` DEFAULT: `1`
`columns`	Column names. Defaults to None which returns all columns. TYPE: `list of str` DEFAULT: `None`
`slice_`	selector. Defaults to None which returns [:] TYPE: `slice` DEFAULT: `None`

RETURNS	DESCRIPTION
	JSON serializable dict: All python datatypes have been converted to JSON compliant data.

Source code in tablite/base.py

def as_json_serializable(
    self, row_count="row id", start_on=1, columns=None, slice_=None
):
    """Build a dict of the table that only holds JSON compliant values.

    The result has two keys: "columns" (column name -> list of values) and
    "total_rows" (the length of the whole table, regardless of the slice).

    Args:
        row_count (str, optional): Label for row counts. Defaults to "row id".
            Pass None to leave the row count column out.
        start_on (int, optional): row counts starts by default on 1.
        columns (list of str, optional): Column names.
            Defaults to None which returns all columns.
        slice_ (slice, optional): selector. Defaults to None which returns [:]

    Returns:
        JSON serializable dict: All python datatypes have been converted to JSON compliant data.
    """
    if slice_ is None:
        slice_ = slice(0, len(self))

    assert isinstance(slice_, slice)
    json_columns = {}
    payload = {"columns": json_columns, "total_rows": len(self)}
    if row_count is not None:
        first, last, step = slice_.indices(len(self))
        json_columns[row_count] = [i + start_on for i in range(first, last, step)]

    for key, values in self.to_dict(columns, slice_=slice_).items():
        # a column may share its name with the row count label; pick a free name
        # so that the `row id` key is not overwritten.
        safe_key = unique_name(key, json_columns)
        # deal with non-json datatypes.
        json_columns[safe_key] = [DataTypes.to_json(v) for v in values]
    return payload

`tablite.core.Table.index(*args)`

param: *args: column names. Returns a multikey index on the columns as d[(key tuple, )] = [index1, index2, ...]

Examples:

>>> table6 = Table()
>>> table6['A'] = ['Alice', 'Bob', 'Bob', 'Ben', 'Charlie', 'Ben','Albert']
>>> table6['B'] = ['Alison', 'Marley', 'Dylan', 'Affleck', 'Hepburn', 'Barnes', 'Einstein']

>>> table6.index('A')  # single key.
{('Alice',): [0],
 ('Bob',): [1, 2],
 ('Ben',): [3, 5],
 ('Charlie',): [4],
 ('Albert',): [6]})

>>> table6.index('A', 'B')  # multiple keys.
{('Alice', 'Alison'): [0],
 ('Bob', 'Marley'): [1],
 ('Bob', 'Dylan'): [2],
 ('Ben', 'Affleck'): [3],
 ('Charlie', 'Hepburn'): [4],
 ('Ben', 'Barnes'): [5],
 ('Albert', 'Einstein'): [6]})

Source code in tablite/base.py

def index(self, *args):
    """
    Build a multikey index over the named columns.

    param: *args: column names
    returns: defaultdict(list) of the form d[(key tuple, )] = [index1, index2, ...],
    where the indices are the row numbers at which the key combination occurs.

    Examples:
        ```
        >>> table6 = Table()
        >>> table6['A'] = ['Alice', 'Bob', 'Bob', 'Ben', 'Charlie', 'Ben','Albert']
        >>> table6['B'] = ['Alison', 'Marley', 'Dylan', 'Affleck', 'Hepburn', 'Barnes', 'Einstein']
        ```

        ```
        >>> table6.index('A')  # single key.
        {('Alice',): [0],
         ('Bob',): [1, 2],
         ('Ben',): [3, 5],
         ('Charlie',): [4],
         ('Albert',): [6]})
        ```

        ```
        >>> table6.index('A', 'B')  # multiple keys.
        {('Alice', 'Alison'): [0],
         ('Bob', 'Marley'): [1],
         ('Bob', 'Dylan'): [2],
         ('Ben', 'Affleck'): [3],
         ('Charlie', 'Hepburn'): [4],
         ('Ben', 'Barnes'): [5],
         ('Albert', 'Einstein'): [6]})
        ```

    """
    lookup = defaultdict(list)
    selected = [iter(self.columns[name]) for name in args]
    for position, raw_key in enumerate(zip(*selected)):
        # keys are converted with numpy_to_python before being used as dict keys.
        lookup[tuple(numpy_to_python(value) for value in raw_key)].append(position)
    return lookup

`tablite.core.Table.unique_index(*args, tqdm=_tqdm)`

generates the index of unique rows given a list of column names

PARAMETER	DESCRIPTION
`*args`	columns names TYPE: `any` DEFAULT: `()`
`tqdm`	Defaults to _tqdm. TYPE: `tqdm` DEFAULT: `tqdm`

RETURNS	DESCRIPTION
	np.array(int64): indices of unique records.

Source code in tablite/base.py

def unique_index(self, *args, tqdm=_tqdm):
    """generates the index of unique rows given a list of column names

    For every distinct combination of values in the named columns, the
    row number of its first occurrence is kept.

    Args:
        *args (any): columns names
        tqdm (tqdm, optional): Defaults to _tqdm.

    Raises:
        ValueError: if no column names are given.

    Returns:
        np.array(int64): indices of unique records, in ascending order.
    """
    if not args:
        raise ValueError("*args (column names) is required")
    seen = set()
    first_occurrences = []  # row numbers arrive in ascending order, so no sort is needed.
    iterators = [iter(self.columns[c]) for c in args]
    for ix, key in tqdm(enumerate(zip(*iterators)), disable=Config.TQDM_DISABLE):
        # Remember the key itself rather than hash(key): different keys can
        # share a hash (in CPython hash(-1) == hash(-2)), and comparing only
        # hashes would then drop a genuinely unique row as a duplicate.
        key = tuple(numpy_to_python(k) for k in key)
        if key in seen:
            continue
        seen.add(key)
        first_occurrences.append(ix)
    return np.array(first_occurrences)

Core

tablite.core

Attributes

tablite.core.log = logging.getLogger(__name__) module-attribute

Classes

tablite.core.Table(columns=None, headers=None, rows=None, _path=None)

Attributes

tablite.core.Table.path = _path instance-attribute

tablite.core.Table.columns = {} instance-attribute

tablite.core.Table.rows property

Functions

tablite.core.Table.from_pandas(df) classmethod

tablite.core.Table.from_hdf5(path) classmethod

tablite.core.Table.from_json(jsn) classmethod

tablite.core.Table.to_hdf5(path)

tablite.core.Table.to_pandas()

tablite.core.Table.to_sql(name)

tablite.core.Table.to_json()

tablite.core.Table.to_xlsx(path)

tablite.core.Table.to_ods(path)

tablite.core.Table.to_csv(path)

tablite.core.Table.to_tsv(path)

tablite.core.Table.to_text(path)

tablite.core.Table.to_html(path)

tablite.core.Table.expression(expression)

tablite.core.Table.filter(expressions, filter_type='all', tqdm=_tqdm)

tablite.core.Table.sort_index(sort_mode='excel', tqdm=_tqdm, pbar=None, **kwargs)

tablite.core.Table.reindex(index)

tablite.core.Table.drop_duplicates(*args)

tablite.core.Table.sort(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)

tablite.core.Table.sorted(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)

tablite.core.Table.is_sorted(mapping, sort_mode='excel')

tablite.core.Table.any(**kwargs)

tablite.core.Table.all(**kwargs)

tablite.core.Table.drop(*args)

tablite.core.Table.replace(mapping, columns=None, tqdm=_tqdm, pbar=None)

tablite.core.Table.groupby(keys, functions, tqdm=_tqdm, pbar=None)

tablite.core.Table.pivot(rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None)

tablite.core.Table.merge(left, right, new, criteria)

tablite.core.Table.column_select(cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=_TaskManager)

tablite.core.Table.join(other, left_keys, right_keys, left_columns=None, right_columns=None, kind='inner', merge_keys=False, tqdm=_tqdm, pbar=None)

tablite.core.Table.left_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)

tablite.core.Table.inner_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)

tablite.core.Table.outer_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)

tablite.core.Table.cross_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)

tablite.core.Table.lookup(other, *criteria, all=True, tqdm=_tqdm)

tablite.core.Table.match(other, *criteria, keep_left=None, keep_right=None)

tablite.core.Table.replace_missing_values(*args, **kwargs)

tablite.core.Table.imputation(targets, missing=None, method='carry forward', sources=None, tqdm=_tqdm)

tablite.core.Table.transpose(tqdm=_tqdm)

tablite.core.Table.pivot_transpose(columns, keep=None, column_name='transpose', value_name='value', tqdm=_tqdm)

tablite.core.Table.diff(other, columns=None)

tablite.core.Table.__str__()

tablite.core.Table.__repr__()

tablite.core.Table.nbytes()

tablite.core.Table.items()

tablite.core.Table.__delitem__(key)

tablite.core.Table.__setitem__(key, value)

tablite.core.Table.__getitem__(keys)

tablite.core.Table.__len__()

tablite.core.Table.__eq__(other) -> bool

tablite.core.Table.clear()

tablite.core.Table.save(path, compression_method=zipfile.ZIP_DEFLATED, compression_level=1)

tablite.core.Table.load(path, tqdm=_tqdm) classmethod

tablite.core.Table.copy()

tablite.core.Table.__imul__(other)

tablite.core.Table.__mul__(other)

tablite.core.Table.__iadd__(other)

tablite.core.Table.__add__(other)

tablite.core.Table.add_rows(*args, **kwargs)

tablite.core.Table.add_columns(*names)

tablite.core.Table.add_column(name, data=None)

tablite.core.Table.stack(other)

tablite.core.Table.types()

tablite.core.Table.display_dict(slice_=None, blanks=None, dtype=False)

tablite.core.Table.to_ascii(slice_=None, blanks=None, dtype=False)

tablite.core.Table.show(slice_=None, blanks=None, dtype=False)

tablite.core.Table.to_dict(columns=None, slice_=None)

tablite.core.Table.as_json_serializable(row_count='row id', start_on=1, columns=None, slice_=None)

tablite.core.Table.index(*args)

`tablite.core`

`tablite.core.log = logging.getLogger(__name__)` `module-attribute`

`tablite.core.Table(columns=None, headers=None, rows=None, _path=None)`

`tablite.core.Table.path = _path` `instance-attribute`

`tablite.core.Table.columns = {}` `instance-attribute`

`tablite.core.Table.rows` `property`

`tablite.core.Table.from_pandas(df)` `classmethod`

`tablite.core.Table.from_hdf5(path)` `classmethod`

`tablite.core.Table.from_json(jsn)` `classmethod`

`tablite.core.Table.to_hdf5(path)`

`tablite.core.Table.to_pandas()`

`tablite.core.Table.to_sql(name)`

`tablite.core.Table.to_json()`

`tablite.core.Table.to_xlsx(path)`

`tablite.core.Table.to_ods(path)`

`tablite.core.Table.to_csv(path)`

`tablite.core.Table.to_tsv(path)`

`tablite.core.Table.to_text(path)`

`tablite.core.Table.to_html(path)`

`tablite.core.Table.expression(expression)`

`tablite.core.Table.filter(expressions, filter_type='all', tqdm=_tqdm)`

`tablite.core.Table.sort_index(sort_mode='excel', tqdm=_tqdm, pbar=None, **kwargs)`

`tablite.core.Table.reindex(index)`

`tablite.core.Table.drop_duplicates(*args)`

`tablite.core.Table.sort(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)`

`tablite.core.Table.sorted(mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)`

`tablite.core.Table.is_sorted(mapping, sort_mode='excel')`

`tablite.core.Table.any(**kwargs)`

`tablite.core.Table.all(**kwargs)`

`tablite.core.Table.drop(*args)`

`tablite.core.Table.replace(mapping, columns=None, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.groupby(keys, functions, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.pivot(rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.merge(left, right, new, criteria)`

`tablite.core.Table.column_select(cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=_TaskManager)`

`tablite.core.Table.join(other, left_keys, right_keys, left_columns=None, right_columns=None, kind='inner', merge_keys=False, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.left_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.inner_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.outer_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.cross_join(other, left_keys, right_keys, left_columns=None, right_columns=None, merge_keys=False, tqdm=_tqdm, pbar=None)`

`tablite.core.Table.lookup(other, *criteria, all=True, tqdm=_tqdm)`

`tablite.core.Table.match(other, *criteria, keep_left=None, keep_right=None)`

`tablite.core.Table.replace_missing_values(*args, **kwargs)`

`tablite.core.Table.imputation(targets, missing=None, method='carry forward', sources=None, tqdm=_tqdm)`

`tablite.core.Table.transpose(tqdm=_tqdm)`

`tablite.core.Table.pivot_transpose(columns, keep=None, column_name='transpose', value_name='value', tqdm=_tqdm)`

`tablite.core.Table.diff(other, columns=None)`

`tablite.core.Table.__str__()`

`tablite.core.Table.__repr__()`

`tablite.core.Table.nbytes()`

`tablite.core.Table.items()`

`tablite.core.Table.__delitem__(key)`

`tablite.core.Table.__setitem__(key, value)`

`tablite.core.Table.__getitem__(keys)`

`tablite.core.Table.__len__()`

`tablite.core.Table.__eq__(other) -> bool`

`tablite.core.Table.clear()`

`tablite.core.Table.save(path, compression_method=zipfile.ZIP_DEFLATED, compression_level=1)`

`tablite.core.Table.load(path, tqdm=_tqdm)` `classmethod`

`tablite.core.Table.copy()`

`tablite.core.Table.__imul__(other)`

`tablite.core.Table.__mul__(other)`

`tablite.core.Table.__iadd__(other)`

`tablite.core.Table.__add__(other)`

`tablite.core.Table.add_rows(*args, **kwargs)`

`tablite.core.Table.add_columns(*names)`

`tablite.core.Table.add_column(name, data=None)`

`tablite.core.Table.stack(other)`

`tablite.core.Table.types()`

`tablite.core.Table.display_dict(slice_=None, blanks=None, dtype=False)`

`tablite.core.Table.to_ascii(slice_=None, blanks=None, dtype=False)`

`tablite.core.Table.show(slice_=None, blanks=None, dtype=False)`

`tablite.core.Table.to_dict(columns=None, slice_=None)`

`tablite.core.Table.as_json_serializable(row_count='row id', start_on=1, columns=None, slice_=None)`

`tablite.core.Table.index(*args)`

`tablite.core.Table.unique_index(*args, tqdm=_tqdm)`