Skip to content

Sortation

tablite.sortation

Attributes

Classes

Functions

tablite.sortation.sort_index(T, mapping, sort_mode='excel', tqdm=_tqdm, pbar=None)

helper for methods sort and is_sorted

param: sort_mode: str: "alphanumeric", "unix", or "excel" (default). param: mapping: dict: sort criteria as {column name: reverse (bool)}. See Table.sort()

Source code in tablite/sortation.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def sort_index(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar=None):
    """
    Helper for methods `sort` and `is_sorted`: compute the row order that sorts `T`.

    Args:
        T: the table to index; checked with `sub_cls_check(T, BaseTable)`.
        mapping (dict): non-empty sort criteria as {column name: reverse},
            where `reverse` is a bool. The dict order gives the sort
            precedence (first key is the primary sort level).
        sort_mode (str): "alphanumeric", "unix", or, "excel" (default).
            Must be a member of `sort_modes`.
        tqdm: progress bar factory; only used when `pbar` is None.
        pbar: optional existing progress bar. It is updated once per sort
            column and is NOT closed by this function.

    Returns:
        np.ndarray (int64): row indices of `T` listed in sorted order.

    Raises:
        TypeError: if `mapping` is not a dict or is empty.
        ValueError: if a key is not a column of `T`, a value is not a bool,
            or `sort_mode` is not in `sort_modes`.
    """

    sub_cls_check(T, BaseTable)

    if not isinstance(mapping, dict) or not mapping:
        raise TypeError("Expected mapping (dict)?")

    for k, v in mapping.items():
        if k not in T.columns:
            raise ValueError(f"no column {k}")
        if not isinstance(v, bool):
            raise ValueError(f"{k} was mapped to {v} - a non-boolean")

    if sort_mode not in sort_modes:
        raise ValueError(f"{sort_mode} not in list of sort_modes: {list(sort_modes)}")

    # One (initially empty) tuple per row; each sort level appends one rank.
    rank = {i: tuple() for i in range(len(T))}

    # Only a progress bar created here is owned (and therefore closed) here.
    pbar_close = pbar is None
    if pbar_close:
        pbar = tqdm(total=len(mapping), desc="creating sort index")

    try:
        for key, reverse in mapping.items():
            col = T[key][:]
            # Rank the distinct values once, then look the rank up per row.
            ranks = sort_rank(values=[numpy_to_python(v) for v in multitype_set(col)], reverse=reverse, mode=sort_mode)
            assert isinstance(ranks, dict)
            for ix, v in enumerate(col):
                rank[ix] += (ranks[numpy_to_python(v)],)  # add one entry per sortation level.

            pbar.update(1)
    finally:
        # Close the bar even if a column lookup or ranking raised.
        if pbar_close:
            pbar.close()

    # mapping is non-empty (checked above), so both names are bound here.
    del col, ranks  # free memory before sorting.

    # Keys were inserted as 0..n-1 and sorted() is stable, so rows with equal
    # rank tuples keep ascending row order - the same order as sorting
    # (rank, row) pairs, without materialising the pairs.
    sorted_index = sorted(rank, key=rank.__getitem__)
    del rank  # free memory.

    return np.array(sorted_index, dtype=np.int64)

tablite.sortation.reindex(T, index)

index: list of integers that declare sort order.

Examples:

Table:  ['a','b','c','d','e','f','g','h']
index:  [0,2,4,6]
result: ['a','c','e','g']

Table:  ['a','b','c','d','e','f','g','h']
index:  [0,2,4,6,1,3,5,7]
result: ['a','c','e','g','b','d','f','h']
Source code in tablite/sortation.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def reindex(T, index):
    """
    Reorder (or select) the rows of `T` according to `index`.

    Args:
        T: the table to reindex; checked with `sub_cls_check(T, BaseTable)`.
        index: list or 1-dimensional np.ndarray of integers that declare the
            new row order. A list is converted to an integer array. Values
            must lie in the range -len(T) <= i < len(T).

    Examples:

        Table:  ['a','b','c','d','e','f','g','h']
        index:  [0,2,4,6]
        result: ['a','c','e','g']

        Table:  ['a','b','c','d','e','f','g','h']
        index:  [0,2,4,6,1,3,5,7]
        result: ['a','c','e','g','b','d','f','h']

    Returns:
        Whatever the selected reindex implementation (`_reindex` or
        `_mp_reindex`) returns - presumably the reindexed table.

    Raises:
        ValueError: if `index` is not 1-dimensional.
        IndexError: if any value of `index` is outside the valid row range.
    """
    sub_cls_check(T, BaseTable)
    if isinstance(index, list):
        index = np.array(index, dtype=int)
    type_check(index, np.ndarray)  # presumably raises for anything that is not an ndarray.
    if index.ndim != 1:
        raise ValueError(f"index must be 1-dimensional, got {index.ndim} dimensions")

    # An empty index is trivially in range; the reductions below would
    # otherwise fail on a zero-size array with an unrelated ValueError.
    if index.size:
        length = len(T)
        # ndarray.max()/min() reduce in native code instead of iterating
        # the array element by element like the builtins do.
        if index.max() >= length:
            raise IndexError("index out of range: max(index) > len(self)")
        if index.min() < -length:
            raise IndexError("index out of range: min(index) < -len(self)")

    # The number of fields (rows x columns) decides which implementation runs.
    fields = len(T) * len(T.columns)
    m = select_processing_method(fields, _reindex, _mp_reindex)
    return m(T, index)

tablite.sortation.sort(T, mapping, sort_mode='excel', tqdm=_tqdm, pbar: _tqdm = None)

Perform multi-pass sorting with precedence given by the order of column names. sort_mode: str: "alphanumeric", "unix", or "excel". mapping: dict with keys: columns, values: 'reverse' as boolean.

examples: sort(T, mapping={'A': False}) means sort by 'A' in ascending order. sort(T, mapping={'A': True, 'B': False}) means sort 'A' in descending order, then (2nd priority) sort 'B' in ascending order.

Source code in tablite/sortation.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def sort(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar: _tqdm = None):
    """Perform multi-pass sorting with precedence given by the order of column names.

    Args:
        T: the table to sort; checked with `sub_cls_check(T, BaseTable)`.
        mapping (dict): sort criteria with
            keys: columns,
            values: 'reverse' as boolean.
        sort_mode (str): "alphanumeric", "unix", or, "excel"
        tqdm: progress bar factory, forwarded to both the index creation
            and the reindexing step.
        pbar: optional existing progress bar, forwarded to both steps.

    Examples:
        sort(T, mapping={'A': False}) means sort by 'A' in ascending order.
        sort(T, mapping={'A': True, 'B': False}) means sort 'A' in descending
        order, then (2nd priority) sort 'B' in ascending order.

    Returns:
        Whatever the selected reindex implementation (`_sp_reindex` or
        `_mp_reindex`) returns - presumably the sorted table.
    """
    sub_cls_check(T, BaseTable)

    # Forward the caller's `tqdm` (not the module default) so that a custom
    # or silenced progress bar is honoured while the sort index is built.
    index = sort_index(T, mapping, sort_mode=sort_mode, tqdm=tqdm, pbar=pbar)
    # The number of fields (rows x columns) decides which implementation runs.
    m = select_processing_method(len(T) * len(T.columns), _sp_reindex, _mp_reindex)
    return m(T, index, tqdm=tqdm, pbar=pbar)

tablite.sortation.is_sorted(T, mapping, sort_mode='excel')

Performs multi-pass sorting check with precedence given order of column names.

PARAMETER DESCRIPTION
mapping

sort criteria. See Table.sort()

RETURNS DESCRIPTION

bool

Source code in tablite/sortation.py
165
166
167
168
169
170
171
172
173
174
175
176
177
def is_sorted(T, mapping, sort_mode="excel"):
    """Performs multi-pass sorting check with precedence given by the order of column names.

    Args:
        T: the table to check.
        mapping: sort criteria. See Table.sort()
        sort_mode: sort mode. See Table.sort()

    Returns:
        bool: True if the sort index computed for `mapping` equals the
        current row order 0..len(T)-1, otherwise False.
    """
    index = sort_index(T, mapping, sort_mode=sort_mode)
    # The table is sorted exactly when sorting would leave every row in place.
    # bool() guarantees the documented builtin `bool` return type.
    return bool(np.array_equal(index, np.arange(len(T))))