Skip to content

Pivots

tablite.pivots

Classes

Functions

tablite.pivots.pivot(T, rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None)

param: rows: column names to keep as rows param: columns: column names to keep as columns param: functions: aggregation functions from the Groupby class as

example:

>>> t.show()
+=====+=====+=====+
|  A  |  B  |  C  |
| int | int | int |
+-----+-----+-----+
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
|    1|    1|    6|
|    1|    2|    5|
|    2|    3|    4|
|    2|    4|    3|
|    3|    5|    2|
|    3|    6|    1|
+=====+=====+=====+

>>> t2 = t.pivot(rows=['C'], columns=['A'], functions=[('B', gb.sum)])
>>> t2.show()
+===+===+========+=====+=====+=====+
| # | C |function|(A=1)|(A=2)|(A=3)|
|row|int|  str   |mixed|mixed|mixed|
+---+---+--------+-----+-----+-----+
|0  |  6|Sum(B)  |    2|None |None |
|1  |  5|Sum(B)  |    4|None |None |
|2  |  4|Sum(B)  |None |    6|None |
|3  |  3|Sum(B)  |None |    8|None |
|4  |  2|Sum(B)  |None |None |   10|
|5  |  1|Sum(B)  |None |None |   12|
+===+===+========+=====+=====+=====+
Source code in tablite/pivots.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def pivot(T, rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None):
    """
    param: rows: column names to keep as rows
    param: columns: column names to keep as columns
    param: functions: aggregation functions from the Groupby class as

    example:
    ```
    >>> t.show()
    +=====+=====+=====+
    |  A  |  B  |  C  |
    | int | int | int |
    +-----+-----+-----+
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    |    1|    1|    6|
    |    1|    2|    5|
    |    2|    3|    4|
    |    2|    4|    3|
    |    3|    5|    2|
    |    3|    6|    1|
    +=====+=====+=====+

    >>> t2 = t.pivot(rows=['C'], columns=['A'], functions=[('B', gb.sum)])
    >>> t2.show()
    +===+===+========+=====+=====+=====+
    | # | C |function|(A=1)|(A=2)|(A=3)|
    |row|int|  str   |mixed|mixed|mixed|
    +---+---+--------+-----+-----+-----+
    |0  |  6|Sum(B)  |    2|None |None |
    |1  |  5|Sum(B)  |    4|None |None |
    |2  |  4|Sum(B)  |None |    6|None |
    |3  |  3|Sum(B)  |None |    8|None |
    |4  |  2|Sum(B)  |None |None |   10|
    |5  |  1|Sum(B)  |None |None |   12|
    +===+===+========+=====+=====+=====+
    ```

    """
    sub_cls_check(T, BaseTable)

    if isinstance(rows, str):
        rows = [rows]
    if not all(isinstance(i, str) for i in rows):
        raise TypeError(f"Expected rows as a list of column names, not {[i for i in rows if not isinstance(i,str)]}")

    if isinstance(columns, str):
        columns = [columns]
    if not all(isinstance(i, str) for i in columns):
        raise TypeError(
            f"Expected columns as a list of column names, not {[i for i in columns if not isinstance(i, str)]}"
        )

    if not isinstance(values_as_rows, bool):
        raise TypeError(f"expected sum_on_rows as boolean, not {type(values_as_rows)}")

    keys = rows + columns
    assert isinstance(keys, list)

    extra_steps = 2

    if pbar is None:
        total = extra_steps

        if len(functions) == 0:
            total = total + len(keys)
        else:
            total = total + len(T)

        pbar = tqdm(total=total, desc="pivot")

    grpby = groupby(T, keys, functions, tqdm=tqdm)
    Constr = type(T)

    if len(grpby) == 0:  # return empty table. This must be a test?
        pbar.update(extra_steps)
        return Constr()

    # split keys to determine grid dimensions
    row_key_index = {}
    col_key_index = {}

    r = len(rows)
    c = len(columns)
    g = len(functions)

    records = defaultdict(dict)

    for row in grpby.rows:
        row_key = tuple(row[:r])
        col_key = tuple(row[r : r + c])
        func_key = tuple(row[r + c :])

        if row_key not in row_key_index:
            row_key_index[row_key] = len(row_key_index)  # Y

        if col_key not in col_key_index:
            col_key_index[col_key] = len(col_key_index)  # X

        rix = row_key_index[row_key]
        cix = col_key_index[col_key]
        if cix in records:
            if rix in records[cix]:
                raise ValueError("this should be empty.")
        records[cix][rix] = func_key

    pbar.update(1)
    result = type(T)()

    if values_as_rows:  # ---> leads to more rows.
        # first create all columns left to right

        n = r + 1  # rows keys + 1 col for function values.
        cols = [[] for _ in range(n)]
        for row, ix in row_key_index.items():
            for col_name, f in functions:
                cols[-1].append(f"{f}({col_name})")
                for col_ix, v in enumerate(row):
                    cols[col_ix].append(v)

        for col_name, values in zip(rows + ["function"], cols):
            col_name = unique_name(col_name, result.columns)
            result[col_name] = values
        col_length = len(cols[0])
        cols.clear()

        # then populate the sparse matrix.
        for col_key, c in col_key_index.items():
            col_name = "(" + ",".join([f"{col_name}={value}" for col_name, value in zip(columns, col_key)]) + ")"
            col_name = unique_name(col_name, result.columns)
            L = [None for _ in range(col_length)]
            for r, funcs in records[c].items():
                for ix, f in enumerate(funcs):
                    L[g * r + ix] = f
            result[col_name] = L

    else:  # ---> leads to more columns.
        n = r
        cols = [[] for _ in range(n)]
        for row in row_key_index:
            for col_ix, v in enumerate(row):
                cols[col_ix].append(v)  # write key columns.

        for col_name, values in zip(rows, cols):
            result[col_name] = values

        col_length = len(row_key_index)

        # now populate the sparse matrix.
        for col_key, c in col_key_index.items():  # select column.
            cols, names = [], []

            for f, v in zip(functions, func_key):
                agg_col, func = f
                terms = ",".join([agg_col] + [f"{col_name}={value}" for col_name, value in zip(columns, col_key)])
                col_name = f"{func}({terms})"
                col_name = unique_name(col_name, result.columns)
                names.append(col_name)
                cols.append([None for _ in range(col_length)])
            for r, funcs in records[c].items():
                for ix, f in enumerate(funcs):
                    cols[ix][r] = f
            for name, col in zip(names, cols):
                result[name] = col

    pbar.update(1)

    return result

tablite.pivots.transpose(T, tqdm=_tqdm)

performs a CCW matrix rotation of the table.

Source code in tablite/pivots.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def transpose(T, tqdm=_tqdm):
    """performs a CCW matrix rotation of the table."""
    sub_cls_check(T, BaseTable)

    if len(T.columns) == 0:
        return type(T)()

    assert isinstance(T, BaseTable)
    new = type(T)()
    L = list(T.columns)
    new[L[0]] = L[1:]
    for row in tqdm(T.rows, desc="table transpose", total=len(T)):
        new[row[0]] = row[1:]
    return new

tablite.pivots.pivot_transpose(T, columns, keep=None, column_name='transpose', value_name='value', tqdm=_tqdm)

Transpose a selection of columns to rows.

PARAMETER DESCRIPTION
columns

column names to transpose

TYPE: list of column names

keep

column names to keep (repeat)

TYPE: list of column names DEFAULT: None

RETURNS DESCRIPTION
Table

with columns transposed to rows

Example

transpose columns 1,2 and 3 and transpose the remaining columns, except sum.

Input:

| col1 | col2 | col3 | sun | mon | tue | ... | sat | sum  |
|------|------|------|-----|-----|-----|-----|-----|------|
| 1234 | 2345 | 3456 | 456 | 567 |     | ... |     | 1023 |
| 1244 | 2445 | 4456 |     |   7 |     | ... |     |    7 |
| ...  |      |      |     |     |     |     |     |      |

>>> t.transpose(keep=[col1, col2, col3], transpose=[sun,mon,tue,wed,thu,fri,sat])`

Output:
|col1| col2| col3| transpose| value|
|----|-----|-----|----------|------|
|1234| 2345| 3456| sun      |   456|
|1234| 2345| 3456| mon      |   567|
|1244| 2445| 4456| mon      |     7|
Source code in tablite/pivots.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
def pivot_transpose(T, columns, keep=None, column_name="transpose", value_name="value", tqdm=_tqdm):
    """Transpose a selection of columns to rows.

    Args:
        columns (list of column names): column names to transpose
        keep (list of column names): column names to keep (repeat)

    Returns:
        Table: with columns transposed to rows

    Example:
        transpose columns 1,2 and 3 and transpose the remaining columns, except `sum`.

    Input:
    ```
    | col1 | col2 | col3 | sun | mon | tue | ... | sat | sum  |
    |------|------|------|-----|-----|-----|-----|-----|------|
    | 1234 | 2345 | 3456 | 456 | 567 |     | ... |     | 1023 |
    | 1244 | 2445 | 4456 |     |   7 |     | ... |     |    7 |
    | ...  |      |      |     |     |     |     |     |      |

    >>> t.transpose(keep=[col1, col2, col3], transpose=[sun,mon,tue,wed,thu,fri,sat])`

    Output:
    |col1| col2| col3| transpose| value|
    |----|-----|-----|----------|------|
    |1234| 2345| 3456| sun      |   456|
    |1234| 2345| 3456| mon      |   567|
    |1244| 2445| 4456| mon      |     7|
    ```

    """
    sub_cls_check(T, BaseTable)

    if not isinstance(columns, list):
        raise TypeError

    for i in columns:
        if not isinstance(i, str):
            raise TypeError
        if i not in T.columns:
            raise ValueError
        if columns.count(i)>1:
            raise ValueError(f"Column {i} appears more than once")

    if keep is None:
        keep = []
    for i in keep:
        if not isinstance(i, str):
            raise TypeError
        if i not in T.columns:
            raise ValueError

    if column_name in keep + columns:
        column_name = unique_name(column_name, set_of_names=keep + columns)
    if value_name in keep + columns + [column_name]:
        value_name = unique_name(value_name, set_of_names=keep + columns)

    new = type(T)()
    new.add_columns(*keep + [column_name, value_name])
    news = {name: [] for name in new.columns}

    n = len(keep)

    with tqdm(total=len(T), desc="transpose", disable=Config.TQDM_DISABLE) as pbar:
        it = T[keep + columns].rows if len(keep + columns) > 1 else ((v, ) for v in T[keep + columns])

        for ix, row in enumerate(it, start=1):
            keeps = row[:n]
            transposes = row[n:]

            for name, value in zip(keep, keeps):
                news[name].extend([value] * len(transposes))
            for name, value in zip(columns, transposes):
                news[column_name].append(name)
                news[value_name].append(value)

            if ix % Config.SINGLE_PROCESSING_LIMIT == 0:
                for name, values in news.items():
                    new[name].extend(values)
                    values.clear()

            pbar.update(1)

    for name, values in news.items():
        new[name].extend(np.array(values))
        values.clear()
    return new