Skip to content

Export utils

tablite.export_utils

Classes

Functions

tablite.export_utils.to_sql(table, name)

generates ANSI-92 compliant SQL.

PARAMETER DESCRIPTION
name

name of SQL table.

TYPE: str

Source code in tablite/export_utils.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def to_sql(table, name):
    """
    generates ANSI-92 compliant SQL.

    args:
        name (str): name of SQL table.

    returns:
        str: one SQL script: "begin; CREATE TABLE ...; INSERT INTO ...; commit;"
    """
    sub_cls_check(table, BaseTable)
    type_check(name, str)

    # Columns with exactly one python type map to a narrow SQL type;
    # mixed-type columns fall back to TEXT.
    columns = []
    for column_name, col in table.columns.items():
        dtype = col.types()
        if len(dtype) == 1:
            dtype, _ = dtype.popitem()
            if dtype is int:
                dtype = "INTEGER"
            elif dtype is float:
                dtype = "REAL"
            else:
                dtype = "TEXT"
        else:
            dtype = "TEXT"
        columns.append(f"{column_name} {dtype}")

    create_table = "CREATE TABLE {} ({})".format(name, ", ".join(columns))

    def _sql_literal(value):  # helper: render one value as an SQL literal.
        if value is None:
            return "NULL"  # bare SQL NULL, not the quoted string 'NULL'.
        if isinstance(value, (int, float)):
            return str(value)
        # ANSI SQL string literal: single quotes, '' escapes an embedded quote.
        return "'" + str(value).replace("'", "''") + "'"

    # Building the value list by hand avoids two bugs of str(tuple(...)):
    # the quoted string 'NULL' for None, and the trailing comma "(1,)" that
    # Python's repr emits for single-column rows (invalid SQL).
    row_inserts = []
    for row in table.rows:
        row_inserts.append("(" + ",".join(_sql_literal(i) for i in row) + ")")
    row_inserts = f"INSERT INTO {name} VALUES " + ",".join(row_inserts)
    return "begin; {}; {}; commit;".format(create_table, row_inserts)

tablite.export_utils.to_pandas(table)

returns pandas.DataFrame

Source code in tablite/export_utils.py
51
52
53
54
55
56
57
58
59
60
def to_pandas(table):
    """
    returns pandas.DataFrame

    raises:
        ImportError: if pandas is not installed.
    """
    sub_cls_check(table, BaseTable)
    # BUG FIX: the original referenced `pd` before importing it, so a missing
    # pandas raised NameError — the `except ImportError` branch was unreachable.
    # Import lazily so pandas stays an optional dependency.
    import pandas as pd  # noqa

    return pd.DataFrame(table.to_dict())  # noqa

tablite.export_utils.to_hdf5(table, path)

creates a copy of the table as hdf5

Note that some loss of type information is to be expected in columns of mixed type:

t.show(dtype=True) +===+===+=====+=====+====+=====+=====+===================+==========+========+===============+===+=========================+=====+===+ | # | A | B | C | D | E | F | G | H | I | J | K | L | M | O | |row|int|mixed|float|str |mixed| bool| datetime | date | time | timedelta |str| int |float|int| +---+---+-----+-----+----+-----+-----+-------------------+----------+--------+---------------+---+-------------------------+-----+---+ | 0 | -1|None | -1.1| |None |False|2023-06-09 09:12:06|2023-06-09|09:12:06| 1 day, 0:00:00|b |-100000000000000000000000| inf| 11| | 1 | 1| 1| 1.1|1000|1 | True|2023-06-09 09:12:06|2023-06-09|09:12:06|2 days, 0:06:40|嗨 | 100000000000000000000000| -inf|-11| +===+===+=====+=====+====+=====+=====+===================+==========+========+===============+===+=========================+=====+===+ t.to_hdf5(filename) t2 = Table.from_hdf5(filename) t2.show(dtype=True) +===+===+=====+=====+=====+=====+=====+===================+===================+========+===============+===+=========================+=====+===+ | # | A | B | C | D | E | F | G | H | I | J | K | L | M | O | |row|int|mixed|float|mixed|mixed| bool| datetime | datetime | time | str |str| int |float|int| +---+---+-----+-----+-----+-----+-----+-------------------+-------------------+--------+---------------+---+-------------------------+-----+---+ | 0 | -1|None | -1.1|None |None |False|2023-06-09 09:12:06|2023-06-09 00:00:00|09:12:06|1 day, 0:00:00 |b |-100000000000000000000000| inf| 11| | 1 | 1| 1| 1.1| 1000| 1| True|2023-06-09 09:12:06|2023-06-09 00:00:00|09:12:06|2 days, 0:06:40|嗨 | 100000000000000000000000| -inf|-11| +===+===+=====+=====+=====+=====+=====+===================+===================+========+===============+===+=========================+=====+===+

Source code in tablite/export_utils.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def to_hdf5(table, path):
    # fmt: off
    """
    creates a copy of the table as hdf5

    args:
        table (BaseTable): table to export.
        path (Path): destination .hdf5 file (overwritten if it exists).

    Note that some loss of type information is to be expected in columns of mixed type:
    >>> t.show(dtype=True)
    +===+===+=====+=====+====+=====+=====+===================+==========+========+===============+===+=========================+=====+===+
    | # | A |  B  |  C  | D  |  E  |  F  |         G         |    H     |   I    |       J       | K |            L            |  M  | O |
    |row|int|mixed|float|str |mixed| bool|      datetime     |   date   |  time  |   timedelta   |str|           int           |float|int|
    +---+---+-----+-----+----+-----+-----+-------------------+----------+--------+---------------+---+-------------------------+-----+---+
    | 0 | -1|None | -1.1|    |None |False|2023-06-09 09:12:06|2023-06-09|09:12:06| 1 day, 0:00:00|b  |-100000000000000000000000|  inf| 11|
    | 1 |  1|    1|  1.1|1000|1    | True|2023-06-09 09:12:06|2023-06-09|09:12:06|2 days, 0:06:40|嗨  | 100000000000000000000000| -inf|-11|
    +===+===+=====+=====+====+=====+=====+===================+==========+========+===============+===+=========================+=====+===+
    >>> t.to_hdf5(filename)
    >>> t2 = Table.from_hdf5(filename)
    >>> t2.show(dtype=True)
    +===+===+=====+=====+=====+=====+=====+===================+===================+========+===============+===+=========================+=====+===+
    | # | A |  B  |  C  |  D  |  E  |  F  |         G         |         H         |   I    |       J       | K |            L            |  M  | O |
    |row|int|mixed|float|mixed|mixed| bool|      datetime     |      datetime     |  time  |      str      |str|           int           |float|int|
    +---+---+-----+-----+-----+-----+-----+-------------------+-------------------+--------+---------------+---+-------------------------+-----+---+
    | 0 | -1|None | -1.1|None |None |False|2023-06-09 09:12:06|2023-06-09 00:00:00|09:12:06|1 day, 0:00:00 |b  |-100000000000000000000000|  inf| 11|
    | 1 |  1|    1|  1.1| 1000|    1| True|2023-06-09 09:12:06|2023-06-09 00:00:00|09:12:06|2 days, 0:06:40|嗨  | 100000000000000000000000| -inf|-11|
    +===+===+=====+=====+=====+=====+=====+===================+===================+========+===============+===+=========================+=====+===+
    """
    # fmt: on
    import h5py

    sub_cls_check(table, BaseTable)
    type_check(path, Path)

    total = f"{len(table.columns) * len(table):,}"  # noqa
    print(f"writing {total} records to {path}", end="")

    with h5py.File(path, "w") as f:
        n = 0  # count of datasets written; incremented but not otherwise used.
        for name, col in table.items():
            try:
                f.create_dataset(name, data=col[:])  # stored in hdf5 as '/name'
            except TypeError:
                # h5py cannot store the column's native dtype; fall back to strings.
                f.create_dataset(name, data=[str(i) for i in col[:]])  # stored in hdf5 as '/name'
            n += 1
    print("... done")

tablite.export_utils.excel_writer(table, path)

writer for excel files.

This can create xlsx files beyond Excel's row limit. If you're using pyexcel to read the data, you'll see the data is there. If you're using Excel, Excel will stop loading after 1,048,576 rows.

See pyexcel for more details: http://docs.pyexcel.org/

Source code in tablite/export_utils.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def excel_writer(table, path):
    """
    writer for excel files.

    This can create xlsx files beyond Excels.
    If you're using pyexcel to read the data, you'll see the data is there.
    If you're using Excel, Excel will stop loading after 1,048,576 rows.

    See pyexcel for more details:
    http://docs.pyexcel.org/
    """
    import pyexcel

    sub_cls_check(table, BaseTable)
    type_check(path, Path)

    # header row first, then the data rows.
    data = [table.columns, *table.rows]

    if path.suffix in (".xls", ".ods"):
        # these formats cannot hold very large numbers; stringify anything
        # whose magnitude exceeds 2**32 - 1, JSON-encode everything else.
        def _convert(cell):
            too_big = isinstance(cell, (int, float)) and abs(cell) > 2**32 - 1
            return str(cell) if too_big else DataTypes.to_json(cell)

        data = [[_convert(cell) for cell in row] for row in data]

    pyexcel.save_as(array=data, dest_file_name=str(path))

tablite.export_utils.to_json(table, *args, **kwargs)

Source code in tablite/export_utils.py
139
140
141
142
143
def to_json(table, *args, **kwargs):
    """Return the table serialized as a JSON string."""
    import json

    sub_cls_check(table, BaseTable)
    serializable = table.as_json_serializable()
    return json.dumps(serializable)

tablite.export_utils.path_suffix_check(path, kind)

Source code in tablite/export_utils.py
146
147
148
149
150
def path_suffix_check(path, kind):
    """Validate that *path* carries the suffix *kind* and that its parent directory exists.

    raises:
        ValueError: when the path's suffix differs from *kind*.
        FileNotFoundError: when the parent directory does not exist.
    """
    suffix = path.suffix
    if suffix != kind:
        raise ValueError(f"Suffix mismatch: Expected {kind}, got {suffix} in {path.name}")
    parent = path.parent
    if not parent.exists():
        raise FileNotFoundError(f"directory {parent} not found.")

tablite.export_utils.text_writer(table, path, tqdm=_tqdm)

exports table to csv, tsv or txt depending on path suffix. follows the JSON norm. text escape is ON for all strings.

Note:

If the delimiter is present in a string when the string is exported, text-escape is required, as the format otherwise is corrupted. When the file is being written, it is unknown whether any string in a column contains the delimiter. As text escaping the few strings that may contain the delimiter would lead to an asymmetric format, the safer guess is to text escape all strings.

Source code in tablite/export_utils.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def text_writer(table, path, tqdm=_tqdm):
    """exports table to csv, tsv or txt depending on path suffix.
    follows the JSON norm. text escape is ON for all strings.

    args:
        table (BaseTable): table to export.
        path (Path): destination file; the suffix selects the delimiter
            (.csv -> "," , .tsv -> tab, .txt -> "|").
        tqdm: progress-bar factory (defaults to tablite's _tqdm).

    raises:
        ValueError: if the path suffix is not .csv, .tsv or .txt.

    Note:
    ----------------------
    If the delimiter is present in a string when the string is exported,
    text-escape is required, as the format otherwise is corrupted.
    When the file is being written, it is unknown whether any string in
    a column contains the delimiter. As text escaping the few strings
    that may contain the delimiter would lead to an asymmetric format,
    the safer guess is to text escape all strings.
    """
    sub_cls_check(table, BaseTable)
    type_check(path, Path)

    def txt(value):  # helper for text writer
        if value is None:
            return ""  # A column with 1,None,2 must be "1,,2".
        elif isinstance(value, str):
            return value  # csv.writer applies the quoting/escaping.
        else:
            return str(DataTypes.to_json(value))  # this handles datetimes, timedelta, etc.

    delimiters = {".csv": ",", ".tsv": "\t", ".txt": "|"}
    delimiter = delimiters.get(path.suffix)
    if delimiter is None:
        # fail early with a clear message instead of csv.writer's cryptic
        # TypeError on delimiter=None.
        raise ValueError(f"{path.suffix} is not supported. Use one of {list(delimiters)}")

    # newline="" lets csv.writer control line endings, as the csv module
    # requires (otherwise Windows produces \r\r\n).
    with path.open("w", encoding="utf-8", newline="") as fo:
        w = csv.writer(fo, delimiter=delimiter)
        w.writerow(c for c in table.columns)
        for row in tqdm(table.rows, total=len(table), disable=Config.TQDM_DISABLE):
            w.writerow(txt(c) for c in row)

tablite.export_utils.sql_writer(table, path)

Source code in tablite/export_utils.py
190
191
192
193
194
def sql_writer(table, path):
    """Write the table to *path* as an ANSI-92 SQL script.

    The SQL table is named after the file stem (e.g. "out" for out.sql).
    """
    type_check(table, BaseTable)
    type_check(path, Path)
    with path.open("w", encoding="utf-8") as fo:
        # BUG FIX: to_sql requires a table name; the previous call
        # to_sql(table) always raised TypeError. Use the file stem.
        fo.write(to_sql(table, path.stem))

tablite.export_utils.json_writer(table, path)

Source code in tablite/export_utils.py
197
198
199
200
201
def json_writer(table, path):
    """Write the table to *path* as a JSON document."""
    type_check(table, BaseTable)
    type_check(path, Path)
    # encoding pinned explicitly, consistent with the other writers in this
    # module; the platform default encoding could otherwise vary.
    with path.open("w", encoding="utf-8") as fo:
        fo.write(to_json(table))

tablite.export_utils.to_html(table, path)

Source code in tablite/export_utils.py
204
205
206
207
208
def to_html(table, path):
    """Render the entire table as HTML and write it to *path*."""
    type_check(table, BaseTable)
    type_check(path, Path)
    html = table._repr_html_(slice(0, len(table)))
    with path.open("w", encoding="utf-8") as fo:
        fo.write(html)