4.6. To Parquet
File paths also work with DATA
4.6.1. SetUp
>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>>
>>>
>>> data = pd.DataFrame([
... {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30, 'lastlogin': pd.Timestamp('2000-01-01'), 'is_active': True},
... {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31, 'lastlogin': pd.Timestamp('2000-01-02'), 'is_active': True},
... {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32, 'lastlogin': pd.Timestamp('2000-01-03'), 'is_active': False},
... {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33, 'lastlogin': pd.Timestamp('2000-01-04'), 'is_active': False},
... {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34, 'lastlogin': pd.Timestamp('2000-01-05'), 'is_active': True},
... {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15, 'lastlogin': pd.NaT, 'is_active': None},
... ])
>>>
>>> data
firstname lastname age lastlogin is_active
0 Alice Apricot 30 2000-01-01 True
1 Bob Blackthorn 31 2000-01-02 True
2 Carol Corn 32 2000-01-03 False
3 Dave Durian 33 2000-01-04 False
4 Eve Elderberry 34 2000-01-05 True
5 Mallory Melon 15 NaT None
4.6.2. Example
>>> data.to_parquet('/tmp/myfile.parquet')
$ file /tmp/myfile.parquet
/tmp/myfile.parquet: Apache Parquet
4.6.3. Assignments
# %% About
# - Name: Pandas To Parquet
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Export data from `data` to file `FILE`
# 2. Data has to be in Parquet format
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wyeksportuj dane z `data` do pliku `FILE`
# 2. Dane mają być w formacie Parquet
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age email lastlogin is_active groups
# 0 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
# 1 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
# 2 Carol Corn 32 carol@example.com 2000-01-03 True users
# 3 Dave Durian 33 dave@example.org 2000-01-04 True users
# 4 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
# 5 Mallory Melon 15 mallory@example.net NaN False NaN
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> result = pd.read_parquet(FILE)
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result
firstname lastname age email lastlogin is_active groups
0 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
1 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
2 Carol Corn 32 carol@example.com 2000-01-03 True users
3 Dave Durian 33 dave@example.org 2000-01-04 True users
4 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
5 Mallory Melon 15 mallory@example.net NaN False NaN
>>> from os import remove
>>> remove(FILE)
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
# %% Data
# Location of the input dataset; passed to `pd.read_parquet` below.
DATA = 'https://python3.info/_static/example.parquet'
# Path the solution must write to: the doctests read it back with
# `pd.read_parquet(FILE)` and delete it afterwards with `os.remove`.
FILE = r'_temporary.parquet'
# Load the input dataset into a DataFrame; this is the object to export.
data = pd.read_parquet(DATA)
# %% Result