3.6. Read String
File paths also work with URLs
io.StringIO converts str to a file-like object
3.6.1. SetUp
>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
3.6.2. Read From String
Use pd.read_csv() with io.StringIO to read a CSV file from a string into a DataFrame.
>>> DATA = """
... firstname lastname age
... Alice Apricot 30
... Bob Blackthorn 31
... Carol Corn 32
... Dave Durian 33
... Eve Elderberry 34
... Mallory Melon 15
... """
>>> pd.read_csv(DATA)
Traceback (most recent call last):
FileNotFoundError: [Errno 2] No such file or directory: '\nfirstname lastname age\nAlice Apricot 30\nBob Blackthorn 31\nCarol Corn 32\nDave Durian 33\nEve Elderberry 34\nMallory Melon 15\n'
>>>
>>> DATA
'\nfirstname lastname age\nAlice Apricot 30\nBob Blackthorn 31\nCarol Corn 32\nDave Durian 33\nEve Elderberry 34\nMallory Melon 15\n'
>>> from io import StringIO
>>>
>>> pd.read_csv(StringIO(DATA))
firstname lastname age
0 Alice Apricot 30
1 Bob Blackthorn 31
2 Carol Corn 32
3 Dave Durian 33
4 Eve Elderberry 34
5 Mallory Melon 15
3.6.3. Read Table
pd.read_table() reads a general delimited file into a DataFrame. Useful parameters:
- delimiter=r'\s*\|\s*'
- engine='python'
- skiprows=3
- usecols=[1, 2, 3]
- names=['firstname', 'lastname', 'age']
>>> DATA = """
... | firstname | lastname | age |
... |-----------|------------|-----|
... | Alice | Apricot | 30 |
... | Bob | Blackthorn | 31 |
... | Carol | Corn | 32 |
... | Dave | Durian | 33 |
... | Eve | Elderberry | 34 |
... | Mallory | Melon | 15 |
... """
>>> pd.read_table(
... StringIO(DATA),
... delimiter='|',
... )
Unnamed: 0 firstname lastname age Unnamed: 4
0 NaN ----------- ------------ ----- NaN
1 NaN Alice Apricot 30 NaN
2 NaN Bob Blackthorn 31 NaN
3 NaN Carol Corn 32 NaN
4 NaN Dave Durian 33 NaN
5 NaN Eve Elderberry 34 NaN
6 NaN Mallory Melon 15 NaN
>>> pd.read_table(
... StringIO(DATA),
... delimiter='|',
... skiprows=3,
... usecols=[1, 2, 3],
... names=['firstname', 'lastname', 'age']
... )
firstname lastname age
0 Alice Apricot 30
1 Bob Blackthorn 31
2 Carol Corn 32
3 Dave Durian 33
4 Eve Elderberry 34
5 Mallory Melon 15
3.6.4. Assignments
# %% About
# - Name: Pandas ReadStr Data
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read whitespace-separated data `DATA` from a string to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` rozdzielone białymi znakami z ciągu znaków do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age
# 0 Alice Apricot 30
# 1 Bob Blackthorn 31
# 2 Carol Corn 32
# 3 Dave Durian 33
# 4 Eve Elderberry 34
# 5 Mallory Melon 15
# %% Hints
# - `pd.read_csv()`
# - `io.StringIO`
# - `sep=r'\s+'`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
firstname lastname age
0 Alice Apricot 30
1 Bob Blackthorn 31
2 Carol Corn 32
3 Dave Durian 33
4 Eve Elderberry 34
5 Mallory Melon 15
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
from io import StringIO
import pandas as pd
# %% Types
# Annotation only; the doctests above require `result` to be exactly `pd.DataFrame`.
result: pd.DataFrame
# %% Data
# Plain-text table: one header row followed by six records, with columns
# separated by whitespace. The literal starts with a newline, so its first
# line is blank.
DATA = """
firstname lastname age
Alice Apricot 30
Bob Blackthorn 31
Carol Corn 32
Dave Durian 33
Eve Elderberry 34
Mallory Melon 15
"""
# %% Result
# Placeholder: replace `...` with the solution (the doctests reject Ellipsis).
result = ...
# %% About
# - Name: Pandas ReadStr Markdown
# - Difficulty: hard
# - Lines: 1
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read data `DATA` in Markdown format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` w formacie Markdown do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age
# 0 Alice Apricot 30
# 1 Bob Blackthorn 31
# 2 Carol Corn 32
# 3 Dave Durian 33
# 4 Eve Elderberry 34
# 5 Mallory Melon 15
# %% Hints
# - `pd.read_table()`
# - `io.StringIO`
# - `delimiter=r'\s*\|\s*'`
# - `engine='python'`
# - `skiprows=3`
# - `usecols=[1, 2, 3]`
# - `names=['firstname', 'lastname', 'age']`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
firstname lastname age
0 Alice Apricot 30
1 Bob Blackthorn 31
2 Carol Corn 32
3 Dave Durian 33
4 Eve Elderberry 34
5 Mallory Melon 15
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
from io import StringIO
import pandas as pd
# %% Types
# Annotation only; the doctests above require `result` to be exactly `pd.DataFrame`.
result: pd.DataFrame
# %% Data
# Markdown-style table: a blank first line (the literal starts with a newline),
# a header row, a dashed separator row, then six records. Every row begins and
# ends with `|`, so splitting on `|` yields an empty field at both ends.
DATA = """
| firstname | lastname | age |
|-----------|------------|-----|
| Alice | Apricot | 30 |
| Bob | Blackthorn | 31 |
| Carol | Corn | 32 |
| Dave | Durian | 33 |
| Eve | Elderberry | 34 |
| Mallory | Melon | 15 |
"""
# %% Result
# Placeholder: replace `...` with the solution (the doctests reject Ellipsis).
result = ...