3.6. Read String

  • File paths also work with URLs

  • io.StringIO converts a str to a file-like object

3.6.1. SetUp

>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

3.6.2. Read From String

  • pd.read_csv() with io.StringIO

  • Read CSV file from string into DataFrame.

>>> DATA = """
... firstname  lastname    age
... Alice      Apricot     30
... Bob        Blackthorn  31
... Carol      Corn        32
... Dave       Durian      33
... Eve        Elderberry  34
... Mallory    Melon       15
... """
>>> pd.read_csv(DATA)
Traceback (most recent call last):
FileNotFoundError: [Errno 2] No such file or directory: '\nfirstname  lastname    age\nAlice      Apricot     30\nBob        Blackthorn  31\nCarol      Corn        32\nDave       Durian      33\nEve        Elderberry  34\nMallory    Melon       15\n'
>>>
>>> DATA
'\nfirstname  lastname    age\nAlice      Apricot     30\nBob        Blackthorn  31\nCarol      Corn        32\nDave       Durian      33\nEve        Elderberry  34\nMallory    Melon       15\n'
>>> from io import StringIO
>>>
>>> pd.read_csv(StringIO(DATA))
  firstname  lastname    age
0  Alice      Apricot     30
1  Bob        Blackthorn  31
2  Carol      Corn        32
3  Dave       Durian      33
4  Eve        Elderberry  34
5  Mallory    Melon       15

3.6.3. Read Table

  • pd.read_table()

  • delimiter=r'\s*\|\s*'

  • engine='python'

  • skiprows=3

  • usecols=[1, 2, 3]

  • names=['firstname', 'lastname', 'age']

  • Read general delimited file into DataFrame.

>>> DATA = """
... | firstname | lastname   | age |
... |-----------|------------|-----|
... | Alice     | Apricot    | 30  |
... | Bob       | Blackthorn | 31  |
... | Carol     | Corn       | 32  |
... | Dave      | Durian     | 33  |
... | Eve       | Elderberry | 34  |
... | Mallory   | Melon      | 15  |
... """
>>> pd.read_table(
...     StringIO(DATA),
...     delimiter='|',
... )
   Unnamed: 0   firstname    lastname      age   Unnamed: 4
0         NaN  -----------  ------------  -----         NaN
1         NaN   Alice        Apricot       30           NaN
2         NaN   Bob          Blackthorn    31           NaN
3         NaN   Carol        Corn          32           NaN
4         NaN   Dave         Durian        33           NaN
5         NaN   Eve          Elderberry    34           NaN
6         NaN   Mallory      Melon         15           NaN
>>> pd.read_table(
...     StringIO(DATA),
...     delimiter='|',
...     skiprows=3,
...     usecols=[1, 2, 3],
...     names=['firstname', 'lastname', 'age']
... )
     firstname      lastname  age
0   Alice        Apricot       30
1   Bob          Blackthorn    31
2   Carol        Corn          32
3   Dave         Durian        33
4   Eve          Elderberry    34
5   Mallory      Melon         15

3.6.4. Assignments

# %% About
# - Name: Pandas ReadStr Data
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in CSV format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie CSV do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname  lastname    age
# 0  Alice      Apricot     30
# 1  Bob        Blackthorn  31
# 2  Carol      Corn        32
# 3  Dave       Durian      33
# 4  Eve        Elderberry  34
# 5  Mallory    Melon       15

# %% Hints
# - `pd.read_csv()`
# - `io.StringIO`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname  lastname    age
0  Alice      Apricot     30
1  Bob        Blackthorn  31
2  Carol      Corn        32
3  Dave       Durian      33
4  Eve        Elderberry  34
5  Mallory    Melon       15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
from io import StringIO
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = """
firstname  lastname    age
Alice      Apricot     30
Bob        Blackthorn  31
Carol      Corn        32
Dave       Durian      33
Eve        Elderberry  34
Mallory    Melon       15
"""

# %% Result
result = ...

# %% About
# - Name: Pandas ReadStr Markdown
# - Difficulty: hard
# - Lines: 1
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in Markdown format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie Markdown do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname  lastname    age
# 0  Alice      Apricot     30
# 1  Bob        Blackthorn  31
# 2  Carol      Corn        32
# 3  Dave       Durian      33
# 4  Eve        Elderberry  34
# 5  Mallory    Melon       15

# %% Hints
# - `pd.read_table()`
# - `delimiter=r'\s*\|\s*'`
# - `engine='python'`
# - `skiprows=3`
# - `usecols=[1, 2, 3]`
# - `names=['firstname', 'lastname', 'age']`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname  lastname    age
0  Alice      Apricot     30
1  Bob        Blackthorn  31
2  Carol      Corn        32
3  Dave       Durian      33
4  Eve        Elderberry  34
5  Mallory    Melon       15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
from io import StringIO
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = """
| firstname | lastname   | age |
|-----------|------------|-----|
| Alice     | Apricot    | 30  |
| Bob       | Blackthorn | 31  |
| Carol     | Corn       | 32  |
| Dave      | Durian     | 33  |
| Eve       | Elderberry | 34  |
| Mallory   | Melon      | 15  |
"""

# %% Result
result = ...