6.1. DataFrame Create

  • pd.DataFrame(list[dict])

  • pd.DataFrame(dict[str,list])

  • pd.DataFrame(np.ndarray)

6.1.1. SetUp

>>> import pandas as pd
>>> import numpy as np

6.1.2. Create from List of Dicts

>>> pd.DataFrame([
...     {'A': 1.0, 'B': 2.0},
...     {'A': 3.0, 'B': 4.0},
... ])
     A    B
0  1.0  2.0
1  3.0  4.0
>>> pd.DataFrame([
...     {'A': 1.0, 'B': 2.0},
...     {'B': 3.0, 'C': 4.0},
... ])
     A    B    C
0  1.0  2.0  NaN
1  NaN  3.0  4.0
>>> pd.DataFrame([
...     {'firstname': 'Mark', 'lastname': 'Watney'},
...     {'firstname': 'Melissa', 'lastname': 'Lewis'},
...     {'firstname': 'Rick', 'lastname': 'Martinez'},
...     {'firstname': 'Alex', 'lastname': 'Vogel'},
... ])
  firstname  lastname
0      Mark    Watney
1   Melissa     Lewis
2      Rick  Martinez
3      Alex     Vogel

6.1.3. Create from Dict

>>> pd.DataFrame({
...     'A': ['a', 'b', 'c'],
...     'B': [1.0, 2.0, 3.0],
...     'C': [1, 2, 3],
... })
   A    B  C
0  a  1.0  1
1  b  2.0  2
2  c  3.0  3
>>> pd.DataFrame({
...     'firstname': ['Mark', 'Melissa', 'Rick', 'Alex'],
...     'lastname': ['Watney', 'Lewis', 'Martinez', 'Vogel'],
... })
  firstname  lastname
0      Mark    Watney
1   Melissa     Lewis
2      Rick  Martinez
3      Alex     Vogel

6.1.4. Create from NDArray

>>> import pandas as pd
>>> import numpy as np
>>> np.random.seed(0)
>>>
>>>
>>> df = pd.DataFrame(np.random.randn(7, 4))
>>>
>>> df
          0         1         2         3
0  1.764052  0.400157  0.978738  2.240893
1  1.867558 -0.977278  0.950088 -0.151357
2 -0.103219  0.410599  0.144044  1.454274
3  0.761038  0.121675  0.443863  0.333674
4  1.494079 -0.205158  0.313068 -0.854096
5 -2.552990  0.653619  0.864436 -0.742165
6  2.269755 -1.454366  0.045759 -0.187184

6.1.5. Use Case - 1

>>> import pandas as pd
>>> import numpy as np
>>>
>>>
>>> pd.DataFrame({
...     'A': 1.,
...     'B': pd.Timestamp('1961-04-12'),
...     'C': pd.Series(1, index=list(range(4)), dtype='float32'),
...     'D': np.array([3] * 4, dtype='int32'),
...     'E': pd.Categorical(["test", "train", "test", "train"]),
...     'F': 'foo',
...     'G': [1,2,3,4],
... })
     A          B    C  D      E    F  G
0  1.0 1961-04-12  1.0  3   test  foo  1
1  1.0 1961-04-12  1.0  3  train  foo  2
2  1.0 1961-04-12  1.0  3   test  foo  3
3  1.0 1961-04-12  1.0  3  train  foo  4

6.1.6. Use Case - 2

>>> import pandas as pd
>>> import numpy as np
>>> np.random.seed(0)
>>>
>>>
>>> df = pd.DataFrame(
...     columns = ['Morning', 'Noon', 'Evening', 'Midnight'],
...     index = pd.date_range('1999-12-30', periods=7),
...     data = np.random.randn(7, 4))
...
>>> df
             Morning      Noon   Evening  Midnight
1999-12-30  1.764052  0.400157  0.978738  2.240893
1999-12-31  1.867558 -0.977278  0.950088 -0.151357
2000-01-01 -0.103219  0.410599  0.144044  1.454274
2000-01-02  0.761038  0.121675  0.443863  0.333674
2000-01-03  1.494079 -0.205158  0.313068 -0.854096
2000-01-04 -2.552990  0.653619  0.864436 -0.742165
2000-01-05  2.269755 -1.454366  0.045759 -0.187184

6.1.7. Assignments

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert data `DATA` to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj dane `DATA` do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# Column-oriented input: each key is a column name and each list holds
# that column's values in row order (six rows).
DATA = {
    'firstname': ['Alice', 'Bob', 'Carol', 'Dave', 'Eve', 'Mallory'],
    'lastname': ['Apricot', 'Blackthorn', 'Corn', 'Durian', 'Elderberry', 'Melon'],
    'age': [30, 31, 32, 33, 34, 15],
}

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert data `DATA` to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj dane `DATA` do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# Row-oriented input: the first tuple holds the column names and the
# remaining six tuples are the records, in the same field order.
DATA = [
    ('firstname', 'lastname', 'age'),
    ('Alice', 'Apricot', 30),
    ('Bob', 'Blackthorn', 31),
    ('Carol', 'Corn', 32),
    ('Dave', 'Durian', 33),
    ('Eve', 'Elderberry', 34),
    ('Mallory', 'Melon', 15),
]

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert data `DATA` to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj dane `DATA` do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# Record-oriented input: one dict per row, every dict using the same
# three keys (`firstname`, `lastname`, `age`).
DATA = [
    {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
    {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
    {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
    {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
    {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
    {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15},
]

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 7
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert Markdown table to Pandas DataFrame
# 2. Use selection with `alt` key in your IDE
#    to convert data to `dict[str,tuple]` format
# 3. Define variable `result` with the solution
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj tabelkę Markdown do Pandas DataFrame
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `dict[str,tuple]`
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# The input exists only as the commented Markdown table below; per the
# task it is to be rewritten by hand into a `dict[str,tuple]` literal.

# | firstname | lastname   | age |
# |-----------|------------|-----|
# | Alice     | Apricot    |  30 |
# | Bob       | Blackthorn |  31 |
# | Carol     | Corn       |  32 |
# | Dave      | Durian     |  33 |
# | Eve       | Elderberry |  34 |
# | Mallory   | Melon      |  15 |

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 8
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert Markdown table to Pandas DataFrame
# 2. Use selection with `alt` key in your IDE
#    to convert data to `list[tuple]` format
# 3. Define variable `result` with the solution
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj tabelkę Markdown do Pandas DataFrame
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `list[tuple]`
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# The input exists only as the commented Markdown table below; per the
# task it is to be rewritten by hand into a `list[tuple]` literal.

# | firstname | lastname   | age |
# |-----------|------------|-----|
# | Alice     | Apricot    |  30 |
# | Bob       | Blackthorn |  31 |
# | Carol     | Corn       |  32 |
# | Dave      | Durian     |  33 |
# | Eve       | Elderberry |  34 |
# | Mallory   | Melon      |  15 |

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 8
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert Markdown table to Pandas DataFrame
# 2. Use selection with `alt` key in your IDE
#    to convert data to `list[dict]` format
# 3. Define variable `result` with the solution
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj tabelkę Markdown do Pandas DataFrame
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `list[dict]`
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
# Annotation only: declares the expected type of `result` without
# assigning a value yet.
result: pd.DataFrame

# %% Data
# The input exists only as the commented Markdown table below; per the
# task it is to be rewritten by hand into a `list[dict]` literal.

# | firstname | lastname   | age |
# |-----------|------------|-----|
# | Alice     | Apricot    |  30 |
# | Bob       | Blackthorn |  31 |
# | Carol     | Corn       |  32 |
# | Dave      | Durian     |  33 |
# | Eve       | Elderberry |  34 |
# | Mallory   | Melon      |  15 |

# %% Result
# Placeholder for the solution: the doctests in this assignment reject
# `result` while it is still Ellipsis and require a `pd.DataFrame`.
result = ...