>>> def run():
... return 1
>>> def run():
... yield 1
>>> def run():
... return 1
...
>>> result = run()  # calling a plain function executes its body immediately
>>>
>>> type(run) # plain function
<class 'function'>
>>>
>>> type(result) # plain object
<class 'int'>
>>> def run():
... yield 1
...
>>>
>>> result = run()  # calling a generator function does NOT run the body yet
>>>
>>> type(run) # generator function
<class 'function'>
>>>
>>> type(result) # generator object
<class 'generator'>
#%% Differences
>>> def run():
... return 1
... return 2
...
>>> result = run()
>>>
>>> result  # only the first return executes; the second is unreachable
1
>>> def run():
... yield 1
... yield 2
...
>>>
>>> run()  # each call creates a fresh, not-yet-started generator object
<generator object run at 0x103c649e0>
>>>
>>> result = run()
>>>
>>> next(result)  # resumes the body until the next yield
1
>>> next(result)
2
>>> next(result)  # an exhausted generator raises StopIteration
StopIteration
#%% Example
>>> import sys
>>> DATA = [
... ('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
... (5.8, 2.7, 5.1, 1.9, 'virginica'),
... (5.1, 3.5, 1.4, 0.2, 'setosa'),
... (5.7, 2.8, 4.1, 1.3, 'versicolor'),
... (6.3, 2.9, 5.6, 1.8, 'virginica'),
... (6.4, 3.2, 4.5, 1.5, 'versicolor'),
... (4.7, 3.2, 1.3, 0.2, 'setosa'),
... (7.0, 3.2, 4.7, 1.4, 'versicolor'),
... (7.6, 3.0, 6.6, 2.1, 'virginica'),
... (4.6, 3.1, 1.5, 0.2, 'setosa'),
... ]
>>> def get_values(species):  # eager version: builds the whole list in memory
... result = []
... for *values,label in DATA:  # star-unpack: features into values, last item is the label
... if label == species:
... result.append(values)
... return result
>>>
>>>
>>> for result in get_values('setosa'):
... print(result)
...
[5.1, 3.5, 1.4, 0.2]
[4.7, 3.2, 1.3, 0.2]
[4.6, 3.1, 1.5, 0.2]
>>>
>>> result = get_values('setosa')
>>> sys.getsizeof(result)  # shallow size of the materialized list
376
>>> def get_values(species):  # lazy version: yields one row at a time
... for *values,label in DATA:
... if label == species:
... yield values
...
>>>
>>>
>>> for result in get_values('setosa'):
... print(result)
...
[5.1, 3.5, 1.4, 0.2]
[4.7, 3.2, 1.3, 0.2]
[4.6, 3.1, 1.5, 0.2]
>>>
>>> result = get_values('setosa')
>>> sys.getsizeof(result)  # generator object has constant size regardless of data
208
>>> result = get_values('setosa')
>>>
>>> next(result)
[5.1, 3.5, 1.4, 0.2]
>>>
>>> next(result)
[4.7, 3.2, 1.3, 0.2]
>>>
>>> next(result)
[4.6, 3.1, 1.5, 0.2]
>>>
>>> next(result)
StopIteration
>>> result = get_values('setosa')
>>>
>>> x = next(result)  # pull one value, then do other work before the next pull
>>> print(x)
[5.1, 3.5, 1.4, 0.2]
>>>
>>> # doing something
>>> # doing something
>>> # doing something
>>>
>>> x = next(result)
>>> print(x)
[4.7, 3.2, 1.3, 0.2]
>>>
>>> # doing something
>>> # doing something
>>> # doing something
>>>
>>> x = next(result)
>>> print(x)
[4.6, 3.1, 1.5, 0.2]
>>>
>>> # doing something
>>> # doing something
>>> # doing something
>>>
>>> next(result)
StopIteration
#%%
>>> def run():
... print('Starting worker...')
... while True:
... data = yield
... print(f'Processing {data}...')
...
>>> worker = run()
>>>
>>> worker.send(None)  # priming: advances the generator to the first yield
Starting worker...
>>>
>>> worker.send('Mark')
Processing Mark...
>>>
>>> worker.send('Watney')
Processing Watney...
>>>
>>> worker.send([1,2,3])
Processing [1, 2, 3]...
>>>
>>> @dataclass  # NOTE(review): requires `from dataclasses import dataclass` — presumably imported earlier; confirm
... class User:
... firstname: str
... lastname: str
...
>>> mark = User('Mark', 'Watney')
>>>
>>> worker.send(mark)
Processing User(firstname='Mark', lastname='Watney')...
>>> import numpy as np
>>>
>>> data = np.arange(1,10).reshape(3,3)
>>> data
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>>
>>> worker.send(data)
Processing [[1 2 3]
[4 5 6]
[7 8 9]]...
#%% Yield From
>>> def run():
... for x in range(0,3):
... yield x
... for x in range(10,13):
... yield x
>>> def run():
... yield 0
... yield 1
... yield 2
... yield 10
... yield 11
... yield 12
>>> def a():
... yield 0
... yield 1
... yield 2
>>>
>>> def b():
... yield 10
... yield 11
... yield 12
>>> def run():
... yield a()  # yields the generator OBJECT itself, not its values
... yield b()
...
>>> result = run()
>>> next(result)
<generator object a at 0x10583acf0>
>>>
>>> next(result())  # result is a generator object, not callable
TypeError: 'generator' object is not callable
>>> def run():
... return a(), b()  # workaround: return both generator objects as a tuple
...
>>>
>>> result = run()
>>> result
(<generator object run at 0x103c64880>, <generator object b at 0x119f8ef00>)
>>> list(result[0])
[0, 1, 2]
>>> list(result[1])
[10, 11, 12]
>>> def run():
... yield list(a())  # workaround: materialize each sub-generator into a list
... yield list(b())
...
>>>
>>> result = run()
>>> result
<generator object run at 0x119d74640>
>>>
>>> next(result)
[0, 1, 2]
>>>
>>> next(result)
[10, 11, 12]
>>> from itertools import chain
>>>
>>> result = chain(a(), b())  # chain flattens: yields all of a(), then all of b()
>>>
>>> next(result)
0
>>> next(result)
1
>>> next(result)
2
>>> next(result)
10
>>> next(result)
11
>>> next(result)
12
>>> next(result)
StopIteration
>>> def run():
... yield from a()  # delegates to the sub-generator, yielding its values one by one
... yield from b()
...
>>>
>>> result = run()
>>>
>>> next(result)
0
>>> next(result)
1
>>> next(result)
2
>>> next(result)
10
>>> next(result)
11
>>> next(result)
12
>>> next(result)
StopIteration
>>>
>>> dir = Path('/Users/matt/Developer/2024-04-pythonadv-sages/_trener')  # NOTE(review): `Path` presumably from pathlib (import not shown); `dir` shadows the builtin dir()
>>>
>>> file = dir.rglob('*.py')  # rglob is lazy: returns a generator of matching paths
>>>
>>> next(file)
PosixPath('/Users/matt/Developer/2024-04-pythonadv-sages/_trener/encapsulation/accessor_descriptor_a.py')
>>> next(file)
PosixPath('/Users/matt/Developer/2024-04-pythonadv-sages/_trener/encapsulation/accessor_reflection_a.py')
>>> next(file)
PosixPath('/Users/matt/Developer/2024-04-pythonadv-sages/_trener/encapsulation/accessor_property_a.py')
>>> next(file)
PosixPath('/Users/matt/Developer/2024-04-pythonadv-sages/_trener/encapsulation/accessor_property_b.py')
>>>
>>>
>>> for file in dir.rglob('*.py'):
... print(file)
...
>>> def get_files(directory):  # lazily yields .py, then .rst, then .md files under directory
... path = Path(directory)
... yield from path.rglob('*.py')
... yield from path.rglob('*.rst')
... yield from path.rglob('*.md')
>>>
>>>
>>> for file in get_files('/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about'):
... print(file)
...
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/about_entrytest_b.py
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/about_entrytest_c.py
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/about_entrytest_d.py
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/about_entrytest_e.py
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/about_entrytest_a.py
/Users/matt/Developer/2024-04-pythonadv-sages/_trener/about/_notes.rst
Generator About
Processes one element at a time
Does not remember previous element
Does not know next element
Can be used only once
Saves memory (does not require more memory for processing large data)
Uses around 10% more CPU than regular processing
Typical usage: streams, processing larger than memory files or data
Cannot use
len()
because generators don't have a length
Previous element is overridden by the current one on
next()
Functions (list, dict, tuple, frozenset, set, sum, all, any, etc.)
Inspect
Generator Expression
Comprehensions execute instantly
Comprehensions are stored in memory until the end of a program
Comprehensions should be used when accessing values more than once
Generator Expressions are lazily evaluated
Generator Expressions are cleared once they are executed
Generator Expressions should be used when accessing values once (for example in a loop)
List Comprehension
Comprehensions execute instantly
Comprehensions will be in memory until the end of a program
Comprehensions - using values more than once
Generator Expression
Generators are lazy evaluated
Creates generator object and assign reference
Code is not executed instantly
Sometimes code is not executed at all!
Are cleared once they are executed
Generator will calculate next number for every loop iteration
Generator forgets previous number
Generator doesn't know the next number
It is used for one-time access to values
Comprehensions or Generator Expression
If you need values evaluated instantly, there is no point in using generators
Why Round Brackets?
Round brackets do not produce tuples (commas do)
Round brackets bounds context
Generator Function
yield
keyword turns a function into a generator function
Recap
Definition
Call Generator
Get Results
All generators implement the Iterator protocol
Iterator has an
obj.__iter__()
method which enables use of iter(obj)
Iterator has an
obj.__next__()
method which enables use of next(obj)
Yield Keyword
Yield in a Loop
Yields in Loops
Yield in a Zip Loop
Generator Inspect
Is Generator
Introspection
Memory Footprint
sys.getsizeof(obj)
returns the size of an obj in bytes
sys.getsizeof(obj)
calls the obj.__sizeof__() method
sys.getsizeof(obj)
adds an additional garbage collector overhead if the obj
is managed by the garbage collector
Generator Yield From
Since Python 3.3: PEP 380 -- Syntax for Delegating to a Subgenerator
Helps with refactoring generators
Useful for large generators which can be split into smaller ones
Delegation call
yield from
terminates on GeneratorExit from the other function
The value of the
yield from
expression is the first argument to the StopIteration
exception raised by the iterator when it terminates
Return expr in a generator causes
StopIteration(expr)
to be raised upon exit from the generator
Why?
Execute
Itertools Chain
Delegation call
Yield From Sequences
Yield From Comprehensions
Yield From Generator Expression
Conclusion
The Python yield keyword creates a generator function.
It's useful when the function returns a large amount of data by
splitting it into several chunks.
Generator Send
.send()
method allows passing a value to the generator
data = yield
will receive this "sent" value
Why Send None?!
After running you have to send a
None
value to begin processing
Sending anything else will raise
TypeError
Send Upstream Cascade
Generator Itertools
Itertools
Learn more at https://docs.python.org/library/itertools.html
More information in Itertools
from itertools import *
count(start=0, step=1)
cycle(iterable)
repeat(object[, times])
accumulate(iterable[, func, *, initial=None])
chain(*iterables)
compress(data, selectors)
islice(iterable, start, stop[, step])
starmap(function, iterable)
product(*iterables, repeat=1)
permutations(iterable, r=None)
combinations(iterable, r)
combinations_with_replacement(iterable, r)
groupby(iterable, key=None)
Itertools Count
itertools.count(start=0, step=1)
Itertools Cycle
itertools.cycle(iterable)
Itertools Repeat
itertools.repeat(object[, times])
Itertools Accumulate
itertools.accumulate(iterable[, func, *, initial=None])
Itertools Chain
Itertools Compress
Itertools ISlice
itertools.islice(iterable, start, stop[, step])
Itertools Starmap
itertools.starmap(function, iterable)
Itertools Product
itertools.product(*iterables, repeat=1)
Itertools Permutations
itertools.permutations(iterable, r=None)
Itertools Combinations
itertools.combinations(iterable, r)
Itertools Combinations With Replacement
itertools.combinations_with_replacement(iterable, r)
Itertools GroupBy
itertools.groupby(iterable, key=None)
Make an iterator that returns consecutive keys and groups from the