Iteration, Comprehensions, and Generators
What You'll Learn
Every pattern for iterating over data — from basic for loops to generators that handle millions of rows without running out of memory.
Basic Iteration
# Iterate over a list: each pass binds the next element to `fruit`
fruits = ["apple", "banana", "cherry"]
for fruit in fruits:
    print(fruit)  # apple, banana, cherry (one per line)
# Iterate over a string: a str yields its characters one at a time
for char in "Python":
    print(char)  # P, y, t, h, o, n (one per line)
# Iterate over a range: range(5) starts at 0 and stops before 5
for i in range(5):
    print(i)  # 0 1 2 3 4
# Iterate over dict items: .items() yields (key, value) pairs, unpacked here
config = {"host": "localhost", "port": 5432}
for key, value in config.items():
    print(f"{key} = {value}")  # host = localhost, then port = 5432
enumerate() — Index + Value Together
Avoid manual counter variables:
fruits = ["apple", "banana", "cherry"]
# ❌ Manual counter: the index has to be initialised and advanced by hand
i = 0
for fruit in fruits:
    print(i, fruit)
    i += 1
# ✅ enumerate (cleaner): yields (index, item) pairs, counting from 0
for i, fruit in enumerate(fruits):
    print(i, fruit)
# 0 apple
# 1 banana
# 2 cherry
# Start from 1: `start` changes only the counter, not which items are visited
for i, fruit in enumerate(fruits, start=1):
    print(f"{i}. {fruit}")
# 1. apple
# 2. banana
# 3. cherry
zip() — Iterate Multiple Lists Together
names = ["Alice", "Bob", "Charlie"]
scores = [95, 82, 74]
grades = ["A", "B", "C"]
# zip() groups the items found at the same position in each list
for name, score, grade in zip(names, scores, grades):
    print(f"{name}: {score} ({grade})")
# Alice: 95 (A)
# Bob: 82 (B)
# Charlie: 74 (C)
# zip stops at the shortest list
# Use zip_longest if you need all items
from itertools import zip_longest
# fillvalue stands in for the items the shorter list is missing
for a, b in zip_longest([1, 2, 3], [10, 20], fillvalue=0):
    print(a, b)
# 1 10 / 2 20 / 3 0
sorted(), reversed(), min(), max()
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# sorted() — returns a new sorted list; `numbers` itself is left unchanged
print(sorted(numbers))  # [1, 1, 2, 3, 4, 5, 6, 9]
print(sorted(numbers, reverse=True))  # descending: [9, 6, 5, 4, 3, 2, 1, 1]
# Sort by a custom key: words are compared by len(word); ties keep input order
words = ["banana", "apple", "fig", "cherry"]
print(sorted(words, key=len))  # ['fig', 'apple', 'banana', 'cherry']
# Sort dicts by value: x is a (name, score) pair, so x[1] is the score
scores = {"Alice": 95, "Bob": 82, "Charlie": 74}
for name, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
    print(f"{name}: {score}")  # Alice: 95, Bob: 82, Charlie: 74
# reversed() — iterate in reverse (no copy made)
for item in reversed(numbers):
    print(item)  # 6, 2, 9, 5, 1, 4, 1, 3 (one per line)
print(min(numbers))  # 1
print(max(numbers))  # 9
Comprehensions
Comprehensions build collections from iterables in one line.
List Comprehension
# [expression for item in iterable if condition]
# The trailing `if` is optional; when present, only matching items are kept.
squares = [n**2 for n in range(10)]  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
evens = [n for n in range(20) if n % 2 == 0]  # [0, 2, 4, ..., 18]
lengths = [len(w) for w in ["hello", "world", "python"]]  # [5, 5, 6]
Dict Comprehension
# {key_expr: value_expr for item in iterable}
names = ["Alice", "Bob", "Charlie"]
# Each name becomes a key; its length becomes the value
name_lengths = {name: len(name) for name in names}
# {'Alice': 5, 'Bob': 3, 'Charlie': 7}
# Filter while building: only pairs whose value passes the `if` are kept
scores = {"Alice": 95, "Bob": 62, "Charlie": 74}
passing = {k: v for k, v in scores.items() if v >= 70}
# {'Alice': 95, 'Charlie': 74}
Set Comprehension
# {expression for item in iterable} — a set keeps one copy of each value
words = ["hello", "world", "hello", "python"]
unique_lengths = {len(w) for w in words}
# {5, 6} (only unique lengths)
Generator Expression
Like a list comprehension but does not build the list in memory — evaluates lazily:
# List comprehension — builds entire list immediately
squares_list = [n**2 for n in range(1_000_000)]  # uses lots of memory
# Generator expression — one value at a time
squares_gen = (n**2 for n in range(1_000_000))  # uses almost no memory
# Use it the same way: pass it to anything that consumes an iterable.
# As the sole argument of a call, the extra parentheses can be dropped.
total = sum(n**2 for n in range(1_000_000))  # efficient
Generators with yield
A generator function uses yield instead of return. It produces values one at a time:
def count_up(start, end):
    """Generate start, start + 1, ... up to and including end.

    Nothing is yielded when start is already greater than end.
    """
    value = start
    while True:
        if not value <= end:
            return  # past the upper bound: the generator is finished
        yield value  # hand back one value, then suspend until the next request
        value += 1
# Each loop pass resumes count_up until it reaches its next yield
for n in count_up(1, 5):
    print(n)
# 1 2 3 4 5
# Only one value is in memory at a time
Real-World Generator: Reading Large Files
def read_large_file(filepath):
    """Lazily yield each line of a UTF-8 text file, one at a time.

    Every line is returned with leading and trailing whitespace (including
    the newline) stripped. The file is opened when iteration starts and is
    closed when the generator is exhausted or closed.
    """
    with open(filepath, encoding="utf-8") as handle:
        # Iterating the file object pulls one line at a time; the generator
        # expression keeps that laziness while applying strip().
        yield from (raw.strip() for raw in handle)
# Lines are pulled from the generator one at a time; only matches are printed
for line in read_large_file("huge_log.txt"):
    if "ERROR" in line:
        print(line)
Generator for Batching
def batch(items, size):
    """Yield items in chunks of at most `size`.

    Args:
        items: Either a sized, sliceable sequence (list, tuple, str, ...) or
            any other iterable (generator, file object, iterator).
        size: Maximum number of items per chunk; must be at least 1.

    Yields:
        For sequences, slices of `items` (same type as the input).
        For other iterables, lists of up to `size` items, produced lazily so
        the whole input is never held in memory at once.
        The final chunk may be shorter than `size`.

    Raises:
        ValueError: If `size` is less than 1 (raised when iteration starts).
    """
    # Local import keeps the function usable regardless of where the
    # module-level itertools import sits in the file.
    from itertools import islice

    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    try:
        total = len(items)
    except TypeError:
        # No len(): a generator or other one-shot iterable. Pull `size`
        # items at a time until the source runs dry.
        source = iter(items)
        while True:
            chunk = list(islice(source, size))
            if not chunk:
                return
            yield chunk

    # Sized input: slice it, which preserves the input's own type.
    for start in range(0, total, size):
        yield items[start:start + size]
records = list(range(1000))
# 1000 records in chunks of 100 -> ten chunks of 100 records each
for chunk in batch(records, 100):
    print(f"Processing batch of {len(chunk)} records")
itertools — Powerful Iteration Tools
import itertools
# chain — iterate multiple iterables as one
for item in itertools.chain([1, 2], [3, 4], [5]):
    print(item)  # 1 2 3 4 5
# islice — take first N from any iterable
# (only the first 5 values are ever requested from count_up)
first_5 = list(itertools.islice(count_up(1, 1000), 5))
# [1, 2, 3, 4, 5]
# groupby — group consecutive items by key
# (equal keys that are not adjacent form separate groups, so sort by the
# same key first when the data is not already ordered)
from itertools import groupby
data = [("a", 1), ("a", 2), ("b", 3), ("b", 4)]
for key, group in groupby(data, key=lambda x: x[0]):
    print(key, list(group))  # `group` is an iterator; list() materialises it
# a [('a', 1), ('a', 2)]
# b [('b', 3), ('b', 4)]
When to Use What
| Situation | Use |
|---|---|
| Need index + value | enumerate() |
| Iterate two lists in parallel | zip() |
| Build a list from a loop | List comprehension |
| Build a dict from a loop | Dict comprehension |
| Large data, memory matters | Generator expression / yield |
| Combine multiple iterables | itertools.chain() |
| Group sorted data | itertools.groupby() |
Common Mistakes
| Mistake | Fix |
|---|---|
| Using range(len(lst)) | Use enumerate(lst) instead |
| Building huge lists unnecessarily | Use generator expressions |
| Nested comprehensions too deep | Break into named variables |
| Consuming a generator twice | Generators are one-shot; convert to list if needed |
Quick Reference
# Templates only: iterable, list1, list2, items, expr, cond, pairs, value,
# a, b and n are placeholders to substitute with your own names.
# enumerate — yields (index, value) pairs; start=0 is the default
for i, v in enumerate(iterable, start=0):
    ...
# zip — stops at the shortest input
for a, b in zip(list1, list2):
    ...
# sorted with key — returns a new list
sorted(items, key=lambda x: x.field, reverse=True)
# Comprehensions
[expr for x in iterable]
[expr for x in iterable if cond]
{k: v for k, v in pairs}
{expr for x in iterable}
(expr for x in iterable)  # generator
# Generator function
def gen():
    yield value
# itertools
import itertools
itertools.chain(a, b)
# islice needs an iterable, so call the generator function first
itertools.islice(gen(), n)