This article was translated using AI.

Docstring

  • What the function does.
  • Arguments it accepts.
  • Return values.
  • Errors it intentionally raises.
  • Any other notes you want to record about the function.

Docstring Formats

  • Google Style
  • Numpydoc
  • reStructuredText
  • EpyText

Google Style

def function(arg_1, arg_2=42):
    """Description of what the function does.

    Args:
        arg_1 (str): Description of arg_1 that can break onto the next line if needed.
        arg_2 (int, optional): Write optional when an argument has a default value.

    Returns:
        bool: Optional description of the return value
        Extra lines are not indented.

    Raises:
        ValueError: Include any error types that the function intentionally raises.

    Notes:
        See https://www.datacamp.com/community/tutorials/docstrings-python
    """

Numpydoc

def function(arg_1, arg_2=42):
    """
    Description of what the function does.

    Parameters
    ----------
    arg_1 : expected type of arg_1
        Description of arg_1.
    arg_2 : int, optional
        Write optional when an argument has a default value.
        Default=42.

    Returns
    -------
    The type of the return value
        Can include a description of the return value.
        Replace "Returns" with "Yields" if this function is a generator.
    """

Retrieving Docstrings

def the_answer():
    """Return the answer to life, the universe, and everything.

    Returns:
        int
    """
    return 42

You can read a function’s raw docstring with function.__doc__, or use inspect.getdoc() to get it with the leading indentation cleaned up.

import inspect
# inspect.getdoc() returns the docstring with its indentation cleaned up.
# NOTE(review): `pd` is presumably pandas (import pandas as pd) - not shown here.
docstring = inspect.getdoc(pd.read_csv)

Don’t Repeat Yourself (DRY)

# Deliberately repetitive example: the same five steps are copied for each
# data set (load, split off labels, select features, reduce with PCA, plot).
train = pd.read_csv("train.csv")
train_y = train["labels"].values
# Index with a *list* of column names to select every column except the labels
train_x = train[[col for col in train.columns if col != "labels"]].values
train_pca = PCA(n_components=2).fit_transform(train_x)
plt.scatter(train_pca[:, 0], train_pca[:, 1])

val = pd.read_csv("val.csv")
val_y = val["labels"].values
val_x = val[[col for col in val.columns if col != "labels"]].values
val_pca = PCA(n_components=2).fit_transform(val_x)
plt.scatter(val_pca[:, 0], val_pca[:, 1])

test = pd.read_csv("test.csv")
test_y = test["labels"].values
test_x = test[[col for col in test.columns if col != "labels"]].values
test_pca = PCA(n_components=2).fit_transform(test_x)
plt.scatter(test_pca[:, 0], test_pca[:, 1])

The code for train, val, and test is repeated three times.


Problems with Repetition

  • Bugs in one copy are hard to notice in the others.
  • Changes have to be applied everywhere, increasing the chance of mistakes.
def load_and_plot(path):
    """Load a data set and plot the first two principal components.

    Args:
        path (str): The location of a CSV file.

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    y = data["label"].values
    # Index with a *list* of column names to keep every column except the label
    x = data[[col for col in data.columns if col != "label"]].values
    pca = PCA(n_components=2).fit_transform(x)
    plt.scatter(pca[:, 0], pca[:, 1])
    return x, y

# One call per data set replaces the three copied blocks
train_x, train_y = load_and_plot("train.csv")
val_x, val_y = load_and_plot("val.csv")
test_x, test_y = load_and_plot("test.csv")

Do One Thing

def load_data(path):
    """Load a data set.

    Args:
        path (str): The location of a CSV file.

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    y = data["label"].values
    # Index with a *list* of column names to keep every column except the label
    x = data[[col for col in data.columns if col != "label"]].values
    return x, y
def plot_data(x):
    """Scatter-plot a matrix projected onto its first two principal components.

    Args:
        x (numpy.ndarray): The data to plot.
    """
    reducer = PCA(n_components=2)
    projected = reducer.fit_transform(x)
    # First component on the horizontal axis, second on the vertical axis
    plt.scatter(projected[:, 0], projected[:, 1])

Splitting the responsibilities makes the code flexible: you can load without plotting or plot previously loaded data. It also gets easier to understand, test, and debug.


Pass by Assignment

a = [1, 2, 3]
b = a  # b now refers to the same list object as a
a.append(4)
print(b)  # the change made through a is visible through b

b.append(5)
print(a)  # and the change made through b is visible through a

a = 42  # rebinding a does not affect the list that b still refers to
print(b)

Output:

[1, 2, 3, 4]
[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]

Context Managers

with <context-manager>(<args>) as <variable_name>:
    # Code inside the context runs here

# This code runs after the context exits
# open() is used as a context manager here
with open("my_file.txt") as my_file:
    text = my_file.read()
    length = len(text)

# length is still available after the with block has finished
print("The file is {} characters long".format(length))
@contextlib.contextmanager
def my_context():
    """Skeleton of a generator-based context manager."""
    # Add any setup code you need (runs when the with block is entered)
    yield
    # Add any teardown code you need (runs when the with block exits)
import contextlib

@contextlib.contextmanager
def my_context():
    print("hello")  # setup: runs when the with block is entered
    yield 42  # the yielded value is bound to the name after `as`
    print("goodbye")  # teardown: runs when the with block exits

with my_context() as foo:
    print("foo is {}".format(foo))

Output:

hello
foo is 42
goodbye
@contextlib.contextmanager
def database(url):
    """Context manager that yields an open database connection.

    Args:
        url (str): Address passed to postgres.connect().

    Yields:
        The connection object returned by postgres.connect(url).
    """
    # set up database connection
    db = postgres.connect(url)
    try:
        yield db
    finally:
        # tear down database connection, even if the with block raised
        db.disconnect()

url = "http://datacamp.com/data"
# my_db is the connection yielded by database()
with database(url) as my_db:
    course_list = my_db.execute("SELECT * FROM courses")

Handling Errors

try:
    # code that might raise an error
except:
    # do something about the error
finally:
    # this code runs no matter what
@contextlib.contextmanager
def get_printer(ip):
    """Context manager that yields a connected printer and always disconnects it.

    Args:
        ip (str): Address passed to connect_to_printer().

    Yields:
        The printer object returned by connect_to_printer(ip).
    """
    p = connect_to_printer(ip)
    try:
        yield p
    finally:
        # Runs even if the with block raises, so the connection is not leaked
        p.disconnect()
        print("disconnected from printer")

doc = {"text": "This is my text."}

# printer is the object yielded by get_printer()
with get_printer("10.0.34.111") as printer:
    printer.print_page(doc["text"])

Functions as Variables

def my_function():
    print("Hello")

x = my_function  # no parentheses: x refers to the function itself
type(x)

x()  # calling x calls my_function

PrintMcPrintface = print  # built-in functions can be assigned to names too
PrintMcPrintface("Python is awesome!")

Output:

<class 'function'>
Hello
Python is awesome!

Lists and Dictionaries of Functions

# Functions can be stored as list elements and called through an index
list_of_functions = [my_function, open, print]
list_of_functions[2]("I am printing with an element of a list")

# ...or stored as dictionary values and called through a key
dict_of_functions = {
    "func1": my_function,
    "func2": open,
    "func3": print,
}
dict_of_functions["func3"]("I am printing with a value of dict!")

Referencing a Function

def my_function():
    return 42

x = my_function
my_function()  # with parentheses: calls the function and evaluates to 42
my_function  # without parentheses: evaluates to the function object itself

Output:

42
<function __main__.my_function>

Functions as Arguments

def has_docstring(func):
    """Report whether the function `func` carries a docstring.

    Args:
        func (callable): The function to inspect.

    Returns:
        bool: False when `func.__doc__` is None, True otherwise.
    """
    doc = func.__doc__
    if doc is None:
        return False
    return True

# no() deliberately has no docstring
def no():
    return 42

def yes():
    """Return the value 42"""
    return 42

# Functions are passed as arguments by name, without parentheses
has_docstring(no)
has_docstring(yes)

Output:

False
True

Defining a Function Inside Another Function

def foo(x, y):
    """Print x * y when both x and y lie strictly between 4 and 10."""
    if not (4 < x < 10 and 4 < y < 10):
        return
    print(x * y)
def foo(x, y):
    """Print x * y when both x and y lie strictly between 4 and 10."""
    def in_range(v):
        # True only for values strictly between 4 and 10
        return 4 < v < 10

    if not (in_range(x) and in_range(y)):
        return
    print(x * y)

Functions as Return Values

def get_function():
    # print_me is defined inside get_function() and handed back to the caller
    def print_me(s):
        print(s)
    return print_me

new_func = get_function()  # new_func now refers to print_me
new_func("This is a sentence.")

Output:

This is a sentence.

Scope

x = 7
y = 200
print(x)

def foo():
    x = 42  # assignment creates a new local x; the global x is untouched
    print(x)
    print(y)  # y is not assigned in foo(), so the global y is used
foo()

x = 7
def foo():
    global x  # assignments to x inside foo() now rebind the global x
    x = 42
    print(x)
foo()
print(x)

def foo():
    x = 10
    def bar():
        nonlocal x  # assignments to x inside bar() now rebind foo()'s x
        x = 200
        print(x)
    bar()
    print(x)
foo()

Output:

7
42
200
42
42
200
200

Closures

def foo():
    a = 5
    def bar():
        print(a)  # a is read from foo()'s scope
    return bar  # the nested function itself is returned, not its result

func = foo()

# bar() prints a itself and returns None, so it is called without print()
func()
print(type(func.__closure__))
print(len(func.__closure__))

Output:

5
<class 'tuple'>
1

Definitions – Nested Function

# outer function
def parent():
    # nested function: a function defined inside another function
    def child():
        pass
    return child

Definitions – Nonlocal Variables

def parent(arg_1, arg_2):
    # From child()'s point of view,
    # 'value' and 'my_dict' are nonlocal variables,
    # as are 'arg_1' and 'arg_2'
    value = 22
    my_dict = {"chocolate": "yummy"}

    def child():
        # child() reads all four names without receiving them as arguments
        print(2 * value)
        print(my_dict["chocolate"])
        print(arg_1 + arg_2)

    return child

Closure: Nonlocal Variables Attached to the Returned Function

def parent(arg_1, arg_2):
    value = 22
    my_dict = {"chocolate": "yummy"}

    def child():
        print(2 * value)
        print(my_dict["chocolate"])
        print(arg_1 + arg_2)

    return child

new_function = parent(3, 4)
# Each cell in __closure__ holds the value of one nonlocal variable used by child()
print([cell.cell_contents for cell in new_function.__closure__])

Output:

[3, 4, {'chocolate': 'yummy'}, 22]

Decorators

# double_args is applied as a decorator to multiply()
@double_args
def multiply(a, b):
    return a * b

multiply(1, 5)
def multiply(a, b):
    return a * b

def double_args(func):
    """Return a version of `func` that doubles both arguments before calling it."""
    def wrapper(a, b):
        # Double each argument, then delegate to the original function
        doubled_a, doubled_b = a * 2, b * 2
        return func(doubled_a, doubled_b)
    return wrapper

# Applying the decorator by hand: new_multiply is the wrapper around multiply
new_multiply = double_args(multiply)
new_multiply(1, 5)

Output:

20

Time a Function

import time

def timer(func):
    """A decorator that shows how long a function took to run.

    Args:
        func (callable): The function to time.

    Returns:
        callable: A wrapper that calls func, prints the elapsed time and
        returns func's result.
    """
    # Define the wrapper function to return.
    def wrapper(*args, **kwargs):
        # When wrapper() is called, read the clock. perf_counter() is meant
        # for measuring intervals and is unaffected by system clock changes.
        t_start = time.perf_counter()
        # Call the decorated function and store the result.
        result = func(*args, **kwargs)
        # Get the total time it took to run, and print it
        t_total = time.perf_counter() - t_start
        print("{} took {}s".format(func.__name__, t_total))
        return result
    return wrapper

@timer  # equivalent to: sleep_n_seconds = timer(sleep_n_seconds)
def sleep_n_seconds(n):
    time.sleep(n)

sleep_n_seconds(5)

Output:

sleep_n_seconds took 5.005098819732666s

Decorators and Metadata

@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.
    """
    time.sleep(n)
# The name sleep_n_seconds now refers to timer()'s wrapper(), so these
# print the wrapper's metadata rather than the original function's
print(sleep_n_seconds.__doc__)
print(sleep_n_seconds.__name__)

Output:

None
wrapper

Use functools.wraps() to preserve metadata.

from functools import wraps

def timer(func):
    """A decorator that shows how long a function took to run.

    Args:
        func (callable): The function to time.

    Returns:
        callable: A wrapper that calls func, prints the elapsed time and
        returns func's result. The wrapper keeps func's name and docstring.
    """

    @wraps(func)  # copy func's metadata (__name__, __doc__, ...) onto wrapper
    def wrapper(*args, **kwargs):
        # perf_counter() is meant for measuring intervals and is unaffected
        # by system clock changes.
        t_start = time.perf_counter()
        result = func(*args, **kwargs)
        t_total = time.perf_counter() - t_start
        print("{} took {}s".format(func.__name__, t_total))
        return result
    return wrapper

@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.
    """
    time.sleep(n)
# With @wraps(func) applied inside timer(), the original function's
# docstring and name are reported
print(sleep_n_seconds.__doc__)
print(sleep_n_seconds.__name__)

Output:

Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.

sleep_n_seconds

You can also access the undecorated function with __wrapped__:

@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.
    """
    time.sleep(n)
# __wrapped__ is set by functools.wraps() and refers to the undecorated function
print(sleep_n_seconds.__wrapped__)

Output:

<function sleep_n_seconds at 0x7f3245720cb0>

Decorators That Take Arguments

def run_three_times(func):
    """Decorator that calls the decorated function three times in a row.

    Args:
        func (callable): The function to repeat.

    Returns:
        callable: A wrapper that forwards its arguments to func three times.
        The wrapper discards func's return values and returns None.
    """
    @wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        for _ in range(3):
            func(*args, **kwargs)
    return wrapper

@run_three_times
def print_sum(a, b):
    print(a + b)
print_sum(3, 5)  # one call runs the body three times

Output:

8
8
8

run_n_times()

def run_n_times(n):
    """Define and return a decorator.

    Args:
        n (int): How many times the decorated function is called per invocation.

    Returns:
        callable: A decorator. The function it produces forwards its arguments
        to the decorated function n times and returns None.
    """
    def decorator(func):
        @wraps(func)  # keep func's __name__ and __doc__ on the wrapper
        def wrapper(*args, **kwargs):
            for _ in range(n):
                func(*args, **kwargs)
        return wrapper
    return decorator

# run_n_times(3) is called first; the decorator it returns is applied to print_sum
@run_n_times(3)
def print_sum(a, b):
    print(a + b)
print_sum(3, 5)

# The returned decorator can also be stored under a name and used with @
run_three_times = run_n_times(3)

@run_three_times
def print_sum(a, b):
    print(a + b)
print_sum(1, 3)

Output:

8
8
8
4
4
4

timeout(): A Real-World Example

import signal

def raise_timeout(*args, **kwargs):
    """Raise TimeoutError, ignoring whatever arguments are passed in."""
    raise TimeoutError
# When an "alarm" signal goes off, call raise_timeout()
signal.signal(signalnum=signal.SIGALRM, handler=raise_timeout)
# Set off an alarm in 5 seconds
signal.alarm(5)
# Cancel the alarm (passing 0 clears any pending alarm)
signal.alarm(0)
def timeout_in_5s(func):
    """Decorator that arms a 5-second alarm around each call to `func`.

    The alarm is cancelled as soon as `func` returns or raises. What happens
    when the alarm fires depends on the SIGALRM handler that is installed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        signal.alarm(5)  # start the 5-second countdown
        try:
            outcome = func(*args, **kwargs)
        finally:
            signal.alarm(0)  # always cancel the pending alarm
        return outcome
    return wrapper

@timeout_in_5s
def foo():
    time.sleep(10)  # longer than the 5-second alarm set by the decorator
    print("foo!")

foo()

Output:

TimeoutError: