Docstring
- 함수의 기능
- 인수
- 반환 값
- 발생하는 오류에 대한 정보
- 함수에 대해 말하고 싶은 모든 것
Docstring formats
- Google Style
- Numpydoc
- reStructuredText
- Epytext
Google Style
def function(arg_1, arg_2=42):
    """Description of what the function does.

    Args:
        arg_1 (str): Description of arg_1 that can break onto the next line
            if needed.
        arg_2 (int, optional): Write optional when an argument has a default
            value.

    Returns:
        bool: Optional description of the return value.
        Extra lines are not indented.

    Raises:
        ValueError: Include any error types that the function intentionally
            raises.

    Notes:
        See https://www.datacamp.com/community/tutorials/docstrings-python
    """
Numpydoc
def function(arg_1, arg_2=42):
    """
    Description of what the function does.

    Parameters
    ----------
    arg_1 : expected type of arg_1
        Description of arg_1.
    arg_2 : int, optional
        Write optional when an argument has a default value.
        Default=42.

    Returns
    -------
    The type of the return value
        Can include a description of the return value.
        Replace "Returns" with "Yields" if this function is a generator.
    """
Retrieving docstrings
def the_answer():
    """Return the answer to life, the universe, and everything.

    Returns:
        int
    """
    # The docstring promises an int; without this line the function
    # would silently return None.
    return 42
function.__doc__ 를 통해 docstring 확인 가능
import inspect
docstring = inspect.getdoc(pd.read_csv)
Don’t repeat yourself ( DRY )
# The same load / split / PCA / scatter steps are written out three times
# (train, val, test). Each copy separates the 'labels' column from the
# features and fits PCA on the features only.
train = pd.read_csv("train.csv")
train_Y = train['labels'].values
# Selecting several columns needs a list inside the indexer: df[[...]].
train_X = train[[col for col in train.columns if col != 'labels']].values
train_pca = PCA(n_components=2).fit_transform(train_X)
plt.scatter(train_pca[:, 0], train_pca[:, 1])

val = pd.read_csv("val.csv")
val_Y = val['labels'].values
val_X = val[[col for col in val.columns if col != 'labels']].values
# Fit on val_X, not val: the raw frame still contains the label column.
val_pca = PCA(n_components=2).fit_transform(val_X)
plt.scatter(val_pca[:, 0], val_pca[:, 1])

test = pd.read_csv("test.csv")
test_Y = test['labels'].values
test_X = test[[col for col in test.columns if col != 'labels']].values
# Fit on test_X, not test, for the same reason as above.
test_pca = PCA(n_components=2).fit_transform(test_X)
plt.scatter(test_pca[:, 0], test_pca[:, 1])
위 코드를 보면 train, val, test 세 개의 똑같은 코드가 반복된다.
반복했을 때 문제점
- 실수를 발견하기 어렵다.
- 변경해야 할 때, 여러 곳에서 변경해야 한다.
def load_and_plot(path):
    """Load a data set and plot the first two principal components.

    Args:
        path (str): The location of a CSV file.

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    # NOTE(review): the repeated-code example above reads a 'labels' column;
    # confirm which column name the CSV files actually use.
    y = data['label'].values
    # Selecting several columns needs a list inside the indexer: df[[...]].
    X = data[[col for col in data.columns if col != 'label']].values
    pca = PCA(n_components=2).fit_transform(X)
    plt.scatter(pca[:, 0], pca[:, 1])
    return X, y


# One call per data set replaces the three copies of the same code.
train_X, train_y = load_and_plot("train.csv")
val_X, val_y = load_and_plot("val.csv")
test_X, test_y = load_and_plot("test.csv")
Do One Thing
def load_data(path):
    """Load a data set.

    Args:
        path (str): The location of a CSV file.

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    y = data['label'].values
    # Selecting several columns needs a list inside the indexer: df[[...]].
    X = data[[col for col in data.columns if col != 'label']].values
    return X, y
def plot_data(X):
    """Plot the first two principal components of a matrix.

    Args:
        X (numpy.ndarray): The data to plot.
    """
    pca = PCA(n_components=2).fit_transform(X)
    # Column 0 goes on the x-axis and column 1 on the y-axis.
    plt.scatter(pca[:, 0], pca[:, 1])
함수를 나누면서 얻게 되는 장점
- 코드가 유연해진다. ( 둘 중 한 가지 일만 하고 싶을 때, 자유롭게 가능 )
- 이해하기 쉽다 + 테스트 및 디버그가 더 쉽다.
Pass by assignment
a = [1,2,3]
b = a
a.append(4)
print(b)
[output]
[1,2,3,4]
b.append(5)
print(a)
[output]
[1,2,3,4,5]
a = 42
print(b)
[output]
[1,2,3,4,5]
Context Managers
with <context-manager>(<args>) as <variable-name>:
    # Run your code here
    # This code is running "inside the context"
# This code runs after the context is removed
# The file is open only inside the with-block; it is closed on exit.
with open("my_file.txt") as my_file:
    text = my_file.read()
    length = len(text)

# `length` is still available after the context has ended.
print('The file is {} characters long'.format(length))
@contextlib.contextmanager
def my_context():
    """Skeleton of a generator-based context manager.

    Code before the ``yield`` runs on entering the with-block and code after
    it runs on leaving. This bare ``yield`` hands ``None`` to the ``as``
    target.
    """
    # Add any set up code you need
    yield
    # Add any teardown code you need
import contextlib
@contextlib.contextmanager
def my_context():
    """Context manager that prints on entry and exit and yields 42."""
    print("hello")
    # The yielded value is what `as foo` receives in the with statement.
    yield 42
    print("goodbye")


with my_context() as foo:
    print("foo is {}".format(foo))
[output]
hello
foo is 42
goodbye
@contextlib.contextmanager
def database(url):
    """Context manager that yields a database connection for `url`.

    Args:
        url (str): Passed unchanged to postgres.connect().

    Yields:
        The object returned by postgres.connect(url).
    """
    # set up database connection
    db = postgres.connect(url)
    try:
        yield db
    finally:
        # tear down database connection, even if the with-block raised,
        # so the connection is never left open
        db.disconnect()
url = "http://datacamp.com/data"

# my_db is whatever database() yields.
with database(url) as my_db:
    course_list = my_db.execute(
        'SELECT * FROM courses'
    )
Handling errors
try:
    # code that might raise an error
except:
    # do something about the error
finally:
    # this code runs no matter what
-------------------------------------
@contextlib.contextmanager
def get_printer(ip):
    """Context manager that yields a printer connection and always disconnects.

    Args:
        ip: Address passed unchanged to connect_to_printer().

    Yields:
        The object returned by connect_to_printer(ip).
    """
    p = connect_to_printer(ip)
    try:
        # Yield the connection itself; a bare `yield` would bind None to the
        # `as` target and the caller could not use the printer.
        yield p
    finally:
        # Runs whether or not the with-block raised.
        p.disconnect()
        print("disconnected from printer")
doc = {"text": "This is my text."}

# get_printer() disconnects when this block exits, even if print_page() raises.
with get_printer("10.0.34.111") as printer:
    printer.print_page(doc['text'])
Functions as variables
def my_function():
    """Print 'Hello'."""
    print('Hello')


# No parentheses: this binds the function object itself to x, without calling it.
x = my_function
type(x)
[output]
<class 'function'>
----------------------
x()
[output]
Hello
----------------------
PrintMcPrintface = print
PrintMcPrintface('Python is awesome!')
[output]
Python is awesome!
List and dictionaries of functions
list_of_functions = [my_function, open, print]
list_of_functions[2]('I am printing with an element of a list')
[output]
I am printing with an element of a list
----------------------------------------------------------------
dict_of_functions = {
'func1' : my_function,
'func2' : open,
'func3' : print
}
dict_of_functions['func3']('I am printing with a value of dict!')
Referencing a function
def my_function():
    """Return 42."""
    return 42


# Without parentheses the name refers to the function object ...
x = my_function
# ... and with parentheses the function is called.
my_function()
[output]
42
---------------------
my_function
[output]
<function __main__.my_function>
Functions as arguments
def has_docstring(func):
    """Check to see if the function 'func' has a docstring.

    Args:
        func (callable): A function.

    Returns:
        bool
    """
    return func.__doc__ is not None
# Intentionally has no docstring: it is the negative case for has_docstring().
def no():
    return 42
def yes():
    """Return the value 42
    """
    return 42
has_docstring(no)
[output]
False
------------------------------------
has_docstring(yes)
[output]
True
Defining a function inside another function
def foo(x, y):
    """Print x * y when both x and y are strictly between 4 and 10."""
    # The same range test is written out twice, once per argument.
    if x > 4 and x < 10 and y > 4 and y < 10:
        print(x * y)
-------------------------
def foo(x, y):
    """Print x * y when both x and y are strictly between 4 and 10."""
    def in_range(v):
        # Helper visible only inside foo(); holds the shared range test.
        return v > 4 and v < 10

    if in_range(x) and in_range(y):
        print(x * y)
Functions as return values
def get_function():
    """Return a function that prints its single argument."""
    def print_me(s):
        print(s)

    # Return the function object itself (no call parentheses).
    return print_me


new_func = get_function()
new_func('This is a sentence.')
[output]
This is a sentence.
Scope
x = 7
y = 200
print(x)
[output]
7
-------------
def foo():
    """Print a local x, then the module-level y."""
    x = 42  # a new local name; there is no `global` statement here
    print(x)
    print(y)  # y is not assigned in foo(), so it is looked up outside it


# foo() takes no parameters, so it must be called without arguments.
foo()
[output]
42
200
---------------
x = 7


def foo():
    """Rebind the module-level x to 42 and print it."""
    # `global` makes the assignment below target the module-level x
    # instead of creating a local variable.
    global x
    x = 42
    print(x)


foo()
print(x)
[output]
42
42
-------------------
def foo():
    """Show an inner function rebinding a variable of its enclosing function."""
    x = 10

    def bar():
        # `nonlocal` makes the assignment below target foo()'s x
        # instead of creating a variable local to bar().
        nonlocal x
        x = 200
        print(x)

    bar()
    print(x)


foo()
[output]
200
200
Closures
def foo():
    """Return a function that prints the value of foo()'s local `a`."""
    a = 5

    def bar():
        print(a)

    return bar


func = foo()
# Call func() directly: bar() prints 5 and returns None, so wrapping the call
# in print() would add an extra "None" line.
func()
# The variable that bar() uses from foo() is stored on the returned function.
print(type(func.__closure__))
print(len(func.__closure__))
[output]
5
<class 'tuple'>
1
Definitions - nested function
# outer function
def parent():
# nested function
def child():
pass
return child
Definitions - nonlocal variables
def parent(arg_1, arg_2):
    """Return a child() function that uses parent()'s variables and arguments.

    Args:
        arg_1: First operand of the sum printed by child().
        arg_2: Second operand of the sum printed by child().

    Returns:
        callable: child, which takes no arguments and prints three lines.
    """
    # From child()'s point of view,
    # 'value' and 'my_dict' are nonlocal variables,
    # as are 'arg_1' and 'arg_2'
    value = 22
    my_dict = {'chocolate': 'yummy'}

    def child():
        print(2 * value)
        print(my_dict['chocolate'])
        print(arg_1 + arg_2)

    return child
Closure : Nonlocal variables attached to a returned function.
def parent(arg_1, arg_2):
    """Return a child() function that uses parent()'s variables and arguments.

    Args:
        arg_1: First operand of the sum printed by child().
        arg_2: Second operand of the sum printed by child().

    Returns:
        callable: child, which takes no arguments and prints three lines.
    """
    value = 22
    my_dict = {'chocolate': 'yummy'}

    def child():
        print(2 * value)
        print(my_dict['chocolate'])
        print(arg_1 + arg_2)

    return child


new_function = parent(3, 4)
# Each cell in __closure__ holds one of the nonlocal values child() uses.
print([cell.cell_contents for cell in new_function.__closure__])
[output]
[3, 4, {'chocolate': 'yummy'}, 22]
Decorators
# The decorator replaces multiply with whatever double_args(multiply) returns.
@double_args
def multiply(a, b):
    return a * b


multiply(1, 5)
def multiply(a, b):
    """Return the product of a and b."""
    return a * b
def double_args(func):
    """Return a version of `func` that is called with both arguments doubled.

    Args:
        func (callable): A function of two positional arguments.

    Returns:
        callable: wrapper(a, b), which returns func(a * 2, b * 2).
    """
    # Define a new function that we can modify
    def wrapper(a, b):
        # Call the original function with each argument multiplied by 2
        return func(a * 2, b * 2)

    return wrapper
new_multiply = double_args(multiply)
new_multiply(1,5)
[output]
20
Time a function
import time
def timer(func):
    """A decorator that prints how long a function took to run.

    Args:
        func (callable): The function to time.

    Returns:
        callable: wrapper, which calls func with the same arguments, prints
        the elapsed time, and returns func's result.
    """
    # Define the wrapper function to return.
    def wrapper(*args, **kwargs):
        # When wrapper() is called, get the current time.
        t_start = time.time()
        # Call the decorated function and store the result.
        result = func(*args, **kwargs)
        # Get the total time it took to run, and print it.
        t_total = time.time() - t_start
        print('{} took {}s'.format(func.__name__, t_total))
        return result

    return wrapper
@timer
def sleep_n_seconds(n):
    time.sleep(n)


sleep_n_seconds(5)
[output]
sleep_n_seconds took 5.005098819732666s
Decorators and metadata
@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.
    """
    time.sleep(n)


# The name sleep_n_seconds now refers to the object @timer returned, so these
# attributes are read from that object.
print(sleep_n_seconds.__doc__)
print(sleep_n_seconds.__name__)
[output]
None
wrapper
from functools import wraps
def timer(func):
    """A decorator that prints how long a function took to run.

    Args:
        func (callable): The function to time.

    Returns:
        callable: wrapper, which calls func with the same arguments, prints
        the elapsed time, and returns func's result.
    """
    # wraps() copies func's metadata (such as __name__ and __doc__) onto
    # wrapper and records the original function as wrapper.__wrapped__.
    @wraps(func)
    def wrapper(*args, **kwargs):
        t_start = time.time()
        result = func(*args, **kwargs)
        t_total = time.time() - t_start
        print('{} took {}s'.format(func.__name__, t_total))
        return result

    return wrapper
# The docstring text is kept exactly as written because the sample output
# that follows echoes it.
@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.
    Args"
    n (int): The number of seconds to pause for.
    """
    time.sleep(n)


print(sleep_n_seconds.__doc__)
print(sleep_n_seconds.__name__)
[output]
Pause processing for n seconds.
Args"
n (int): The number of seconds to pause for.
sleep_n_seconds
# closures를 통해서도 가능하지만 다음 방법을 사용하면 쉬움
@timer
def sleep_n_seconds(n=10):
    """Pause processing for n seconds.

    Args:
        n (int): The number of seconds to pause for.
    """
    time.sleep(n)


# __wrapped__ is set by functools.wraps and refers to the undecorated function.
print(sleep_n_seconds.__wrapped__)
[output]
<function sleep_n_seconds at 0x7f3245720cb0>
Decorators that take arguments
def run_three_times(func):
    """Decorator that calls the decorated function three times per call.

    Args:
        func (callable): The function to repeat.

    Returns:
        callable: wrapper, which forwards its arguments to func three times.
        func's return values are discarded, so wrapper returns None.
    """
    def wrapper(*args, **kwargs):
        for _ in range(3):
            func(*args, **kwargs)

    return wrapper
@run_three_times
def print_sum(a, b):
    print(a + b)


print_sum(3, 5)
[output]
8
8
8
run_n_times()
def run_n_times(n):
    """Define and return a decorator that calls a function n times.

    Args:
        n (int): How many times each call to the decorated function runs it.

    Returns:
        callable: A decorator. The function it produces forwards its
        arguments to the original function n times and returns None.
    """
    def decorator(func):
        def wrapper(*args, **kwargs):
            # n comes from the enclosing run_n_times() call.
            for _ in range(n):
                func(*args, **kwargs)

        return wrapper

    return decorator
# run_n_times(3) is called first; the decorator it returns is then applied.
@run_n_times(3)
def print_sum(a, b):
    print(a + b)


print_sum(3, 5)

# The returned decorator can also be given a name and reused.
run_three_times = run_n_times(3)


@run_three_times
def print_sum(a, b):
    print(a + b)


print_sum(1, 3)
[output]
8
8
8
4
4
4
Timeout(): a real world example
import signal
def raise_timeout(*args, **kwargs):
    """Raise TimeoutError, ignoring whatever arguments are passed in.

    Raises:
        TimeoutError: Always.
    """
    raise TimeoutError()
# Register raise_timeout() as the handler that runs when SIGALRM arrives
signal.signal(signal.SIGALRM, raise_timeout)
# Schedule an alarm 5 seconds from now
signal.alarm(5)
# Passing 0 cancels the pending alarm
signal.alarm(0)
def timeout_in_5s(func):
    """Decorator that sets a 5-second alarm around each call to `func`.

    The alarm is cancelled as soon as func returns or raises. What happens
    when the alarm goes off depends on the SIGALRM handler installed
    elsewhere.

    Args:
        func (callable): The function to guard.

    Returns:
        callable: wrapper, which forwards its arguments to func and returns
        func's result.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Set an alarm for 5 seconds
        signal.alarm(5)
        try:
            # Call the decorated func
            return func(*args, **kwargs)
        finally:
            # Cancel alarm
            signal.alarm(0)

    return wrapper
@timeout_in_5s
def foo():
    # Sleeps longer than the 5-second alarm set by the decorator.
    time.sleep(10)
    print('foo!')


foo()
[output]
TimeoutError: