Originally published at https://mathdatasimplified.com on July 17, 2023.
Data scientists often use the print function to debug their code. However, as the number of print statements grows, it becomes difficult to tell where each line of output comes from, because printed output carries no line numbers or function names.
def encode_data(data: list) -> list:
    """Translate each item of *data* into its integer code.

    Prints a progress message and the lookup table before encoding.

    Args:
        data: Items to encode; each must be one of 'a', 'b' or 'c'.

    Returns:
        A new list holding the integer code of every item, in order.

    Raises:
        KeyError: If an item is not a key of the lookup table.
    """
    print("Encode data")
    data_map = {'a': 1, 'b': 2, 'c': 3}
    print(f"Data map: {data_map}")
    return [data_map[item] for item in data]


def add_one(data: list) -> list:
    """Return a new list with every number in *data* increased by one.

    Prints a progress message before doing the work.
    """
    print("Add one")
    return [num + 1 for num in data]
def process_data(data: list) -> None:
    """Encode *data* and then add one to every code, printing each stage.

    The intermediate and final lists are only printed; nothing is returned.

    Args:
        data: Items accepted by ``encode_data``.
    """
    print("Process data")
    data = encode_data(data)
    print(f"Encoded data: {data}")
    data = add_one(data)
    print(f"Added one: {data}")


# Run the pipeline on a small sample input.
process_data(['a', 'a', 'c'])
Output:
Process data
Encode data
Data map: {'a': 1, 'b': 2, 'c': 3}
Encoded data: [1, 1, 3]
Add one
Added one: [2, 2, 4]
When putting the code into production, manually going through and removing all the debugging lines can be a tedious and error-prone task.
def encode_data(data: list) -> list:
    """Translate each item of *data* into its integer code.

    Prints a progress message before encoding.

    Args:
        data: Items to encode; each must be one of 'a', 'b' or 'c'.

    Returns:
        A new list holding the integer code of every item, in order.

    Raises:
        KeyError: If an item is not a key of the lookup table.
    """
    print("Encode data")
    data_map = {'a': 1, 'b': 2, 'c': 3}
    return [data_map[item] for item in data]


def add_one(data: list) -> list:
    """Return a new list with every number in *data* increased by one.

    Prints a progress message before doing the work.
    """
    print("Add one")
    return [num + 1 for num in data]
def process_data(data: list) -> None:
    """Encode *data* and then add one to every code.

    Only a progress message is printed; the computed list is not returned.

    Args:
        data: Items accepted by ``encode_data``.
    """
    print("Process data")
    data = encode_data(data)
    data = add_one(data)


# Run the pipeline on a small sample input.
process_data(['a', 'a', 'c'])
Logging solves this problem by letting data scientists assign a level (debug, info, warning, error) to each message. Debug-level output can then be switched off in production by raising the minimum level, instead of deleting lines by hand.
def encode_data(data: list) -> list:
    """Translate each item of *data* into its integer code.

    Logs a progress message at info level and the lookup table at debug
    level. ``logger`` must already exist at module level; it is not
    defined in this snippet.

    Args:
        data: Items to encode; each must be one of 'a', 'b' or 'c'.

    Returns:
        A new list holding the integer code of every item, in order.

    Raises:
        KeyError: If an item is not a key of the lookup table.
    """
    logger.info("Encode data")
    data_map = {'a': 1, 'b': 2, 'c': 3}
    logger.debug(f"Data map: {data_map}")
    return [data_map[item] for item in data]


def add_one(data: list) -> list:
    """Return a new list with every number in *data* increased by one.

    Logs a progress message at info level before doing the work.
    """
    logger.info("Add one")
    return [num + 1 for num in data]
def process_data(data: list):
logger.info("Process data")
data = encode_data(data)
logger.debug(f"Encoded data: {data}")
data = add_one(data)…