Definition
These methods let you apply custom functions to DataFrames and Series. apply() runs a function along the rows or columns of a DataFrame (or on each value of a Series), map() transforms a Series element-wise, and applymap() (renamed to DataFrame.map() in pandas 2.1, where applymap() was deprecated) transforms every element of a DataFrame.
Key Concepts
- apply(): Apply a function along an axis of a DataFrame (axis=0 passes each column, axis=1 passes each row) or to each value of a Series
- map(): Element-wise transformation for Series
- applymap(): Element-wise transformation for DataFrame (deprecated since pandas 2.1; use DataFrame.map())
- Lambda Functions: Anonymous functions for quick operations
- Vectorization: Prefer built-in methods when possible for speed
Example
python
# Sample data, written one employee per row; the column labels are supplied
# separately so the resulting frame has the same layout as a dict of columns.
employees = pd.DataFrame(
    [
        ('Alice', 'Sales', 50000, 3, 85),
        ('Bob', 'IT', 60000, 5, 92),
        ('Charlie', 'HR', 45000, 2, 78),
        ('David', 'Sales', 55000, 4, 88),
        ('Eve', 'IT', 65000, 7, 95),
    ],
    columns=['Name', 'Department', 'Salary', 'Years_Experience', 'Performance_Score'],
)
# Header, table and the blank spacer are emitted by a single print call.
print("Employee Data:", employees, "\n", sep="\n")
# ========== APPLY ON SERIES ==========
# Apply function to a single column
def categorize_salary(salary, low_cutoff=50000, high_cutoff=60000):
    """Label a salary as 'Low', 'Medium' or 'High'.

    Args:
        salary: The salary to classify.
        low_cutoff: Salaries strictly below this value are 'Low'.
        high_cutoff: Salaries at or above ``low_cutoff`` and strictly below
            this value are 'Medium'.

    Returns:
        'Low', 'Medium' or 'High'. A salary at or above ``high_cutoff`` is
        'High'.
    """
    if salary < low_cutoff:
        return 'Low'
    if salary < high_cutoff:
        return 'Medium'
    return 'High'
# Series.apply calls the named helper once for every salary value.
employees['Salary_Category'] = employees['Salary'].apply(categorize_salary)
print(
    "Apply function to Series:",
    employees.loc[:, ['Name', 'Salary', 'Salary_Category']],
    "\n",
    sep="\n",
)
# The same pattern with a throwaway lambda: each bonus is the salary times 0.1.
employees['Bonus'] = employees['Salary'].apply(lambda pay: pay * 0.1)
print(
    "Lambda function on Series:",
    employees.loc[:, ['Name', 'Salary', 'Bonus']],
    "\n",
    sep="\n",
)
# ========== APPLY ON DATAFRAME (axis=1 for rows) ==========
# Apply function across rows
def calculate_performance_bonus(row, base_rate=0.05):
    """Compute a bonus from one employee row's salary and performance score.

    Args:
        row: A mapping-like object (for example the row Series that
            ``DataFrame.apply(..., axis=1)`` passes in) with 'Salary' and
            'Performance_Score' entries.
        base_rate: Fraction of the salary used as the base bonus before it
            is scaled by performance.

    Returns:
        ``Salary * base_rate * (Performance_Score / 100)``.
    """
    base_bonus = row['Salary'] * base_rate
    # The score is divided by 100 so that it acts as a fractional multiplier.
    performance_multiplier = row['Performance_Score'] / 100
    return base_bonus * performance_multiplier
# axis='columns' is the spelled-out form of axis=1: the helper receives one
# row at a time.
employees['Performance_Bonus'] = employees.apply(
    calculate_performance_bonus, axis='columns'
)
print(
    "Apply function across rows:",
    employees.loc[:, ['Name', 'Salary', 'Performance_Score', 'Performance_Bonus']],
    "\n",
    sep="\n",
)
# Multiple return values using apply
def employee_summary(row):
    """Build several derived values for one employee row.

    Args:
        row: A mapping-like object with 'Salary', 'Performance_Bonus',
            'Years_Experience' and 'Performance_Score' entries.

    Returns:
        A pandas Series with three labelled entries:
        'Total_Comp' (salary plus performance bonus), 'Experience_Level'
        ('Senior' when Years_Experience >= 5, otherwise 'Junior') and
        'High_Performer' (whether Performance_Score >= 90).
    """
    return pd.Series({
        'Total_Comp': row['Salary'] + row['Performance_Bonus'],
        'Experience_Level': 'Senior' if row['Years_Experience'] >= 5 else 'Junior',
        'High_Performer': row['Performance_Score'] >= 90,
    })
# Because the helper returns a labelled Series, the row-wise apply yields a
# DataFrame with one column per label.
summary_df = employees.apply(employee_summary, axis='columns')
print("Apply with multiple return values:", summary_df, "\n", sep="\n")
# ========== APPLY ON DATAFRAME (axis=0 for columns) ==========
# Restrict the frame to the numeric columns before computing statistics.
numeric_df = employees.loc[:, ['Salary', 'Years_Experience', 'Performance_Score']]
# With axis=0 each column is handed to the function as a Series; the labelled
# Series returned for each column is assembled into the result frame.
column_stats = numeric_df.apply(
    lambda col: pd.Series(
        [col.mean(), col.std(), col.min(), col.max()],
        index=['mean', 'std', 'min', 'max'],
    ),
    axis=0,
)
print("Apply function to columns:", column_stats, "\n", sep="\n")
# ========== MAP ON SERIES ==========
# Map with a dictionary: each department name is replaced by its code.
department_codes = dict(Sales='SLS', IT='INF', HR='HRM')
employees['Dept_Code'] = employees['Department'].map(department_codes)
print(
    "Map with dictionary:",
    employees.loc[:, ['Name', 'Department', 'Dept_Code']],
    "\n",
    sep="\n",
)
# Map with a function: the callable runs once per element.
employees['Experience_Doubled'] = employees['Years_Experience'].map(
    lambda years: years * 2
)
print(
    "Map with function:",
    employees.loc[:, ['Name', 'Years_Experience', 'Experience_Doubled']],
    "\n",
    sep="\n",
)
# Map with a Series: its index is the lookup key, its values the result.
dept_budgets = pd.Series(
    [1000000, 1500000, 500000],
    index=['Sales', 'IT', 'HR'],
)
employees['Dept_Budget'] = employees['Department'].map(dept_budgets)
print(
    "Map with Series:",
    employees.loc[:, ['Name', 'Department', 'Dept_Budget']],
    "\n",
    sep="\n",
)
# ========== APPLYMAP (deprecated) / MAP for DataFrame ==========
# In newer Pandas versions, use .map() instead of .applymap()
sample_df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9],
})
print("Original DataFrame:")
print(sample_df)
print("\n")
# Apply a function to every element. DataFrame.map is the method to use from
# pandas 2.1 onwards; older releases only provide DataFrame.applymap.
# Only the method lookup sits inside the try, so an AttributeError raised by
# the mapped function itself is not mistaken for "this pandas has no .map".
try:
    elementwise = sample_df.map
except AttributeError:
    # Fallback for older pandas versions
    elementwise = sample_df.applymap
squared_df = elementwise(lambda x: x ** 2)
print("Squared values (element-wise):")
print(squared_df)
print("\n")
# ========== PERFORMANCE COMPARISON ==========
import time

print("Performance Comparison:")
print("-" * 50)
# Create larger dataset
large_df = pd.DataFrame({
    'values': np.random.randn(10000)
})
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring short durations, whereas the
# wall clock can be too coarse to register the vectorized run at all.
# Method 1: Apply (slower)
start = time.perf_counter()
result1 = large_df['values'].apply(lambda x: x ** 2)
apply_time = time.perf_counter() - start
# Method 2: Vectorized operation (faster)
start = time.perf_counter()
result2 = large_df['values'] ** 2
vectorized_time = time.perf_counter() - start
print(f"Apply time: {apply_time:.4f} seconds")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
# Guard the ratio: a measured duration of zero would otherwise raise
# ZeroDivisionError and abort the example.
if vectorized_time > 0:
    print(f"Speedup: {apply_time/vectorized_time:.2f}x")
else:
    print("Speedup: n/a (vectorized run was too fast to measure)")
print("\n")
# ========== ADVANCED APPLY USAGE ==========
# Using apply with args
def add_value(x, value_to_add):
    """Return ``x`` increased by ``value_to_add``.

    Args:
        x: The value being transformed.
        value_to_add: The amount added to ``x``.

    Returns:
        ``x + value_to_add``.
    """
    return x + value_to_add
# Extra positional arguments for the helper are forwarded through ``args``;
# here 5000 is passed as value_to_add for every salary.
employees['Salary_Plus_5000'] = employees['Salary'].apply(
    add_value, args=(5000,)
)
print(
    "Apply with arguments:",
    employees.loc[:, ['Name', 'Salary', 'Salary_Plus_5000']],
    "\n",
    sep="\n",
)
# Conditional apply
# Bonus rate per department; departments not listed use the default rate.
_DEPARTMENT_BONUS_RATES = {'Sales': 0.15, 'IT': 0.12}
_DEFAULT_BONUS_RATE = 0.10


def conditional_bonus(row):
    """Compute a bonus whose rate depends on the employee's department.

    Args:
        row: A mapping-like object with 'Department' and 'Salary' entries.

    Returns:
        The salary multiplied by 0.15 for 'Sales', by 0.12 for 'IT', and
        by 0.10 for any other department.
    """
    rate = _DEPARTMENT_BONUS_RATES.get(row['Department'], _DEFAULT_BONUS_RATE)
    return row['Salary'] * rate
# Row-wise apply (axis='columns' is the same as axis=1): the helper picks the
# bonus rate from each row's department.
employees['Dept_Specific_Bonus'] = employees.apply(
    conditional_bonus, axis='columns'
)
print("Conditional apply across rows:")
print(employees.loc[:, ['Name', 'Department', 'Salary', 'Dept_Specific_Bonus']])
