Apply, Map, and ApplyMap

Definition

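These methods let you apply custom functions to DataFrames and Series. apply() runs a function along an axis of a DataFrame (once per row or once per column) or once per value of a Series, map() transforms a Series element-wise, and applymap() transforms every element of a DataFrame (it was renamed to DataFrame.map() in pandas 2.1, and applymap() is now deprecated).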

Key Concepts

  • apply(): Apply function along axis (rows or columns)
  • map(): Element-wise transformation for Series
  • applymap(): Element-wise transformation for DataFrame (deprecated since pandas 2.1; use DataFrame.map() instead)
  • Lambda Functions: Anonymous functions for quick operations
  • Vectorization: Prefer built-in methods when possible for speed

Example

python

# pandas is imported here so the example can be run on its own.
import pandas as pd

# Sample data: one row per employee. The later examples add derived columns
# to this same DataFrame.
employees = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Department': ['Sales', 'IT', 'HR', 'Sales', 'IT'],
    'Salary': [50000, 60000, 45000, 55000, 65000],
    'Years_Experience': [3, 5, 2, 4, 7],
    'Performance_Score': [85, 92, 78, 88, 95]
})

print("Employee Data:")
print(employees)
print("\n")

# ========== APPLY ON SERIES ==========
# Apply function to a single column
def categorize_salary(salary):
    """Bucket a salary into 'Low', 'Medium' or 'High'.

    Salaries below 50000 are 'Low', salaries from 50000 up to (but not
    including) 60000 are 'Medium', and anything else is 'High'.
    """
    # Bands are checked in ascending order; the first upper bound the salary
    # falls under decides the label.
    for upper_bound, label in ((50000, 'Low'), (60000, 'Medium')):
        if salary < upper_bound:
            return label
    return 'High'

# Series.apply calls categorize_salary once for every value in the column.
employees['Salary_Category'] = employees['Salary'].apply(categorize_salary)
print(
    "Apply function to Series:",
    employees[['Name', 'Salary', 'Salary_Category']],
    "\n",
    sep="\n",
)

# Same pattern with an inline lambda: the bonus is 10% of each salary.
employees['Bonus'] = employees['Salary'].apply(lambda salary: salary * 0.1)
print(
    "Lambda function on Series:",
    employees[['Name', 'Salary', 'Bonus']],
    "\n",
    sep="\n",
)

# ========== APPLY ON DATAFRAME (axis=1 for rows) ==========
# Apply function across rows
def calculate_performance_bonus(row):
    """Return the performance-weighted bonus for one employee row.

    The bonus is 5% of ``row['Salary']`` multiplied by
    ``row['Performance_Score'] / 100``.
    """
    return (row['Salary'] * 0.05) * (row['Performance_Score'] / 100)

# With axis=1 the function is called once per row.
employees['Performance_Bonus'] = employees.apply(
    calculate_performance_bonus,
    axis=1,
)
print(
    "Apply function across rows:",
    employees[['Name', 'Salary', 'Performance_Score', 'Performance_Bonus']],
    "\n",
    sep="\n",
)

# Multiple return values using apply
def employee_summary(row):
    """Derive several summary values from one employee row.

    Returns a pandas Series with three labelled entries:

    * ``Total_Comp`` -- ``row['Salary']`` plus ``row['Performance_Bonus']``.
    * ``Experience_Level`` -- ``'Senior'`` when ``row['Years_Experience']``
      is at least 5, otherwise ``'Junior'``.
    * ``High_Performer`` -- whether ``row['Performance_Score']`` is at
      least 90.
    """
    total_comp = row['Salary'] + row['Performance_Bonus']

    if row['Years_Experience'] >= 5:
        level = 'Senior'
    else:
        level = 'Junior'

    is_high_performer = row['Performance_Score'] >= 90

    summary = {
        'Total_Comp': total_comp,
        'Experience_Level': level,
        'High_Performer': is_high_performer,
    }
    return pd.Series(summary)

# employee_summary returns a Series per row, so the row-wise apply collects
# those Series into a DataFrame with one column per returned label.
summary_df = employees.apply(employee_summary, axis=1)
print("Apply with multiple return values:", summary_df, "\n", sep="\n")

# ========== APPLY ON DATAFRAME (axis=0 for columns) ==========
# Restrict the frame to the three numeric columns used for the statistics.
numeric_df = employees[['Salary', 'Years_Experience', 'Performance_Score']]

# With the default axis the lambda receives one whole column at a time and
# returns a labelled Series of that column's statistics.
column_stats = numeric_df.apply(
    lambda column: pd.Series({
        'mean': column.mean(),
        'std': column.std(),
        'min': column.min(),
        'max': column.max(),
    })
)
print("Apply function to columns:", column_stats, "\n", sep="\n")

# ========== MAP ON SERIES ==========
# Map with dictionary: each department name is replaced by its code.
department_codes = {'Sales': 'SLS', 'IT': 'INF', 'HR': 'HRM'}
employees['Dept_Code'] = employees['Department'].map(department_codes)
print(
    "Map with dictionary:",
    employees[['Name', 'Department', 'Dept_Code']],
    "\n",
    sep="\n",
)

# Map with function: the lambda is called once per value.
employees['Experience_Doubled'] = employees['Years_Experience'].map(
    lambda years: years * 2
)
print(
    "Map with function:",
    employees[['Name', 'Years_Experience', 'Experience_Doubled']],
    "\n",
    sep="\n",
)

# Map with Series (useful for lookups): the Series index holds the
# department names and its values hold the budgets.
dept_budgets = pd.Series({'Sales': 1000000, 'IT': 1500000, 'HR': 500000})
employees['Dept_Budget'] = employees['Department'].map(dept_budgets)
print(
    "Map with Series:",
    employees[['Name', 'Department', 'Dept_Budget']],
    "\n",
    sep="\n",
)

# ========== APPLYMAP (deprecated) / MAP for DataFrame ==========
# In newer Pandas versions, use .map() instead of .applymap()
sample_df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
})

print("Original DataFrame:")
print(sample_df)
print("\n")

# Apply function to every element (use map in Pandas 2.1+)
# For older versions: sample_df.applymap(lambda x: x ** 2)
# Only the attribute lookup sits inside the try. If the whole call were
# wrapped, an AttributeError raised by the mapped function itself would be
# mistaken for "old pandas" and silently retried through applymap().
try:
    elementwise = sample_df.map
except AttributeError:
    # Fallback for older Pandas versions
    elementwise = sample_df.applymap

squared_df = elementwise(lambda x: x ** 2)

print("Squared values (element-wise):")
print(squared_df)
print("\n")

# ========== PERFORMANCE COMPARISON ==========
# Imports used only by this comparison, kept local so it runs on its own.
import time

import numpy as np

print("Performance Comparison:")
print("-" * 50)

# Create larger dataset
large_df = pd.DataFrame({
    'values': np.random.randn(10000)
})

# Method 1: Apply (slower) -- the lambda is called once per element.
# perf_counter() is used rather than time(): it is a high-resolution clock
# intended for measuring short intervals, whereas time() can be too coarse
# to register a run this short.
start = time.perf_counter()
result1 = large_df['values'].apply(lambda x: x ** 2)
apply_time = time.perf_counter() - start

# Method 2: Vectorized operation (faster)
start = time.perf_counter()
result2 = large_df['values'] ** 2
vectorized_time = time.perf_counter() - start

print(f"Apply time: {apply_time:.4f} seconds")
print(f"Vectorized time: {vectorized_time:.4f} seconds")
# Guard the ratio: if the vectorized run measured as zero, dividing would
# raise ZeroDivisionError instead of finishing the example.
if vectorized_time > 0:
    print(f"Speedup: {apply_time/vectorized_time:.2f}x")
else:
    print("Speedup: n/a (vectorized run was too fast to measure)")
print("\n")

# ========== ADVANCED APPLY USAGE ==========
# Using apply with args
def add_value(x, value_to_add):
    """Return ``x`` increased by ``value_to_add``."""
    increased = x + value_to_add
    return increased

# Extra positional arguments for the applied function go through ``args``;
# here 5000 is passed as add_value's ``value_to_add``.
employees['Salary_Plus_5000'] = employees['Salary'].apply(
    add_value,
    args=(5000,),
)
print(
    "Apply with arguments:",
    employees[['Name', 'Salary', 'Salary_Plus_5000']],
    "\n",
    sep="\n",
)

# Conditional apply
def conditional_bonus(row):
    """Return the department-specific bonus for one employee row.

    Sales earns 15% of ``row['Salary']``, IT earns 12%, and every other
    department earns 10%.
    """
    rate_by_department = {'Sales': 0.15, 'IT': 0.12}
    # Departments without an entry fall back to the 10% rate.
    bonus_rate = rate_by_department.get(row['Department'], 0.10)
    return row['Salary'] * bonus_rate

# Row-wise apply: conditional_bonus picks the rate from each row's department.
employees['Dept_Specific_Bonus'] = employees.apply(conditional_bonus, axis=1)
print(
    "Conditional apply across rows:",
    employees[['Name', 'Department', 'Salary', 'Dept_Specific_Bonus']],
    sep="\n",
)