导航菜单

数据挖掘实战

零售分析实战

1. 客户分群分析

使用RFM模型对客户进行分群,实现精准营销。

# RFM客户分群
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# 加载数据
def load_customer_data(file_path):
    df = pd.read_csv(file_path)
    return df

# 计算RFM指标
def calculate_rfm(df):
    # 计算最近一次购买时间
    df['recency'] = (pd.to_datetime('today') - pd.to_datetime(df['last_purchase'])).dt.days
    
    # 计算购买频率
    frequency = df.groupby('customer_id')['order_id'].count()
    
    # 计算购买金额
    monetary = df.groupby('customer_id')['amount'].sum()
    
    # 合并RFM指标
    rfm = pd.DataFrame({
        'recency': df.groupby('customer_id')['recency'].min(),
        'frequency': frequency,
        'monetary': monetary
    })
    
    return rfm

# 客户分群
def customer_segmentation(rfm, n_clusters=4):
    # 数据标准化
    scaler = StandardScaler()
    rfm_scaled = scaler.fit_transform(rfm)
    
    # K-means聚类
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    rfm['cluster'] = kmeans.fit_predict(rfm_scaled)
    
    return rfm

# 分析客户群特征
def analyze_segments(rfm):
    # 计算每个群的平均RFM值
    segment_analysis = rfm.groupby('cluster').agg({
        'recency': 'mean',
        'frequency': 'mean',
        'monetary': 'mean'
    })
    
    # 计算每个群的客户数量
    segment_size = rfm['cluster'].value_counts()
    
    return segment_analysis, segment_size

# 可视化结果
def plot_segments(rfm):
    plt.figure(figsize=(12, 8))
    
    # 绘制散点图
    scatter = plt.scatter(
        rfm['recency'],
        rfm['frequency'],
        c=rfm['cluster'],
        cmap='viridis',
        s=rfm['monetary']/100
    )
    
    plt.xlabel('最近购买时间')
    plt.ylabel('购买频率')
    plt.title('客户分群结果')
    
    # 添加图例
    legend1 = plt.legend(*scatter.legend_elements(),
                        title="客户群")
    plt.add_artist(legend1)
    
    plt.show()

# 应用示例
def retail_analysis(file_path):
    # 加载数据
    df = load_customer_data(file_path)
    
    # 计算RFM指标
    rfm = calculate_rfm(df)
    
    # 客户分群
    rfm = customer_segmentation(rfm)
    
    # 分析结果
    segment_analysis, segment_size = analyze_segments(rfm)
    
    # 可视化
    plot_segments(rfm)
    
    return segment_analysis, segment_size

2. 商品关联分析

使用Apriori算法挖掘商品之间的关联规则。

# 商品关联分析
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import matplotlib.pyplot as plt

# 加载数据
def load_transaction_data(file_path):
    df = pd.read_csv(file_path)
    return df

# 数据预处理
def preprocess_data(df):
    # 创建购物篮数据
    basket = df.pivot_table(
        index='transaction_id',
        columns='product_id',
        values='quantity',
        aggfunc='sum',
        fill_value=0
    )
    
    # 转换为二进制数据
    basket = (basket > 0).astype(int)
    
    return basket

# 挖掘关联规则
def mine_association_rules(basket, min_support=0.01, min_confidence=0.5):
    # 生成频繁项集
    frequent_itemsets = apriori(
        basket,
        min_support=min_support,
        use_colnames=True
    )
    
    # 生成关联规则
    rules = association_rules(
        frequent_itemsets,
        metric='confidence',
        min_threshold=min_confidence
    )
    
    return rules

# 可视化规则
def plot_rules(rules):
    plt.figure(figsize=(10, 6))
    plt.scatter(rules['support'], rules['confidence'],
                alpha=0.5, s=rules['lift']*100)
    
    plt.xlabel('支持度')
    plt.ylabel('置信度')
    plt.title('关联规则分布')
    
    # 添加规则标签
    for i, rule in rules.iterrows():
        plt.annotate(
            f"{rule['antecedents']} -> {rule['consequents']}",
            (rule['support'], rule['confidence']),
            xytext=(5, 5),
            textcoords='offset points'
        )
    
    plt.show()

# 生成营销建议
def generate_recommendations(rules):
    recommendations = []
    
    for _, rule in rules.iterrows():
        if rule['lift'] > 1:
            recommendations.append({
                'antecedent': rule['antecedents'],
                'consequent': rule['consequents'],
                'confidence': rule['confidence'],
                'lift': rule['lift']
            })
    
    return recommendations

# 应用示例
def product_association_analysis(file_path):
    # 加载数据
    df = load_transaction_data(file_path)
    
    # 数据预处理
    basket = preprocess_data(df)
    
    # 挖掘关联规则
    rules = mine_association_rules(basket)
    
    # 可视化规则
    plot_rules(rules)
    
    # 生成建议
    recommendations = generate_recommendations(rules)
    
    return recommendations