AUC 原理与代码详解

发布于 2026年03月29日 19:00 ·

AUC 原理与代码详解

1. 什么是 AUC

AUC（Area Under Curve）通常指 ROC 曲线下的面积，是评估分类模型（尤其是二分类模型）性能的常用指标。它综合衡量模型在所有分类阈值下区分正负样本的能力，等价于随机抽取一个正样本和一个负样本时，正样本得分高于负样本的概率。

AUC 的取值范围在 0 到 1 之间，其中 1 表示所有正样本的得分都高于负样本（完美排序）：

0.5: 模型表现等同于随机猜测

>0.5: 模型表现优于随机猜测

<0.5: 模型表现比随机猜测还差（通常说明标签或分数的方向弄反了，将预测结果取反即可得到大于 0.5 的 AUC）

AUC 特别适用于以下场景：

类别不平衡的数据集

需要比较不同模型的性能

关注排序质量而非绝对概率值

2. ROC 曲线

AUC 的计算基于 ROC（Receiver Operating Characteristic）曲线。ROC 曲线以假正率（FPR）为横轴，真正率（TPR）为纵轴。

关键概念：

真正率（TPR/Sensitivity/Recall）: TP / (TP + FN)

假正率（FPR）: FP / (FP + TN)

3. AUC 计算方法

3.1 梯形法则（Trapezoidal Rule）

最常用的计算方法，通过将 ROC 曲线下的区域划分为多个梯形来计算面积。

import numpy as np
from sklearn.metrics import roc_curve, auc


def calculateaucroc(ytrue, yscore):
    """
    Compute AUC with the trapezoidal rule over the ROC curve.

    Parameters:
    ytrue: ground-truth labels (0 or 1)
    yscore: prediction scores produced by the model

    Returns:
    (aucvalue, fpr, tpr): the AUC value followed by the false-positive-rate
    and true-positive-rate arrays obtained from ``roc_curve``
    """
    # ROC points; the thresholds are not needed to compute the area.
    fpr, tpr, _ = roc_curve(ytrue, yscore)

    # Sum the trapezoids formed by each pair of adjacent ROC points.
    aucvalue = 0.0
    for i in range(1, len(fpr)):
        # Base of the trapezoid: change in FPR between the two points.
        base = fpr[i] - fpr[i - 1]
        # Height of the trapezoid: mean TPR of the two points.
        height = (tpr[i] + tpr[i - 1]) / 2
        aucvalue += base * height

    return aucvalue, fpr, tpr


# Sample data
ytrue = [0, 0, 1, 1, 0, 1, 0, 1, 1, 0]
yscores = [0.1, 0.4, 0.35, 0.8, 0.2, 0.65, 0.3, 0.9, 0.7, 0.15]
aucval, fprpoints, tprpoints = calculateaucroc(ytrue, yscores)
print(f"手动计算的 AUC: {aucval:.4f}")

# Cross-check the manual result against scikit-learn
from sklearn.metrics import roc_auc_score
sklearnauc = roc_auc_score(ytrue, yscores)
print(f"sklearn 的 AUC: {sklearnauc:.4f}")

3.2 排序法（Alternative Method）

另一种理解 AUC 的方式是通过样本对的比较：

def calculateaucbypairs(ytrue, yscore):
    """
    Compute AUC by comparing every (positive, negative) sample pair.

    AUC = P(score_positive > score_negative)
          + 0.5 * P(score_positive == score_negative)

    Parameters:
    ytrue: ground-truth labels (0 or 1); a list or a NumPy array
    yscore: prediction scores, one per label

    Returns:
    The fraction of pairs in which the positive sample scores higher
    (ties count as half a pair), or 0 when there is no pair to compare.
    """
    # Convert up front: with a plain list, `ytrue == 1` is a single bool
    # rather than an element-wise mask, so the index lookup below would
    # not select the positive/negative samples.
    labels = np.asarray(ytrue)
    scores = np.asarray(yscore)

    positiveindices = np.where(labels == 1)[0]
    negativeindices = np.where(labels == 0)[0]

    auc = 0
    totalpairs = 0

    for posidx in positiveindices:
        for negidx in negativeindices:
            totalpairs += 1
            if scores[posidx] > scores[negidx]:
                auc += 1
            elif scores[posidx] == scores[negidx]:
                # A tie is credited as half a correctly ordered pair.
                auc += 0.5

    return auc / totalpairs if totalpairs > 0 else 0
# Test the pairwise method on the sample data
aucpairmethod = calculateaucbypairs(ytrue, yscores)
print(f"配对方法计算的 AUC: {aucpairmethod:.4f}")

4. 完整的 AUC 实现

```python
import numpy as np
import matplotlib.pyplot as plt
from typing import Tuple, List

class AUCAnalyzer:
"""AUC 分析器类"""

def __init__(self):
    """Create an analyzer with no ROC data; every result attribute starts as None."""
    self.fpr = None         # false positive rates, filled in by computeroccurve
    self.tpr = None         # true positive rates, filled in by computeroccurve
    self.thresholds = None  # score thresholds, filled in by computeroccurve
    self.aucvalue = None    # area under the ROC curve, filled in by computeroccurve


# Keep the previous spelling callable so existing ``init`` callers still work.
init = __init__

def computeroccurve(self, ytrue: List[int], yscore: List[float]) -> None:
    """
    Compute the ROC curve and the AUC for a set of scored samples.

    The results are stored on the instance:
    ``self.thresholds`` holds the distinct scores in descending order;
    ``self.fpr`` / ``self.tpr`` hold one point per threshold, preceded by
    the start point (0, 0) and followed by the end point (1, 1), so
    ``self.thresholds[i]`` corresponds to ``self.fpr[i + 1]`` and
    ``self.tpr[i + 1]``; ``self.aucvalue`` holds the value returned by
    ``self.calculateauctrapezoid()``.

    Args:
        ytrue: ground-truth labels (1 = positive, 0 = negative)
        yscore: prediction scores, one per label

    Raises:
        ValueError: if ytrue and yscore differ in length
    """
    if len(ytrue) != len(yscore):
        raise ValueError("标签和分数的长度必须相同")

    ytrue = np.asarray(ytrue)
    yscore = np.asarray(yscore)

    # Every distinct score is used as a threshold, highest first.
    thresholds = np.unique(yscore)[::-1]

    # The class masks and sizes do not depend on the threshold, so they
    # are computed once instead of inside the loop.
    positives = ytrue == 1
    negatives = ytrue == 0
    npos = np.sum(positives)
    nneg = np.sum(negatives)

    fprlist = []
    tprlist = []
    for threshold in thresholds:
        # A sample is predicted positive when its score reaches the threshold.
        predicted = yscore >= threshold
        tp = np.sum(positives & predicted)
        fp = np.sum(negatives & predicted)

        # Rates default to 0 when the corresponding class is absent.
        tprlist.append(tp / npos if npos > 0 else 0)
        fprlist.append(fp / nneg if nneg > 0 else 0)

    # Add the start point (0, 0) and the end point (1, 1).
    self.fpr = np.array([0.0] + fprlist + [1.0])
    self.tpr = np.array([0.0] + tprlist + [1.0])
    self.thresholds = np.array(thresholds)

    self.aucvalue = self.calculateauctrapezoid()

def calculateauctrapezoid(self) -> float:
    """
    Compute the area under the stored ROC curve with the trapezoidal rule.

    Returns:
        The sum, over adjacent points of ``self.fpr`` / ``self.tpr``, of
        (FPR step) * (mean TPR of the two points).

    Raises:
        ValueError: if ``self.fpr`` or ``self.tpr`` is still None
    """
    if self.fpr is None or self.tpr is None:
        raise ValueError("请先调用 computeroccurve 方法")

    fpr = np.asarray(self.fpr, dtype=float)
    tpr = np.asarray(self.tpr, dtype=float)

    # Width of each trapezoid is the FPR step; its height is the mean TPR
    # of the two points that bound it.
    widths = np.diff(fpr)
    heights = (tpr[1:] + tpr[:-1]) / 2

    return float(np.sum(widths * heights))

def getperformancemetrics(self, threshold: float) -> dict:
    """
    Return the performance metrics at the stored threshold closest to
    the requested one.

    Args:
        threshold: decision threshold to look up

    Returns:
        A dict with the requested threshold, the TPR and FPR of the
        nearest stored threshold, the derived precision and F1 score,
        and the stored AUC.

    Raises:
        ValueError: if ``self.thresholds`` is still None

    Note:
        Only rates are stored, so precision is computed as
        tpr / (tpr + fpr). With TP = tpr * P and FP = fpr * (N - P) this
        equals TP / (TP + FP) only when the positive and negative classes
        have the same size; for imbalanced data it is an approximation.
    """
    if self.thresholds is None:
        raise ValueError("请先调用 computeroccurve 方法")

    # Index of the stored threshold closest to the requested one.
    idx = np.abs(self.thresholds - threshold).argmin()

    # computeroccurve prepends the (0, 0) start point to fpr/tpr but not to
    # thresholds, so the ROC point for thresholds[idx] sits one slot later.
    point = idx + 1
    tpr = self.tpr[point]
    fpr = self.fpr[point]

    precision = tpr / (tpr + fpr) if (tpr + fpr) > 0 else 0

    # F1 is the harmonic mean of precision and recall (recall == TPR).
    f1 = 2 * (precision * tpr) / (precision + tpr) if (precision + tpr) > 0 else 0

    return {
        'threshold': threshold,
        'tpr': tpr,
        'fpr': fpr,
        'precision': precision,
        'f1score': f1,
        'auc': self.aucvalue,
    }

def plotroccurve(self, title: str = "ROC Curve") -> None:
"""绘制 ROC 曲线"""
if self.fpr is None or self.tpr is None:
raise ValueError("请先调用 computeroc_curve 方法")

plt.figure(figsize=(8, 6))