446 字
1 分钟
timm库的相关使用
2026-04-05

Timm 库使用教程#

简介#

timm (PyTorch Image Models) 是一个由 Ross Wightman 开发的优秀计算机视觉库,提供了:

  • 500+ 预训练模型
  • 标准化的模型接口
  • 高效的数据增强和训练工具
  • SOTA 模型的快速实现

安装方式:

pip install timm

一、ResNet 使用示例#

1.1 基础使用#

加载预训练模型#

import timm
import torch

# Enumerate every pretrained ResNet checkpoint that timm ships.
available = timm.list_models('resnet*', pretrained=True)
print(f"可用的 ResNet 模型数量: {len(available)}")
print("部分模型:", available[:5])

# Build a pretrained ResNet50 and switch it to inference mode.
model = timm.create_model('resnet50', pretrained=True)
model.eval()
print(f"模型类型: {type(model)}")
print(f"参数量: {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M")

推理示例#

from PIL import Image
import requests
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# 1. Fetch a sample image.
url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
image = Image.open(requests.get(url, stream=True).raw)

# 2. Read the preprocessing config the checkpoint was trained with.
config = resolve_data_config({}, model=model)
print("数据配置:", config)

# 3. Build the matching input transform.
transform = create_transform(**config)

# 4. Preprocess: transform, then add the batch dimension.
input_tensor = transform(image).unsqueeze(0)

# 5. Inference with autograd disabled (restored indentation: the original
#    `with`/`for` bodies were flattened to column 0 and would not parse).
with torch.no_grad():
    output = model(input_tensor)
print(f"输出形状: {output.shape}")  # [1, 1000] - ImageNet's 1000 classes

# 6. Top-5 predictions from the softmax distribution.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
top5_prob, top5_catid = torch.topk(probabilities, 5)
print("\nTop-5 预测:")
for i in range(5):
    print(f"{i+1}. 类别 {top5_catid[i].item()}, 概率: {top5_prob[i].item():.4f}")

1.2 自定义分类头#

# Rebuild ResNet50 with a fresh classification head sized for our task.
num_classes = 10  # e.g. CIFAR-10
model = timm.create_model(
    'resnet50',
    pretrained=True,
    num_classes=num_classes,
)
print(f"新的分类头输出: {model.fc}")

# num_classes=0 strips the head entirely: the model then returns pooled features.
feature_model = timm.create_model(
    'resnet50',
    pretrained=True,
    num_classes=0,
)

# Feature extraction check (restored the flattened `with` body).
with torch.no_grad():
    features = feature_model(input_tensor)
print(f"特征向量形状: {features.shape}")  # [1, 2048]

1.3 ResNet 变体对比#

# Compare parameter counts across ResNet variants.
variants = [
    'resnet34',     # standard ResNet34
    'resnet50',     # standard ResNet50
    'resnet50d',    # ResNet50-D (improved stem/downsampling)
    'resnet101',    # deeper ResNet
    'resnetv2_50',  # ResNet V2 (pre-activation)
]
# Restored indentation: the loop body was flattened to column 0.
for variant in variants:
    model = timm.create_model(variant, pretrained=False)
    params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f"{variant:20s} - 参数量: {params:6.2f}M")

1.4 微调训练示例#

import torch.nn as nn
import torch.optim as optim

# Linear-probe setup: train only the classification head.
model = timm.create_model('resnet50', pretrained=True, num_classes=10)

# Freeze every parameter outside the head ('fc' on timm ResNets).
# Restored indentation: the for/if bodies were flattened and would not parse.
for name, param in model.named_parameters():
    if 'fc' not in name:
        param.requires_grad = False

# Optimize only the head parameters.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

model.train()
# Example training step (uncomment to run):
# dummy_data = torch.randn(8, 3, 224, 224)
# dummy_labels = torch.randint(0, 10, (8,))
#
# optimizer.zero_grad()
# outputs = model(dummy_data)
# loss = criterion(outputs, dummy_labels)
# loss.backward()
# optimizer.step()

二、Vision Transformer (ViT) 使用示例#

2.1 基础使用#

加载预训练 ViT 模型#

import timm
import torch

# Enumerate the pretrained ViT checkpoints.
vit_models = timm.list_models('vit*', pretrained=True)
print(f"可用的 ViT 模型数量: {len(vit_models)}")
print("部分模型:", vit_models[:10])

# ViT-Base: 16x16 patches on a 224x224 input.
model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.eval()
print(f"\n模型参数量: {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M")

ViT 推理#

from PIL import Image
import requests
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# Fetch a sample image.
url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
image = Image.open(requests.get(url, stream=True).raw)

# ViT uses the same timm preprocessing pipeline as the CNNs.
config = resolve_data_config({}, model=model)
transform = create_transform(**config)
input_tensor = transform(image).unsqueeze(0)

# Inference (restored the flattened `with` body — it would not parse).
with torch.no_grad():
    output = model(input_tensor)
print(f"输出形状: {output.shape}")

# Top-5 predictions.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
top5_prob, top5_catid = torch.topk(probabilities, 5)
print("\nTop-5 预测:")
for i in range(5):
    print(f"{i+1}. 类别 {top5_catid[i].item()}, 概率: {top5_prob[i].item():.4f}")

2.2 ViT 模型架构详解#

# Inspect the ViT configuration.
model = timm.create_model('vit_base_patch16_224', pretrained=True)
print("ViT 配置信息:")
print(f"Patch 大小: {model.patch_embed.patch_size}")
print(f"嵌入维度: {model.embed_dim}")
# Fix: timm's VisionTransformer exposes no `depth`/`num_heads` attributes;
# derive them from the block list and the first attention module instead.
print(f"深度(Transformer 层数): {len(model.blocks)}")
print(f"注意力头数: {model.blocks[0].attn.num_heads}")
print(f"图像尺寸: {model.patch_embed.img_size}")

# Walk the major components of the architecture.
print("\n模型主要组件:")
print(f"1. Patch Embedding: {model.patch_embed}")
print(f"2. 位置编码形状: {model.pos_embed.shape}")
print(f"3. CLS Token 形状: {model.cls_token.shape}")
print(f"4. Transformer 块数量: {len(model.blocks)}")
print(f"5. 分类头: {model.head}")

2.3 ViT 变体对比#

# Parameter counts for the ViT family.
vit_variants = [
    'vit_tiny_patch16_224',   # Tiny:  ~5.7M params
    'vit_small_patch16_224',  # Small: ~22M params
    'vit_base_patch16_224',   # Base:  ~86M params
    'vit_large_patch16_224',  # Large: ~304M params
]
print("ViT 模型对比:\n")
# Restored the flattened for/try bodies; also narrowed the bare `except:`
# (which would swallow KeyboardInterrupt/SystemExit too).
for variant in vit_variants:
    try:
        model = timm.create_model(variant, pretrained=False)
        params = sum(p.numel() for p in model.parameters()) / 1e6
        print(f"{variant:30s} - 参数量: {params:7.2f}M")
    except Exception:
        print(f"{variant:30s} - 不可用")

# Smaller patches -> more tokens -> more compute.
print("\n不同 Patch 大小:")
patch_variants = [
    'vit_base_patch32_224',  # 32x32 patches
    'vit_base_patch16_224',  # 16x16 patches
]
for variant in patch_variants:
    model = timm.create_model(variant, pretrained=False)
    params = sum(p.numel() for p in model.parameters()) / 1e6
    num_patches = model.patch_embed.num_patches
    print(f"{variant:30s} - Patches: {num_patches:4d}, 参数: {params:.2f}M")

2.4 提取中间层特征#

import torch

model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.eval()

# Method 1: forward_features returns the token-level features
# (restored the flattened `with` body — it would not parse).
with torch.no_grad():
    input_tensor = torch.randn(1, 3, 224, 224)
    features = model.forward_features(input_tensor)
    print(f"特征形状: {features.shape}")  # [1, 197, 768]
    # 197 = 1 (CLS token) + 196 (14x14 patches)
    cls_token = features[:, 0]  # CLS token, typically used for classification
    print(f"CLS Token 形状: {cls_token.shape}")  # [1, 768]

# Method 2: forward hooks capture intermediate outputs.
features_dict = {}

def get_features(name):
    """Return a forward hook that stores the module output under `name`."""
    def hook(model, input, output):
        features_dict[name] = output.detach()
    return hook

# Register the hook on the last transformer block and run a forward pass.
model.blocks[-1].register_forward_hook(get_features('last_block'))
with torch.no_grad():
    _ = model(input_tensor)
print(f"\n最后一个 Transformer 块的输出: {features_dict['last_block'].shape}")

2.5 ViT 微调示例#

import torch.nn as nn
import torch.optim as optim

# ViT with a 10-way classification head.
num_classes = 10
model = timm.create_model(
    'vit_base_patch16_224',
    pretrained=True,
    num_classes=num_classes,
)

# Strategy 1: freeze the patch embedding and the first two transformer blocks
# (restored the flattened for/if bodies — they would not parse).
for name, param in model.named_parameters():
    if 'patch_embed' in name or 'blocks.0.' in name or 'blocks.1.' in name:
        param.requires_grad = False

# Strategy 2: discriminative learning rates (head > blocks > patch embed).
param_groups = [
    {'params': model.head.parameters(), 'lr': 1e-3},
    {'params': model.blocks.parameters(), 'lr': 1e-4},
    {'params': model.patch_embed.parameters(), 'lr': 1e-5},
]
optimizer = optim.AdamW(param_groups, weight_decay=0.05)
criterion = nn.CrossEntropyLoss()
print("ViT 微调配置完成")
print(f"可训练参数: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.2f}M")
print(f"总参数: {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M")

三、高级功能#

3.1 模型信息查询#

import timm

# Inspect a model's default (pretraining) configuration.
model_name = 'resnet50'
model = timm.create_model(model_name, pretrained=False)

default_cfg = model.default_cfg
print("默认配置:")
# Restored the flattened loop body — it would not parse.
for key, value in default_cfg.items():
    print(f" {key}: {value}")

# Derive the expected input pipeline from the default config.
data_config = timm.data.resolve_data_config(model.default_cfg)
print(f"\n推荐输入尺寸: {data_config['input_size']}")
print(f"均值: {data_config['mean']}")
print(f"标准差: {data_config['std']}")

3.2 数据增强#

from timm.data import create_transform
from timm.data.auto_augment import rand_augment_transform

# Training-time augmentation: RandAugment + Random Erasing.
transform_train = create_transform(
    input_size=(3, 224, 224),
    is_training=True,
    auto_augment='rand-m9-mstd0.5-inc1',  # RandAugment policy string
    re_prob=0.25,                          # Random Erasing probability
    re_mode='pixel',
    re_count=1,
)

# Deterministic eval-time transform (resize, crop, normalize only).
transform_eval = create_transform(
    input_size=(3, 224, 224),
    is_training=False,
)
print("训练变换:", transform_train)
print("\n评估变换:", transform_eval)

3.3 混合精度训练#

import torch
from torch.cuda.amp import autocast, GradScaler

# Model + optimizer + gradient scaler for mixed-precision training.
model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=10)
model = model.cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = GradScaler()

# Mixed-precision training loop (uncomment with a real dataloader):
# for batch in dataloader:
#     images, labels = batch
#     images, labels = images.cuda(), labels.cuda()
#
#     optimizer.zero_grad()
#
#     with autocast():                     # forward runs in reduced precision
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#
#     scaler.scale(loss).backward()        # scaled backward avoids fp16 underflow
#     scaler.step(optimizer)
#     scaler.update()

3.4 模型导出#

import torch

model = timm.create_model('resnet50', pretrained=True)
model.eval()

# ONNX export with a dynamic batch dimension.
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "resnet50.onnx",
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)
print("模型已导出为 ONNX 格式")

# TorchScript export via tracing.
traced_model = torch.jit.trace(model, dummy_input)
traced_model.save("resnet50_traced.pt")
print("模型已导出为 TorchScript 格式")

四、常用技巧和最佳实践#

4.1 查找合适的模型#

import timm

# All pretrained classification models timm knows about.
classification_models = timm.list_models(pretrained=True)
print(f"预训练分类模型总数: {len(classification_models)}")

# Filter by architecture family with glob patterns.
efficient_nets = timm.list_models('efficientnet*', pretrained=True)
convnext_models = timm.list_models('convnext*', pretrained=True)
swin_models = timm.list_models('swin*', pretrained=True)
print(f"\nEfficientNet 系列: {len(efficient_nets)} 个")
print(f"ConvNeXt 系列: {len(convnext_models)} 个")
print(f"Swin Transformer 系列: {len(swin_models)} 个")

# A few lightweight choices with their parameter counts
# (restored the flattened loop body — it would not parse).
print("\n推荐的轻量级模型:")
lightweight = ['mobilenetv3_large_100', 'efficientnet_b0', 'resnet34']
for model_name in lightweight:
    model = timm.create_model(model_name, pretrained=False)
    params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f" {model_name:30s}: {params:5.2f}M 参数")

4.2 性能优化#

import torch
import timm

model = timm.create_model('resnet50', pretrained=True)
model.eval()

# 1. torch.compile (PyTorch 2.0+) fuses/optimizes the graph
#    (restored the flattened if/with bodies — they would not parse).
if hasattr(torch, 'compile'):
    model = torch.compile(model)
    print("模型已编译优化")

# 2. channels_last often speeds up convolutions on modern hardware.
model = model.to(memory_format=torch.channels_last)
dummy_input = torch.randn(1, 3, 224, 224).to(memory_format=torch.channels_last)

# 3. Disable autograd for inference.
with torch.no_grad():
    output = model(dummy_input)

4.3 批量推理#

import torch
from torch.utils.data import DataLoader, Dataset
from timm.data import create_transform
class ImageDataset(Dataset):
    """Minimal dataset: loads images from file paths and applies a transform.

    Restored the class-body indentation — the extracted source had the
    methods flattened to column 0, which is a syntax error.
    """

    def __init__(self, image_paths, transform):
        self.image_paths = image_paths  # list of image file paths
        self.transform = transform      # callable mapping PIL.Image -> tensor

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Local import keeps PIL optional until an item is actually fetched.
        from PIL import Image
        image = Image.open(self.image_paths[idx]).convert('RGB')
        return self.transform(image)
# Inference setup: pretrained ResNet50 on GPU, eval-time transform.
model = timm.create_model('resnet50', pretrained=True)
model.eval()
model = model.cuda()

transform = create_transform(
    input_size=(3, 224, 224),
    is_training=False,
)

# Batched inference over a list of image paths:
# image_paths = ['path1.jpg', 'path2.jpg', ...]
# dataset = ImageDataset(image_paths, transform)
# loader = DataLoader(dataset, batch_size=32, num_workers=4)
# for batch in loader:
#     batch = batch.cuda()
#     with torch.no_grad():
#         outputs = model(batch)
#     # process outputs...

五、总结#

ResNet vs ViT 对比#

| 特性 | ResNet | ViT |
| --- | --- | --- |
| 架构类型 | CNN | Transformer |
| 归纳偏置 | 强(局部性、平移不变性) | 弱 |
| 数据需求 | 中等 | 大(需要大规模预训练) |
| 计算效率 | 高 | 中等(与输入大小平方相关) |
| 参数量 | 较少 | 较多 |
| 迁移学习 | 优秀 | 优秀(预训练后) |
| 推理速度 | 快 | 中等 |

选择建议#

使用 ResNet 当:

  • 数据集较小(< 100k 图像)
  • 需要快速推理
  • 计算资源有限
  • 需要部署到边缘设备

使用 ViT 当:

  • 有大规模预训练模型可用
  • 数据集较大
  • 追求最高精度
  • 有足够的计算资源

常用资源#


附录:完整代码示例#

ResNet 完整训练示例#

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import timm
from timm.data import create_transform

# 1. ResNet50 with a 10-class head, moved to GPU.
model = timm.create_model('resnet50', pretrained=True, num_classes=10)
model = model.cuda()

# 2. Augmentation pipelines for train and validation.
transform_train = create_transform(
    input_size=(3, 224, 224),
    is_training=True,
    auto_augment='rand-m9-mstd0.5-inc1',
)
transform_val = create_transform(
    input_size=(3, 224, 224),
    is_training=False,
)

# 3. Dataloaders (plug in your own dataset class):
# train_dataset = YourDataset(transform=transform_train)
# val_dataset = YourDataset(transform=transform_val)
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 4. Optimizer and loss.
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.05)
criterion = nn.CrossEntropyLoss()
# 5. 训练循环
def train_epoch(model, loader, criterion, optimizer):
    """Run one training epoch on `loader`; return (mean loss, accuracy %).

    Restored the function-body indentation — the extracted source had the
    body flattened to column 0, which is a syntax error.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        # NOTE(review): assumes a CUDA device is available — confirm.
        images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    return running_loss / len(loader), 100. * correct / total
def validate(model, loader, criterion):
    """Evaluate on `loader` without gradients; return (mean loss, accuracy %).

    Restored the function-body indentation — the extracted source had the
    body flattened to column 0, which is a syntax error.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            # NOTE(review): assumes a CUDA device is available — confirm.
            images, labels = images.cuda(), labels.cuda()
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return running_loss / len(loader), 100. * correct / total
# 训练
# num_epochs = 10
# for epoch in range(num_epochs):
# train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
# val_loss, val_acc = validate(model, val_loader, criterion)
#
# print(f'Epoch {epoch+1}/{num_epochs}')
# print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
# print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

ViT 完整训练示例#

import torch
import torch.nn as nn
import torch.optim as optim
import timm

# 1. ViT-Base with a 10-class head, moved to GPU.
model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=10)
model = model.cuda()
# 2. 分层学习率
def get_parameter_groups(model):
    """Split `model`'s parameters into three AdamW groups.

    Restored the function-body indentation — the extracted source had the
    body flattened to column 0, which is a syntax error.

    Groups:
      1. backbone weights        — lr 1e-4, weight decay 0.05
      2. backbone bias/norm      — lr 1e-4, no weight decay (standard practice)
      3. classification head     — lr 1e-3, weight decay 0.05
    """
    no_decay = ['bias', 'norm']
    return [
        {
            'params': [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay) and 'head' not in n],
            'lr': 1e-4,
            'weight_decay': 0.05,
        },
        {
            'params': [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay) and 'head' not in n],
            'lr': 1e-4,
            'weight_decay': 0.0,
        },
        {
            'params': model.head.parameters(),
            'lr': 1e-3,
            'weight_decay': 0.05,
        },
    ]
# Build the optimizer from the layered parameter groups.
param_groups = get_parameter_groups(model)
optimizer = optim.AdamW(param_groups)

# 3. Cosine learning-rate schedule with one warmup epoch.
from timm.scheduler import CosineLRScheduler
scheduler = CosineLRScheduler(
    optimizer,
    t_initial=10,        # total epochs
    lr_min=1e-6,
    warmup_t=1,
    warmup_lr_init=1e-6,
)

# 4. Training loop: same shape as the ResNet example (often needs more epochs).
criterion = nn.CrossEntropyLoss()
# ... 训练循环代码同上
分享

如果这篇文章对你有帮助,欢迎分享给更多人!

timm库的相关使用
https://blog.azusacat.cn/posts/timm库的相关使用/
作者
Yui
发布于
2026-04-05
许可协议
CC BY-NC-SA 4.0

部分信息可能已经过时

随机文章 随机推荐
暂无数据

目录