Sometimes you just need the list of the modules in your ResNet (number of blocks, parameters of the convolutions, number of trainable parameters, etc.). I don’t need a Medium tutorial showing me how to build a ResNet, and I don’t want to parse the PyTorch source code. So next time, instead of spinning up a new terminal just to run print(torchvision.models.resnet18()), I can simply check this plain old listing.

Below is the architecture, i.e. the list of all the layers, for ResNet 18 and ResNet 50 as implemented in PyTorch's Torchvision (version 0.9.1). I also included the shapes of the actual parameter tensors (weights and biases) of these modules. Here you go:

ResNet 18

ResNet 18 has 11,689,512 trainable parameters.

Architecture

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)

Parameter shapes

conv1.weight: torch.Size([64, 3, 7, 7])
bn1.weight: torch.Size([64])
bn1.bias: torch.Size([64])
layer1.0.conv1.weight: torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight: torch.Size([64])
layer1.0.bn1.bias: torch.Size([64])
layer1.0.conv2.weight: torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight: torch.Size([64])
layer1.0.bn2.bias: torch.Size([64])
layer1.1.conv1.weight: torch.Size([64, 64, 3, 3])
layer1.1.bn1.weight: torch.Size([64])
layer1.1.bn1.bias: torch.Size([64])
layer1.1.conv2.weight: torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight: torch.Size([64])
layer1.1.bn2.bias: torch.Size([64])
layer2.0.conv1.weight: torch.Size([128, 64, 3, 3])
layer2.0.bn1.weight: torch.Size([128])
layer2.0.bn1.bias: torch.Size([128])
layer2.0.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.0.bn2.weight: torch.Size([128])
layer2.0.bn2.bias: torch.Size([128])
layer2.0.downsample.0.weight: torch.Size([128, 64, 1, 1])
layer2.0.downsample.1.weight: torch.Size([128])
layer2.0.downsample.1.bias: torch.Size([128])
layer2.1.conv1.weight: torch.Size([128, 128, 3, 3])
layer2.1.bn1.weight: torch.Size([128])
layer2.1.bn1.bias: torch.Size([128])
layer2.1.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.1.bn2.weight: torch.Size([128])
layer2.1.bn2.bias: torch.Size([128])
layer3.0.conv1.weight: torch.Size([256, 128, 3, 3])
layer3.0.bn1.weight: torch.Size([256])
layer3.0.bn1.bias: torch.Size([256])
layer3.0.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.0.bn2.weight: torch.Size([256])
layer3.0.bn2.bias: torch.Size([256])
layer3.0.downsample.0.weight: torch.Size([256, 128, 1, 1])
layer3.0.downsample.1.weight: torch.Size([256])
layer3.0.downsample.1.bias: torch.Size([256])
layer3.1.conv1.weight: torch.Size([256, 256, 3, 3])
layer3.1.bn1.weight: torch.Size([256])
layer3.1.bn1.bias: torch.Size([256])
layer3.1.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.1.bn2.weight: torch.Size([256])
layer3.1.bn2.bias: torch.Size([256])
layer4.0.conv1.weight: torch.Size([512, 256, 3, 3])
layer4.0.bn1.weight: torch.Size([512])
layer4.0.bn1.bias: torch.Size([512])
layer4.0.conv2.weight: torch.Size([512, 512, 3, 3])
layer4.0.bn2.weight: torch.Size([512])
layer4.0.bn2.bias: torch.Size([512])
layer4.0.downsample.0.weight: torch.Size([512, 256, 1, 1])
layer4.0.downsample.1.weight: torch.Size([512])
layer4.0.downsample.1.bias: torch.Size([512])
layer4.1.conv1.weight: torch.Size([512, 512, 3, 3])
layer4.1.bn1.weight: torch.Size([512])
layer4.1.bn1.bias: torch.Size([512])
layer4.1.conv2.weight: torch.Size([512, 512, 3, 3])
layer4.1.bn2.weight: torch.Size([512])
layer4.1.bn2.bias: torch.Size([512])
fc.weight: torch.Size([1000, 512])
fc.bias: torch.Size([1000])

ResNet 50

ResNet 50 has 25,557,032 trainable parameters.

Architecture

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer2): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (3): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer3): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (3): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (4): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (5): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=2048, out_features=1000, bias=True)
)

Parameter shapes

conv1.weight: torch.Size([64, 3, 7, 7])
bn1.weight: torch.Size([64])
bn1.bias: torch.Size([64])
layer1.0.conv1.weight: torch.Size([64, 64, 1, 1])
layer1.0.bn1.weight: torch.Size([64])
layer1.0.bn1.bias: torch.Size([64])
layer1.0.conv2.weight: torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight: torch.Size([64])
layer1.0.bn2.bias: torch.Size([64])
layer1.0.conv3.weight: torch.Size([256, 64, 1, 1])
layer1.0.bn3.weight: torch.Size([256])
layer1.0.bn3.bias: torch.Size([256])
layer1.0.downsample.0.weight: torch.Size([256, 64, 1, 1])
layer1.0.downsample.1.weight: torch.Size([256])
layer1.0.downsample.1.bias: torch.Size([256])
layer1.1.conv1.weight: torch.Size([64, 256, 1, 1])
layer1.1.bn1.weight: torch.Size([64])
layer1.1.bn1.bias: torch.Size([64])
layer1.1.conv2.weight: torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight: torch.Size([64])
layer1.1.bn2.bias: torch.Size([64])
layer1.1.conv3.weight: torch.Size([256, 64, 1, 1])
layer1.1.bn3.weight: torch.Size([256])
layer1.1.bn3.bias: torch.Size([256])
layer1.2.conv1.weight: torch.Size([64, 256, 1, 1])
layer1.2.bn1.weight: torch.Size([64])
layer1.2.bn1.bias: torch.Size([64])
layer1.2.conv2.weight: torch.Size([64, 64, 3, 3])
layer1.2.bn2.weight: torch.Size([64])
layer1.2.bn2.bias: torch.Size([64])
layer1.2.conv3.weight: torch.Size([256, 64, 1, 1])
layer1.2.bn3.weight: torch.Size([256])
layer1.2.bn3.bias: torch.Size([256])
layer2.0.conv1.weight: torch.Size([128, 256, 1, 1])
layer2.0.bn1.weight: torch.Size([128])
layer2.0.bn1.bias: torch.Size([128])
layer2.0.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.0.bn2.weight: torch.Size([128])
layer2.0.bn2.bias: torch.Size([128])
layer2.0.conv3.weight: torch.Size([512, 128, 1, 1])
layer2.0.bn3.weight: torch.Size([512])
layer2.0.bn3.bias: torch.Size([512])
layer2.0.downsample.0.weight: torch.Size([512, 256, 1, 1])
layer2.0.downsample.1.weight: torch.Size([512])
layer2.0.downsample.1.bias: torch.Size([512])
layer2.1.conv1.weight: torch.Size([128, 512, 1, 1])
layer2.1.bn1.weight: torch.Size([128])
layer2.1.bn1.bias: torch.Size([128])
layer2.1.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.1.bn2.weight: torch.Size([128])
layer2.1.bn2.bias: torch.Size([128])
layer2.1.conv3.weight: torch.Size([512, 128, 1, 1])
layer2.1.bn3.weight: torch.Size([512])
layer2.1.bn3.bias: torch.Size([512])
layer2.2.conv1.weight: torch.Size([128, 512, 1, 1])
layer2.2.bn1.weight: torch.Size([128])
layer2.2.bn1.bias: torch.Size([128])
layer2.2.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.2.bn2.weight: torch.Size([128])
layer2.2.bn2.bias: torch.Size([128])
layer2.2.conv3.weight: torch.Size([512, 128, 1, 1])
layer2.2.bn3.weight: torch.Size([512])
layer2.2.bn3.bias: torch.Size([512])
layer2.3.conv1.weight: torch.Size([128, 512, 1, 1])
layer2.3.bn1.weight: torch.Size([128])
layer2.3.bn1.bias: torch.Size([128])
layer2.3.conv2.weight: torch.Size([128, 128, 3, 3])
layer2.3.bn2.weight: torch.Size([128])
layer2.3.bn2.bias: torch.Size([128])
layer2.3.conv3.weight: torch.Size([512, 128, 1, 1])
layer2.3.bn3.weight: torch.Size([512])
layer2.3.bn3.bias: torch.Size([512])
layer3.0.conv1.weight: torch.Size([256, 512, 1, 1])
layer3.0.bn1.weight: torch.Size([256])
layer3.0.bn1.bias: torch.Size([256])
layer3.0.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.0.bn2.weight: torch.Size([256])
layer3.0.bn2.bias: torch.Size([256])
layer3.0.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.0.bn3.weight: torch.Size([1024])
layer3.0.bn3.bias: torch.Size([1024])
layer3.0.downsample.0.weight: torch.Size([1024, 512, 1, 1])
layer3.0.downsample.1.weight: torch.Size([1024])
layer3.0.downsample.1.bias: torch.Size([1024])
layer3.1.conv1.weight: torch.Size([256, 1024, 1, 1])
layer3.1.bn1.weight: torch.Size([256])
layer3.1.bn1.bias: torch.Size([256])
layer3.1.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.1.bn2.weight: torch.Size([256])
layer3.1.bn2.bias: torch.Size([256])
layer3.1.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.1.bn3.weight: torch.Size([1024])
layer3.1.bn3.bias: torch.Size([1024])
layer3.2.conv1.weight: torch.Size([256, 1024, 1, 1])
layer3.2.bn1.weight: torch.Size([256])
layer3.2.bn1.bias: torch.Size([256])
layer3.2.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.2.bn2.weight: torch.Size([256])
layer3.2.bn2.bias: torch.Size([256])
layer3.2.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.2.bn3.weight: torch.Size([1024])
layer3.2.bn3.bias: torch.Size([1024])
layer3.3.conv1.weight: torch.Size([256, 1024, 1, 1])
layer3.3.bn1.weight: torch.Size([256])
layer3.3.bn1.bias: torch.Size([256])
layer3.3.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.3.bn2.weight: torch.Size([256])
layer3.3.bn2.bias: torch.Size([256])
layer3.3.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.3.bn3.weight: torch.Size([1024])
layer3.3.bn3.bias: torch.Size([1024])
layer3.4.conv1.weight: torch.Size([256, 1024, 1, 1])
layer3.4.bn1.weight: torch.Size([256])
layer3.4.bn1.bias: torch.Size([256])
layer3.4.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.4.bn2.weight: torch.Size([256])
layer3.4.bn2.bias: torch.Size([256])
layer3.4.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.4.bn3.weight: torch.Size([1024])
layer3.4.bn3.bias: torch.Size([1024])
layer3.5.conv1.weight: torch.Size([256, 1024, 1, 1])
layer3.5.bn1.weight: torch.Size([256])
layer3.5.bn1.bias: torch.Size([256])
layer3.5.conv2.weight: torch.Size([256, 256, 3, 3])
layer3.5.bn2.weight: torch.Size([256])
layer3.5.bn2.bias: torch.Size([256])
layer3.5.conv3.weight: torch.Size([1024, 256, 1, 1])
layer3.5.bn3.weight: torch.Size([1024])
layer3.5.bn3.bias: torch.Size([1024])
layer4.0.conv1.weight: torch.Size([512, 1024, 1, 1])
layer4.0.bn1.weight: torch.Size([512])
layer4.0.bn1.bias: torch.Size([512])
layer4.0.conv2.weight: torch.Size([512, 512, 3, 3])
layer4.0.bn2.weight: torch.Size([512])
layer4.0.bn2.bias: torch.Size([512])
layer4.0.conv3.weight: torch.Size([2048, 512, 1, 1])
layer4.0.bn3.weight: torch.Size([2048])
layer4.0.bn3.bias: torch.Size([2048])
layer4.0.downsample.0.weight: torch.Size([2048, 1024, 1, 1])
layer4.0.downsample.1.weight: torch.Size([2048])
layer4.0.downsample.1.bias: torch.Size([2048])
layer4.1.conv1.weight: torch.Size([512, 2048, 1, 1])
layer4.1.bn1.weight: torch.Size([512])
layer4.1.bn1.bias: torch.Size([512])
layer4.1.conv2.weight: torch.Size([512, 512, 3, 3])
layer4.1.bn2.weight: torch.Size([512])
layer4.1.bn2.bias: torch.Size([512])
layer4.1.conv3.weight: torch.Size([2048, 512, 1, 1])
layer4.1.bn3.weight: torch.Size([2048])
layer4.1.bn3.bias: torch.Size([2048])
layer4.2.conv1.weight: torch.Size([512, 2048, 1, 1])
layer4.2.bn1.weight: torch.Size([512])
layer4.2.bn1.bias: torch.Size([512])
layer4.2.conv2.weight: torch.Size([512, 512, 3, 3])
layer4.2.bn2.weight: torch.Size([512])
layer4.2.bn2.bias: torch.Size([512])
layer4.2.conv3.weight: torch.Size([2048, 512, 1, 1])
layer4.2.bn3.weight: torch.Size([2048])
layer4.2.bn3.bias: torch.Size([2048])
fc.weight: torch.Size([1000, 2048])
fc.bias: torch.Size([1000])