updated cuda benchmarks
This commit is contained in:
@@ -0,0 +1,525 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize= 1 Time=33.559s Imgs/s= 38.14\n",
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize= 2 Time=16.639s Imgs/s= 76.93\n",
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize= 4 Time= 8.817s Imgs/s=145.17\n",
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize= 8 Time= 8.802s Imgs/s=145.41\n",
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize=16 Time= 9.094s Imgs/s=140.76\n",
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize=32 Time= 8.247s Imgs/s=155.21\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize= 1 Time=34.151s Imgs/s= 37.48\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize= 2 Time=16.366s Imgs/s= 78.21\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize= 4 Time= 7.701s Imgs/s=166.20\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize= 8 Time= 3.888s Imgs/s=329.25\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize=16 Time= 3.824s Imgs/s=334.75\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize=32 Time= 3.706s Imgs/s=345.38\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize= 1 Time=34.202s Imgs/s= 37.43\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize= 2 Time=16.350s Imgs/s= 78.29\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize= 4 Time= 7.816s Imgs/s=163.76\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize= 8 Time= 3.884s Imgs/s=329.59\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize=16 Time= 2.029s Imgs/s=630.98\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize=32 Time= 1.819s Imgs/s=703.63\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize= 1 Time=33.488s Imgs/s= 38.22\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize= 2 Time=16.172s Imgs/s= 79.15\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize= 4 Time= 7.842s Imgs/s=163.22\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize= 8 Time= 3.866s Imgs/s=331.12\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=16 Time= 2.034s Imgs/s=629.43\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=32 Time= 1.469s Imgs/s=871.30\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cuda')\n",
|
||||||
|
" for num_workers in [1, 2, 4, 8]: # 4 < 2 for test\n",
|
||||||
|
" for batch_size in [1, 2, 4, 8, 16, 32]:\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" target = target.to('cuda', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
" torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 1 BatchSize=40 Time= 7.026s Imgs/s=182.17\n",
|
||||||
|
"Mode=test: NumWorkers= 2 BatchSize=40 Time= 3.407s Imgs/s=375.71\n",
|
||||||
|
"Mode=test: NumWorkers= 4 BatchSize=40 Time= 1.752s Imgs/s=730.46\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.323s Imgs/s=967.16\n",
|
||||||
|
"Mode=test: NumWorkers=16 BatchSize=40 Time= 1.419s Imgs/s=901.91\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cuda')\n",
|
||||||
|
" for num_workers in [1, 2, 4, 8, 16]: # 4 < 2 for test\n",
|
||||||
|
" for batch_size in [40]:\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" target = target.to('cuda', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
" torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# GPU Comparison\n",
|
||||||
|
"\n",
|
||||||
|
"## CUDA train"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'GeForce RTX 2080 Ti'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"torch.cuda.get_device_name(0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.084s Imgs/s=251.75\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.076s Imgs/s=252.17\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.079s Imgs/s=252.02\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.437s Imgs/s=235.41\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.157s Imgs/s=248.20\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.139s Imgs/s=249.09\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.191s Imgs/s=246.56\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.304s Imgs/s=241.34\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.275s Imgs/s=242.67\n",
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.202s Imgs/s=246.06\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'train'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cuda')\n",
|
||||||
|
" for _ in range (10):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 40\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" target = target.to('cuda', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
" torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CUDA test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.418s Imgs/s=902.44\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.315s Imgs/s=973.75\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.348s Imgs/s=949.28\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.363s Imgs/s=938.86\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.353s Imgs/s=945.78\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.380s Imgs/s=927.33\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.365s Imgs/s=937.47\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.390s Imgs/s=920.68\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.354s Imgs/s=945.08\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.396s Imgs/s=916.73\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cuda')\n",
|
||||||
|
" for _ in range (10):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 40\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" target = target.to('cuda', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
" torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CPU train"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=10 Time=299.952s Imgs/s= 4.27\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'train'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cpu')\n",
|
||||||
|
" for _ in range (1):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 10\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" target = target.to('cpu', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
"# torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CPU test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=10 Time=103.033s Imgs/s= 12.42\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cpu')\n",
|
||||||
|
" for _ in range (1):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 10\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" target = target.to('cpu', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
"# torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Comparison"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Image difference: 7967.23%\n",
|
||||||
|
"Time difference: 7966.02%\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"image_increase = ((960/11.9)-1)\n",
|
||||||
|
"time_increase = ((107.520/1.333)-1)\n",
|
||||||
|
"print(f'Image difference: {image_increase:.2%}')\n",
|
||||||
|
"print(f'Time difference: {time_increase:.2%}')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -150,12 +150,32 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "markdown",
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"torch.cuda.empty_cache()"
|
"# GPU Comparison\n",
|
||||||
|
"\n",
|
||||||
|
"## CUDA train"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'GeForce RTX 2080 Ti'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"torch.cuda.get_device_name(0)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -167,36 +187,16 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.509s Imgs/s=848.26\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.084s Imgs/s=251.75\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.310s Imgs/s=976.73\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.076s Imgs/s=252.17\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.348s Imgs/s=949.28\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.079s Imgs/s=252.02\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.324s Imgs/s=966.43\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.437s Imgs/s=235.41\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.348s Imgs/s=949.28\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.157s Imgs/s=248.20\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.362s Imgs/s=939.55\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.139s Imgs/s=249.09\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.415s Imgs/s=904.46\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.191s Imgs/s=246.56\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.314s Imgs/s=973.77\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.304s Imgs/s=241.34\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.445s Imgs/s=885.73\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.275s Imgs/s=242.67\n",
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.417s Imgs/s=903.18\n",
|
"Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.202s Imgs/s=246.06\n"
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.415s Imgs/s=904.46\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.432s Imgs/s=893.75\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.553s Imgs/s=824.29\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.328s Imgs/s=963.53\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.498s Imgs/s=854.48\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.394s Imgs/s=918.04\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.531s Imgs/s=836.11\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.375s Imgs/s=930.69\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.401s Imgs/s=913.47\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.391s Imgs/s=920.02\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.328s Imgs/s=963.53\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.328s Imgs/s=963.53\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.431s Imgs/s=894.37\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.326s Imgs/s=964.98\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.386s Imgs/s=923.33\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.329s Imgs/s=962.81\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.459s Imgs/s=877.25\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.427s Imgs/s=896.87\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.441s Imgs/s=888.18\n",
|
|
||||||
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.448s Imgs/s=883.90\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -211,7 +211,7 @@
|
|||||||
"import time\n",
|
"import time\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def main():\n",
|
"def main():\n",
|
||||||
" mode = 'test'\n",
|
" mode = 'train'\n",
|
||||||
" model = models.resnet50()\n",
|
" model = models.resnet50()\n",
|
||||||
" criterion = nn.CrossEntropyLoss()\n",
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
@@ -220,7 +220,7 @@
|
|||||||
" if mode=='test': # switch to evaluate mode\n",
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
" model.eval()\n",
|
" model.eval()\n",
|
||||||
" model.to('cuda')\n",
|
" model.to('cuda')\n",
|
||||||
" for _ in range (30):\n",
|
" for _ in range (10):\n",
|
||||||
" num_workers = 8\n",
|
" num_workers = 8\n",
|
||||||
" batch_size = 40\n",
|
" batch_size = 40\n",
|
||||||
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
@@ -249,6 +249,250 @@
|
|||||||
" main()"
|
" main()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CUDA test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.418s Imgs/s=902.44\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.315s Imgs/s=973.75\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.348s Imgs/s=949.28\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.363s Imgs/s=938.86\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.353s Imgs/s=945.78\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.380s Imgs/s=927.33\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.365s Imgs/s=937.47\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.390s Imgs/s=920.68\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.354s Imgs/s=945.08\n",
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.396s Imgs/s=916.73\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cuda')\n",
|
||||||
|
" for _ in range (10):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 40\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cuda', non_blocking=True)\n",
|
||||||
|
" target = target.to('cuda', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
" torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CPU train"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=train: NumWorkers= 8 BatchSize=10 Time=299.952s Imgs/s= 4.27\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'train'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cpu')\n",
|
||||||
|
" for _ in range (1):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 10\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" target = target.to('cpu', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
"# torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CPU test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mode=test: NumWorkers= 8 BatchSize=10 Time=103.033s Imgs/s= 12.42\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"from torch.utils.data import DataLoader\n",
|
||||||
|
"\n",
|
||||||
|
"import torchvision.models as models\n",
|
||||||
|
"import torchvision.datasets as datasets\n",
|
||||||
|
"import torchvision.transforms as transforms\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def main():\n",
|
||||||
|
" mode = 'test'\n",
|
||||||
|
" model = models.resnet50()\n",
|
||||||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||||||
|
" optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
|
||||||
|
" N = 1280\n",
|
||||||
|
" dataset = datasets.FakeData(size=N, transform=transforms.ToTensor())\n",
|
||||||
|
" if mode=='test': # switch to evaluate mode\n",
|
||||||
|
" model.eval()\n",
|
||||||
|
" model.to('cpu')\n",
|
||||||
|
" for _ in range (1):\n",
|
||||||
|
" num_workers = 8\n",
|
||||||
|
" batch_size = 10\n",
|
||||||
|
" loader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, pin_memory=True)\n",
|
||||||
|
" if mode=='test':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" else: # mode=='train':\n",
|
||||||
|
" for i, (data, target) in enumerate(loader):\n",
|
||||||
|
" if i==1:\n",
|
||||||
|
" tm = time.time()\n",
|
||||||
|
" data = data.to('cpu', non_blocking=True)\n",
|
||||||
|
" target = target.to('cpu', non_blocking=True).long()\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" output = model(data)\n",
|
||||||
|
" loss = criterion(output, target)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" tm = time.time() - tm\n",
|
||||||
|
" print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f' % (mode, num_workers, batch_size, tm, N/tm))\n",
|
||||||
|
"# torch.cuda.empty_cache() # doesn't seem to be working...\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == '__main__':\n",
|
||||||
|
" main()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
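"## Comparison\n\nNote: the figures in the next cell are typed in by hand (CUDA: 960 imgs/s, 1.333 s; CPU: 11.9 imgs/s, 107.520 s) and differ from the CPU test run recorded above (12.42 imgs/s, 103.033 s), so the percentages are approximate."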
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Image difference: 7967.23%\n",
|
||||||
|
"Time difference: 7966.02%\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"image_increase = ((960/11.9)-1)\n",
|
||||||
|
"time_increase = ((107.520/1.333)-1)\n",
|
||||||
|
"print(f'Image difference: {image_increase:.2%}')\n",
|
||||||
|
"print(f'Time difference: {time_increase:.2%}')"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
Reference in New Issue
Block a user