{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ResNet-50 throughput benchmark\n",
    "\n",
    "Times ResNet-50 on `torchvision.datasets.FakeData` for different `DataLoader` settings (`num_workers`, `batch_size`), in train and test mode, on CUDA and on CPU.\n",
    "\n",
    "Timing notes:\n",
    "\n",
    "- The first batch of every pass is warm-up and is excluded from both the elapsed time and the image count.\n",
    "- On CUDA the device is synchronised before the clock is read, because kernels are launched asynchronously.\n",
    "- Test mode runs with gradients disabled."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torchvision.datasets as datasets\n",
    "import torchvision.models as models\n",
    "import torchvision.transforms as transforms\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "N_IMAGES = 1280  # FakeData samples per benchmark pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _is_cuda(device):\n",
    "    \"\"\"Return True when `device` names a CUDA device.\"\"\"\n",
    "    return torch.device(device).type == 'cuda'\n",
    "\n",
    "\n",
    "def _sync(device):\n",
    "    \"\"\"Wait for queued CUDA work so that wall-clock timestamps are meaningful.\"\"\"\n",
    "    if _is_cuda(device):\n",
    "        torch.cuda.synchronize()\n",
    "\n",
    "\n",
    "def time_epoch(model, loader, device, training, criterion=None, optimizer=None):\n",
    "    \"\"\"Run one pass over `loader` and time every batch after the first.\n",
    "\n",
    "    Args:\n",
    "        model: network to run; must already live on `device`.\n",
    "        loader: iterable of (data, target) batches.\n",
    "        device: device the batches are moved to.\n",
    "        training: if True run forward/backward/optimizer step, otherwise\n",
    "            forward only with gradients disabled.\n",
    "        criterion: loss function, required when `training` is True.\n",
    "        optimizer: optimizer, required when `training` is True.\n",
    "\n",
    "    Returns:\n",
    "        (elapsed_seconds, timed_images). The first batch is warm-up and is\n",
    "        left out of both numbers; (0.0, 0) if there are fewer than two batches.\n",
    "    \"\"\"\n",
    "    start = None\n",
    "    timed_images = 0\n",
    "    # Inference does not need the autograd graph; skipping it saves time and memory.\n",
    "    with torch.set_grad_enabled(training):\n",
    "        for i, (data, target) in enumerate(loader):\n",
    "            if i == 1:\n",
    "                _sync(device)  # do not bill warm-up kernels to the timed window\n",
    "                start = time.time()\n",
    "            data = data.to(device, non_blocking=True)\n",
    "            if training:\n",
    "                target = target.to(device, non_blocking=True).long()\n",
    "                optimizer.zero_grad()\n",
    "                loss = criterion(model(data), target)\n",
    "                loss.backward()\n",
    "                optimizer.step()\n",
    "            else:\n",
    "                model(data)\n",
    "            if i >= 1:\n",
    "                timed_images += data.size(0)\n",
    "    if start is None:\n",
    "        return 0.0, 0\n",
    "    _sync(device)  # CUDA kernels are asynchronous: wait before stopping the clock\n",
    "    return time.time() - start, timed_images\n",
    "\n",
    "\n",
    "def main(mode='test', device='cuda', num_workers_list=(1, 2, 4, 8),\n",
    "         batch_sizes=(1, 2, 4, 8, 16, 32), repeats=1, n_images=N_IMAGES):\n",
    "    \"\"\"Benchmark ResNet-50 over a grid of DataLoader settings.\n",
    "\n",
    "    Args:\n",
    "        mode: 'test' for inference; any other value (normally 'train') runs\n",
    "            training steps.\n",
    "        device: device to run on, e.g. 'cuda' or 'cpu'.\n",
    "        num_workers_list: DataLoader `num_workers` values to try.\n",
    "        batch_sizes: DataLoader `batch_size` values to try.\n",
    "        repeats: how many times each configuration is measured.\n",
    "        n_images: number of FakeData samples per pass.\n",
    "\n",
    "    Returns:\n",
    "        One dict per measurement with keys 'mode', 'device', 'num_workers',\n",
    "        'batch_size', 'time_s' and 'imgs_per_s'.\n",
    "    \"\"\"\n",
    "    training = mode != 'test'\n",
    "    on_cuda = _is_cuda(device)\n",
    "    model = models.resnet50().to(device)\n",
    "    criterion = optimizer = None\n",
    "    if training:\n",
    "        model.train()\n",
    "        criterion = nn.CrossEntropyLoss()\n",
    "        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n",
    "    else:\n",
    "        model.eval()  # switch to evaluate mode\n",
    "    dataset = datasets.FakeData(size=n_images, transform=transforms.ToTensor())\n",
    "    results = []\n",
    "    for num_workers in num_workers_list:\n",
    "        for batch_size in batch_sizes:\n",
    "            for _ in range(repeats):\n",
    "                # Pinned host memory only helps host-to-GPU copies.\n",
    "                loader = DataLoader(dataset, num_workers=num_workers,\n",
    "                                    batch_size=batch_size, pin_memory=on_cuda)\n",
    "                elapsed, timed_images = time_epoch(\n",
    "                    model, loader, device, training, criterion, optimizer)\n",
    "                # Rate over the images that were actually inside the timed window.\n",
    "                imgs_per_s = timed_images / elapsed if elapsed > 0 else float('nan')\n",
    "                print('Mode=%s: NumWorkers=%2d BatchSize=%2d Time=%6.3fs Imgs/s=%6.2f'\n",
    "                      % (mode, num_workers, batch_size, elapsed, imgs_per_s))\n",
    "                results.append({\n",
    "                    'mode': mode,\n",
    "                    'device': device,\n",
    "                    'num_workers': num_workers,\n",
    "                    'batch_size': batch_size,\n",
    "                    'time_s': elapsed,\n",
    "                    'imgs_per_s': imgs_per_s,\n",
    "                })\n",
    "                if on_cuda:\n",
    "                    # Releases only cached blocks that no live tensor uses; memory\n",
    "                    # held by the model itself stays allocated.\n",
    "                    torch.cuda.empty_cache()\n",
    "    return results\n",
    "\n",
    "\n",
    "def mean_rate(results):\n",
    "    \"\"\"Average 'imgs_per_s' over the measurements returned by `main`.\"\"\"\n",
    "    rates = [r['imgs_per_s'] for r in results]\n",
    "    return sum(rates) / len(rates)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DataLoader sweep (CUDA, test mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sweep_results = main(mode='test', device='cuda',\n",
    "                     num_workers_list=[1, 2, 4, 8],\n",
    "                     batch_sizes=[1, 2, 4, 8, 16, 32])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "workers_results = main(mode='test', device='cuda',\n",
    "                       num_workers_list=[1, 2, 4, 8, 16],\n",
    "                       batch_sizes=[40])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPU Comparison\n",
    "\n",
    "## CUDA train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.cuda.get_device_name(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cuda_train_results = main(mode='train', device='cuda',\n",
    "                          num_workers_list=[8], batch_sizes=[40], repeats=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CUDA test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cuda_test_results = main(mode='test', device='cuda',\n",
    "                         num_workers_list=[8], batch_sizes=[40], repeats=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CPU train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cpu_train_results = main(mode='train', device='cpu',\n",
    "                         num_workers_list=[8], batch_sizes=[10], repeats=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CPU test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cpu_test_results = main(mode='test', device='cpu',\n",
    "                        num_workers_list=[8], batch_sizes=[10], repeats=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparison\n",
    "\n",
    "Relative increase of CUDA over CPU in test mode, computed from the measurements above. Throughput is per timed image, so the time figure (seconds per image, CPU relative to CUDA) is the same ratio seen from the other side."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cuda_rate = mean_rate(cuda_test_results)\n",
    "cpu_rate = mean_rate(cpu_test_results)\n",
    "\n",
    "image_increase = cuda_rate / cpu_rate - 1\n",
    "time_increase = (1 / cpu_rate) / (1 / cuda_rate) - 1\n",
    "print(f'Image difference: {image_increase:.2%}')\n",
    "print(f'Time difference: {time_increase:.2%}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Previously recorded results\n",
    "\n",
    "The outputs below were recorded with the earlier version of this notebook (recorded device name: `'GeForce RTX 2080 Ti'`). That version divided all 1280 images by a time window that excluded the first batch, did not synchronise CUDA before reading the clock, and ran test mode with gradients enabled, so these numbers are kept for reference only and are not directly comparable with fresh runs.\n",
    "\n",
    "DataLoader sweep (CUDA, test mode):\n",
    "\n",
    "```\n",
    "Mode=test: NumWorkers= 1 BatchSize= 1 Time=33.559s Imgs/s= 38.14\n",
    "Mode=test: NumWorkers= 1 BatchSize= 2 Time=16.639s Imgs/s= 76.93\n",
    "Mode=test: NumWorkers= 1 BatchSize= 4 Time= 8.817s Imgs/s=145.17\n",
    "Mode=test: NumWorkers= 1 BatchSize= 8 Time= 8.802s Imgs/s=145.41\n",
    "Mode=test: NumWorkers= 1 BatchSize=16 Time= 9.094s Imgs/s=140.76\n",
    "Mode=test: NumWorkers= 1 BatchSize=32 Time= 8.247s Imgs/s=155.21\n",
    "Mode=test: NumWorkers= 2 BatchSize= 1 Time=34.151s Imgs/s= 37.48\n",
    "Mode=test: NumWorkers= 2 BatchSize= 2 Time=16.366s Imgs/s= 78.21\n",
    "Mode=test: NumWorkers= 2 BatchSize= 4 Time= 7.701s Imgs/s=166.20\n",
    "Mode=test: NumWorkers= 2 BatchSize= 8 Time= 3.888s Imgs/s=329.25\n",
    "Mode=test: NumWorkers= 2 BatchSize=16 Time= 3.824s Imgs/s=334.75\n",
    "Mode=test: NumWorkers= 2 BatchSize=32 Time= 3.706s Imgs/s=345.38\n",
    "Mode=test: NumWorkers= 4 BatchSize= 1 Time=34.202s Imgs/s= 37.43\n",
    "Mode=test: NumWorkers= 4 BatchSize= 2 Time=16.350s Imgs/s= 78.29\n",
    "Mode=test: NumWorkers= 4 BatchSize= 4 Time= 7.816s Imgs/s=163.76\n",
    "Mode=test: NumWorkers= 4 BatchSize= 8 Time= 3.884s Imgs/s=329.59\n",
    "Mode=test: NumWorkers= 4 BatchSize=16 Time= 2.029s Imgs/s=630.98\n",
    "Mode=test: NumWorkers= 4 BatchSize=32 Time= 1.819s Imgs/s=703.63\n",
    "Mode=test: NumWorkers= 8 BatchSize= 1 Time=33.488s Imgs/s= 38.22\n",
    "Mode=test: NumWorkers= 8 BatchSize= 2 Time=16.172s Imgs/s= 79.15\n",
    "Mode=test: NumWorkers= 8 BatchSize= 4 Time= 7.842s Imgs/s=163.22\n",
    "Mode=test: NumWorkers= 8 BatchSize= 8 Time= 3.866s Imgs/s=331.12\n",
    "Mode=test: NumWorkers= 8 BatchSize=16 Time= 2.034s Imgs/s=629.43\n",
    "Mode=test: NumWorkers= 8 BatchSize=32 Time= 1.469s Imgs/s=871.30\n",
    "```\n",
    "\n",
    "Worker sweep at batch size 40 (CUDA, test mode):\n",
    "\n",
    "```\n",
    "Mode=test: NumWorkers= 1 BatchSize=40 Time= 7.026s Imgs/s=182.17\n",
    "Mode=test: NumWorkers= 2 BatchSize=40 Time= 3.407s Imgs/s=375.71\n",
    "Mode=test: NumWorkers= 4 BatchSize=40 Time= 1.752s Imgs/s=730.46\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.323s Imgs/s=967.16\n",
    "Mode=test: NumWorkers=16 BatchSize=40 Time= 1.419s Imgs/s=901.91\n",
    "```\n",
    "\n",
    "CUDA train:\n",
    "\n",
    "```\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.084s Imgs/s=251.75\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.076s Imgs/s=252.17\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.079s Imgs/s=252.02\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.437s Imgs/s=235.41\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.157s Imgs/s=248.20\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.139s Imgs/s=249.09\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.191s Imgs/s=246.56\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.304s Imgs/s=241.34\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.275s Imgs/s=242.67\n",
    "Mode=train: NumWorkers= 8 BatchSize=40 Time= 5.202s Imgs/s=246.06\n",
    "```\n",
    "\n",
    "CUDA test:\n",
    "\n",
    "```\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.418s Imgs/s=902.44\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.315s Imgs/s=973.75\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.348s Imgs/s=949.28\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.363s Imgs/s=938.86\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.353s Imgs/s=945.78\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.380s Imgs/s=927.33\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.365s Imgs/s=937.47\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.390s Imgs/s=920.68\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.354s Imgs/s=945.08\n",
    "Mode=test: NumWorkers= 8 BatchSize=40 Time= 1.396s Imgs/s=916.73\n",
    "```\n",
    "\n",
    "CPU train:\n",
    "\n",
    "```\n",
    "Mode=train: NumWorkers= 8 BatchSize=10 Time=299.952s Imgs/s=  4.27\n",
    "```\n",
    "\n",
    "CPU test:\n",
    "\n",
    "```\n",
    "Mode=test: NumWorkers= 8 BatchSize=10 Time=103.033s Imgs/s= 12.42\n",
    "```\n",
    "\n",
    "Comparison (computed from hand-entered constants `960/11.9` and `107.520/1.333`):\n",
    "\n",
    "```\n",
    "Image difference: 7967.23%\n",
    "Time difference: 7966.02%\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}