ROCm이 설치된 Ubuntu 환경에서 Docker-Pytorch 활용 딥러닝에 대해 정리
Radeon VII 사용
Ubuntu 환경에 ROCm과 Docker가 설치된 것을 가정
(ROCm 설치 글 Ubuntu, ROCm 설치 및 제거 (tistory.com))
(Docker 설치 글 Ubuntu, docker 설치 및 제거 정리 (tistory.com))
공식 Docker image
rocm/pytorch - Docker Image | Docker Hub
1. rocm/pytorch 이미지 run
size가 꽤나 크므로 시간이 조금 걸릴 수 있다
sudo docker run -it --network=host --device=/dev/kfd --device=/dev/dri --shm-size 8G --group-add=video --ipc=host --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v $HOME/dockerx:/dockerx rocm/pytorch
설명:
-it: docker에 접속해서 bash command 사용
--network=host: host network 와 동기화 (port 작업 생략)
--device=/dev/kfd --device=/dev/dri: amd gpu 연결
--group-add=video: gpu 사용 group 에 user 포함
--ipc=host: host system 과 동기화 (memory 등등)
--cap-add=SYS_PTRACE: 컨테이너에 ptrace 권한 부여 (디버거 등으로 프로세스의 runtime behavior를 추적할 수 있게 함)
--security-opt seccomp=unconfined: Docker run reference | Docker Documentation
-v $HOME/dockerx:/dockerx: host의 home 디렉토리 밑에 dockerx폴더와 container 안 /dockerx 폴더를 연결 및 동기화
--shm-size 8G: 통신 시 공유메모리 지정 (미지정시 모델 학습 중 system down)
container running 후 container 안에서 rocminfo와 rocm-smi 정보가 확인되어야 한다.
이후 설명은 container 안에서 작업
2. pip 버전 upgrade
pip install --upgrade pip
3. jupyter lab 설치
pip install jupyterlab
4. jupyter lab 실행
jupyter lab --ip 0.0.0.0 --allow-root
이후 딥러닝 작업은 jupyter lab에서 실행 (nvidia 환경에서 deep learning과 동일)
amd 튜토리얼 참고
Deep Learning Training (amd.com)
5. pretrain 된 모델 확인
import torch
import torchvision

# Load an ImageNet-pretrained Inception v3 from the PyTorch hub
# (downloads the weights on first use).
model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
model.eval()  # inference mode: freezes dropout / batch-norm statistics
print('yes')

# Download a test image.
# BUG FIX: the original used the Python 2 idiom `urllib.URLopener()`, which
# does not exist on the `urllib` package in Python 3; the resulting
# AttributeError was silently swallowed by a bare `except:` before falling
# back to `urllib.request.urlretrieve` (which itself only worked because
# `urllib.request` happened to be imported elsewhere). Import the submodule
# explicitly and call it directly.
import urllib.request
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)
# Load the downloaded test image and run it through the network.
from PIL import Image
from torchvision import transforms

input_image = Image.open(filename)

# Inception-v3 preprocessing: 299x299 input, normalized with the
# standard ImageNet channel statistics.
to_model_input = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Add a leading batch dimension: the model expects (N, C, H, W).
input_batch = to_model_input(input_image).unsqueeze(0)

# Move both the batch and the model to the GPU when one is present
# (ROCm exposes AMD GPUs through the `cuda` device name).
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

# Forward pass without gradient bookkeeping.
with torch.no_grad():
    output = model(input_batch)

# Turn the raw logits of the single image into class probabilities.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
# get class label
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
# print out the inference result with class labels
# Map predicted class ids to human-readable ImageNet labels and print
# the five most likely classes with their probabilities.
with open("imagenet_classes.txt", "r") as f:
    categories = [line.strip() for line in f]

top5_prob, top5_catid = torch.topk(probabilities, 5)
for prob, class_id in zip(top5_prob, top5_catid):
    print(categories[class_id], prob.item())
6. 모델 fine tuning
6-1. tiny-imagenet-200 다운로드 및 압축해제
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip tiny-imagenet-200.zip
6-2. 압축 해제된 데이터 정리
import io
import glob
import os
from shutil import move
from os.path import join
from os import listdir, rmdir

# Reorganize tiny-imagenet-200's validation split into the
# class-per-folder layout that torchvision.datasets.ImageFolder expects:
#   val/<wnid>/images/<file>.JPEG
target_folder = './tiny-imagenet-200/val/'

# val_annotations.txt maps each validation image file name (column 0)
# to its class id / wnid (column 1); the remaining columns are bboxes.
val_dict = {}
with open('./tiny-imagenet-200/val/val_annotations.txt', 'r') as f:
    for row in f.readlines():
        columns = row.split('\t')
        val_dict[columns[0]] = columns[1]

# Move every image into <val>/<wnid>/images/, creating each class
# directory the first time it is seen, then drop the now-empty flat dir.
for path in glob.glob('./tiny-imagenet-200/val/images/*'):
    image_name = path.split('/')[-1]
    class_dir = target_folder + str(val_dict[image_name])
    if not os.path.exists(class_dir):
        os.mkdir(class_dir)
        os.mkdir(class_dir + '/images')
    move(path, class_dir + '/images/' + str(image_name))
rmdir('./tiny-imagenet-200/val/images')
6-3. 학습 준비 (1)
import torch
import os
import torchvision
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
# define train parameter
device = "cuda"  # ROCm maps AMD GPUs onto the "cuda" device name
data_path = "tiny-imagenet-200"  # root of the extracted dataset
train_crop_size = 299  # Inception v3 expects 299x299 inputs
interpolation = "bilinear"  # resize interpolation; converted to an InterpolationMode enum later
val_crop_size = 299  # validation center-crop size
val_resize_size = 342  # resize before center-cropping to 299
batch_size = 32
model_name = "inception_v3"  # looked up in torchvision.models by name
pretrained = True  # start from ImageNet weights
num_workers = 16  # DataLoader worker processes
learning_rate = 0.1
momentum = 0.9
weight_decay = 1e-4
lr_step_size = 30  # StepLR: decay the LR every 30 epochs...
lr_gamma = 0.1  # ...by a factor of 0.1 (never triggers with epochs=3)
epochs = 3
# define data path
train_dir = os.path.join(data_path, "train")
val_dir = os.path.join(data_path, "val")
6-4. 학습 준비 (2)
# Build the train/val preprocessing pipelines, datasets and data loaders.
interpolation = InterpolationMode(interpolation)

# Training pipeline: random resized crop for augmentation, then the
# standard ImageNet normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(train_crop_size, interpolation=interpolation),
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + center crop, same normalization.
val_transform = transforms.Compose([
    transforms.Resize(val_resize_size, interpolation=interpolation),
    transforms.CenterCrop(val_crop_size),
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

dataset = torchvision.datasets.ImageFolder(train_dir, transform=train_transform)
dataset_test = torchvision.datasets.ImageFolder(val_dir, transform=val_transform)

print("Creating data loaders")
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=torch.utils.data.RandomSampler(dataset),
    num_workers=num_workers,
    pin_memory=True,
)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    sampler=torch.utils.data.SequentialSampler(dataset_test),
    num_workers=num_workers,
    pin_memory=True,
)
6-5. 학습 준비 (3)
# Build the model, loss, optimizer and learning-rate schedule.
print("Creating model")
print("Num classes = ", len(dataset.classes))

# Look the architecture up by name in torchvision.models and load
# ImageNet weights when `pretrained` is set.
model = getattr(torchvision.models, model_name)(pretrained=pretrained)

# Replace the 1000-class ImageNet head with one sized for this dataset (200).
model.fc = torch.nn.Linear(model.fc.in_features, len(dataset.classes))

# Drop the Inception auxiliary classifier so the training-mode forward
# pass returns a single logits tensor instead of (logits, aux_logits).
model.aux_logits = False
model.AuxLogits = None

model.to(device)
criterion = torch.nn.CrossEntropyLoss()

# Plain SGD with momentum and weight decay...
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

# ...with a step decay of the learning rate.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)
6-6. 학습 시작
from tqdm import tqdm, trange

print("Start training")
for epoch in range(epochs):
    # ---- training phase ----
    model.train()
    epoch_loss = 0
    epoch_corrects = 0
    len_dataset = 0
    for step, (image, target) in enumerate(tqdm(data_loader)):
        image, target = image.to(device), target.to(device)
        output = model(image)
        _, preds = torch.max(output, 1)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight the (mean) batch loss by the batch size so the epoch
        # average stays exact even when the last batch is smaller.
        epoch_loss += output.shape[0] * loss.item()
        epoch_corrects += torch.sum(preds == target.data)
        len_dataset += output.shape[0]
    epoch_loss = epoch_loss / len_dataset
    epoch_corrects = epoch_corrects.double() / len_dataset
    print('Epoch: ', epoch, '| train loss : %0.4f' % epoch_loss, '| train acc : %0.4f' % epoch_corrects)
    lr_scheduler.step()

    # ---- evaluation phase ----
    model.eval()
    with torch.inference_mode():
        running_loss = 0
        running_corrects = 0
        num_test_samples = 0
        for image, target in data_loader_test:
            image, target = image.to(device), target.to(device)
            output = model(image)
            _, preds = torch.max(output, 1)
            loss = criterion(output, target)
            running_loss += loss.item()
            running_corrects += torch.sum(preds == target.data)
            num_test_samples += output.shape[0]
    # Mean of per-batch losses, as before.
    running_loss = running_loss / len(data_loader_test)
    # BUG FIX: the original divided the correct-prediction count by
    # len(data_loader_test) — the number of *batches* — so "test acc"
    # could be as large as batch_size. Divide by the sample count.
    running_corrects = running_corrects.double() / num_test_samples
    print('Epoch: ', epoch, '| test loss : %0.4f' % running_loss, '| test acc : %0.4f' % running_corrects)

# save model
torch.save(model.state_dict(), "trained_inception_v3.pt")
끝
'컴퓨터 > 머신러닝 (Machine Learning)' 카테고리의 다른 글
TensorRT Docker 사용 정리 (0) | 2022.12.13 |
---|---|
3080, Radeon vii, 6900xt, 딥러닝 (image classification) 학습 성능 비교 (0) | 2022.12.12 |
Pytorch, grad-cam 사용 정리 (0) | 2022.12.10 |
Ubuntu, TensorRT로 Yolov5 inference (0) | 2022.12.05 |
Windows, OpenVINO 설치하기 (1) | 2022.11.30 |