각종 사용법

CODE BLOCK

notou10 2022. 2. 16. 20:38
from natsort import natsorted

# Populate self.data with directory entries in natural ("human") sort order,
# e.g. img2 before img10.
# FIX: the original mixed top-level and method indentation (syntax error) and
# carried an unused enumerate index; build the list directly instead.
self.data = list(natsorted(os.listdir(self.path)))

pytorch parameter값 직접 보기

######### Inspect model parameters directly
teacher = CNN((3,32,32), 10).to('cuda')
# parameters() yields the learnable tensors in registration order
params = list(teacher.parameters())
print(params[0][0])  # first slice of the first parameter tensor (e.g. first conv filter)
a = input("sdsds")  # blocks execution so the printout can be inspected

 

 

Argparse  사용법

(main문에서)

 

import argparse

# CLI definition for the distillation experiment.
# NOTE: argparse ignores `default=` whenever required=True, so the dead
# defaults the original attached to required arguments were removed.
parser = argparse.ArgumentParser()
parser.add_argument('--epoch', type=int, required=True)
parser.add_argument('--t', type=float, required=True)        # softmax temperature
parser.add_argument('--dataset', type=str, required=True)
parser.add_argument('--n_class', type=int, required=True)
parser.add_argument('--teacher', type=str, required=True)
parser.add_argument('--student', type=str, required=True)
parser.add_argument('--finetune', type=str2bool, required=True)
parser.add_argument('--teacher_only', type=str2bool, required=True)
parser.add_argument('--lr', type=float, default=1e-1)
parser.add_argument('--bs', type=int, default=64)            # batch size
parser.add_argument('--alpha', type=float, default=0.5)      # loss mixing weight
parser.add_argument('--checkpoint', default=None)
parser.add_argument('--save_last', type=str2bool, default=False)
parser.add_argument('--exp_name', type=str, required=True)
parser.add_argument('--optimizer', type=str, default='adamw')
parser.add_argument('--pretrain', type=str2bool, default=True)
parser.add_argument('--teacher_dir', type=str, default=None)
parser.add_argument('--save_every', type=int, default=100)
args = parser.parse_args()
print(args)

# Unpack into plain locals for convenient use below.
epochs = args.epoch
temperature = args.t
lr = args.lr
batch_size = args.bs
dataset = args.dataset
n_class = args.n_class
teacher = args.teacher
student = args.student
finetune = args.finetune
teacher_only = args.teacher_only
alpha = args.alpha
checkpoint = args.checkpoint
exp_name = args.exp_name
optim = args.optimizer
save_last = args.save_last
save_every = args.save_every
pretrain = args.pretrain
teacher_dir = args.teacher_dir

 

str2bool 사용법

#메인문에선 이거
from distillation.utils import Model, str2bool


#distillation/util.py에서는

def str2bool(v):
    """Parse a CLI flag value into a bool (for argparse `type=`)."""
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in {'yes', 'true', 't', 'y', '1'}:
        return True
    if lowered in {'no', 'false', 'f', 'n', '0'}:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

 

timm 사용법

#Main에서는
from distillation.utils import Model 

#모델 선언
teacher = Model(model_name='resnet152', n_class=10, pretrained=False).to('cuda')


#distillation/utils.py에서는

import torch
import torch.nn as nn
import timm

class Model(nn.Module):
    """timm backbone whose final fc layer is replaced for `n_class` outputs.

    Args:
        model_name: any name accepted by timm.create_model (e.g. 'resnet152').
        n_class: number of output classes for the new classifier head.
        pretrained: load timm pretrained weights for the backbone.
    """
    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.fc.in_features  # width of the original head
        self.model.fc = nn.Linear(n_features, n_class)

    def forward(self, x):
        # BUG FIX: the original returned the input unchanged and never ran
        # the network; run the backbone like the sibling Backbone class does.
        return self.model(x)

 

timm 사용법 응용

#main에선
from utils.models import ModelBuilder

model  = ModelBuilder(cfg)

teacher = model.create_teacher().to(device)
student = model.create_student().to(device)


#utils/model.py에선

import torch
import torch.nn as nn
import timm

class Backbone(nn.Module):
    """Wraps a timm model, swapping its classifier head to `n_class` outputs."""

    def __init__(self, model_name, n_class, pretrained=False):
        super(Backbone, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        head_width = self.model.fc.in_features
        self.model.fc = nn.Linear(head_width, n_class)

    def forward(self, x):
        """Run the full backbone and return class logits."""
        return self.model(x)
    
class ModelBuilder():
    """Builds teacher/student Backbone networks from a config object."""

    def __init__(self, cfg):
        super(ModelBuilder, self).__init__()
        model_cfg = cfg.MODEL
        self.teacher = model_cfg.teacher
        self.student = model_cfg.student
        self.pretrain = model_cfg.pretrain
        self.n_class = cfg.DATASET.n_class

    def create_teacher(self):
        """Teacher backbone; honors the configured pretrain flag."""
        return Backbone(self.teacher, n_class=self.n_class, pretrained=self.pretrain)

    def create_student(self):
        """Student backbone; always initialized from scratch."""
        return Backbone(self.student, n_class=self.n_class, pretrained=False)

 

matplotlib으로 1장 save

 

import matplotlib.pyplot as plt

# Show one image and save it to disk.
# assumes imgs_dset entries are CHW arrays — transpose to HWC for imshow; TODO confirm
plt.imshow(imgs_dset[4*count + i].transpose(1,2,0))
plt.savefig("./tset.png")  # save before show(): show() can clear the current figure
plt.show()
a = input("fdgsdfg")  # pause so the window stays visible

 

state_dict 일부만 불러오기

# Copy over only the parameters whose names also exist in the new model,
# then load the merged state dict.
src_state = pretrained_model.state_dict()
dst_state = new_model.state_dict()
shared = {name: weight for name, weight in src_state.items() if name in dst_state}
dst_state.update(shared)
new_model.load_state_dict(dst_state)

 

dataloader

class Dataset(nn.Module):
    """Builds train/valid datasets and dataloaders from a config object.

    NOTE(review): inheriting nn.Module for a data container is unusual but
    kept for interface compatibility; super().__init__() is now called so
    attribute assignment on the Module works reliably.
    """

    def __init__(self, cfg):
        super().__init__()
        self.dataset_name = cfg.DATASET.name
        self.root_path = cfg.DATASET.root_path
        self.train_batch = cfg.TRAIN.batch_size
        self.valid_batch = cfg.VALID.batch_size

    def load_dataset(self):
        """Return (trainset, validset) for the configured dataset name.

        Raises:
            ValueError: for an unsupported dataset name (the original fell
            through to printing an undefined variable — a NameError).
        """
        name = self.dataset_name.lower()
        if name == 'cifar10':
            trainset = torchvision.datasets.CIFAR10(root=self.root_path, train=True,
                                                    download=True, transform=cifar_train_transform())
            validset = torchvision.datasets.CIFAR10(root=self.root_path, train=False,
                                                    download=True, transform=cifar_valid_transform())
        elif name == 'cifar100':
            trainset = torchvision.datasets.CIFAR100(root=self.root_path, train=True,
                                                     download=True, transform=cifar_train_transform())
            validset = torchvision.datasets.CIFAR100(root=self.root_path, train=False,
                                                     download=True, transform=cifar_valid_transform())
        elif name in ('relabel_binary', 'relabel'):
            # BUG FIX: the original condition `== 'relabel_binary' or 'relabel'`
            # was always truthy, so this branch swallowed every dataset name.
            trainset = torchvision.datasets.ImageFolder(root=f"{self.root_path}/train", transform=imagenet_train_transform())
            validset = torchvision.datasets.ImageFolder(root=f"{self.root_path}/val", transform=imagenet_valid_transform())
        else:
            raise ValueError(f"unsupported dataset: {self.dataset_name}")

        print(trainset.classes)
        print(trainset.class_to_idx)

        return trainset, validset

    def prepare_dataloader(self):
        """Return (trainloader, validloader); shuffles only the train split."""
        trainset, validset = self.load_dataset()
        trainloader = torch.utils.data.DataLoader(
            trainset, batch_size=self.train_batch, shuffle=True)

        validloader = torch.utils.data.DataLoader(
            validset, batch_size=self.valid_batch, shuffle=False)
        return trainloader, validloader

 

vscode에서 .py돌릴 때 path는 무조건 도커기준으로 쓰기. (마운트한 기준)

 

matplotlib 여러개 plot

    def forward(self, g_s, g_t): # g_s/g_t are lists of features (several tap points per network)
        """Debug pass: plot teacher crop-pair similarity distributions under
        several softmax temperatures into a 2x2 subplot grid and save it.

        NOTE(review): t_sim_soft/s_sim_soft are computed but never used, and
        nothing is returned — the loss computation appears stripped out for
        this plotting experiment; confirm before reuse.
        """
        t_comb_sim = self.insimilarity_scores(g_t) # torch.Size([120, 128]) <= [16C2, B]
        s_comb_sim = self.insimilarity_scores(g_s)
        
        # (1) raw (pre-softmax) similarity scores of the first batch item
        plt.subplot(2,2,1)        
        plt.bar(range(6),t_comb_sim[:,0].detach().cpu().numpy())
        plt.title('before_softmax', size=6)
        ax_x=["(crop0 & crop1)","(crop0 & crop2)","(crop0 & crop3)","(crop1 & crop2)","(crop1 & crop3)","(crop2 & crop3)"]
        plt.xticks([0, 1,2, 3, 4, 5],ax_x, size=3)
        # plt.savefig(f"before_softmax_teacher_innerProduct_T{self.T}.png", dpi=600)
        # plt.show()
        # plt.close()
        # a = input("##")
        
        
        t_sim_soft = F.softmax(t_comb_sim/self.T, dim=0).permute(1,0) # sim per 1) crop pair (columns) and 2) batch item (rows): torch.Size([128, 120])
        s_sim_soft = F.log_softmax(s_comb_sim/self.T, dim=0).permute(1,0)
        
        #################### (2) temperature T = 0.01
        plt.subplot(2,2,2)
        a=F.softmax(t_comb_sim/0.01, dim=0).permute(1,0)
        plt.bar(range(6),a[0,:].detach().cpu().numpy())
        
        plt.title(f'after_softmax_T_0.01', size=6)
        ax_x=["(crop0 & crop1)","(crop0 & crop2)","(crop0 & crop3)","(crop1 & crop2)","(crop1 & crop3)","(crop2 & crop3)"]
        plt.xticks([0, 1,2, 3, 4, 5],ax_x, size=3)
        plt.savefig(f"./sim/after_softmax_teacher_cos_sim_T_0.01.png", dpi=600)
        #plt.show()
        #plt.close()
        ############################### (3) temperature T = 0.1
        plt.subplot(2,2,3)
        b=F.softmax(t_comb_sim/0.1, dim=0).permute(1,0)
        plt.bar(range(6),b[0,:].detach().cpu().numpy())
        
        plt.title(f'after_softmax_T_0.1', size=6)
        ax_x=["(crop0 & crop1)","(crop0 & crop2)","(crop0 & crop3)","(crop1 & crop2)","(crop1 & crop3)","(crop2 & crop3)"]
        plt.xticks([0, 1,2, 3, 4, 5],ax_x, size=3)
        #plt.savefig(f"./sim/after_softmax_teacher_cos_sim_T_0.1.png", dpi=600)
        #plt.show()
        #plt.close()
        
        ###################### (4) temperature T = 1
        plt.subplot(2,2,4)
        c=F.softmax(t_comb_sim/1, dim=0).permute(1,0)
        plt.bar(range(6),c[0,:].detach().cpu().numpy())
        
        plt.title(f'after_softmax_T_1', size=6)
        ax_x=["(crop0 & crop1)","(crop0 & crop2)","(crop0 & crop3)","(crop1 & crop2)","(crop1 & crop3)","(crop2 & crop3)"]
        plt.xticks([0, 1,2, 3, 4, 5],ax_x, size=3)

        plt.savefig(f"./sim/s.png", dpi=600)  # the combined 2x2 figure
        
        plt.show()
        plt.close()

 

plt 그리드, 숫자 채워넣기

 print(data.squeeze().shape)
            
            # Overlay a label at the center of each cell in a 2x2 grid of
            # 120px crops — assumes `data` is a [1, C, 240, 240] tensor; TODO confirm
            for x in range(2):
                for y in range(2):
                    plt.text(60+ 120*x, 60+ 120*y, f"crop{2*x+y}", ha="center", va="center", size=10)
            # CHW tensor -> HWC numpy for imshow
            plt.imshow(data.squeeze().permute(1,2,0).detach().cpu().numpy())
            plt.xticks([0, 120, 240])
            plt.yticks([0, 120, 240])
            
            # (4x4 variant with 60px cells, kept for reference)
            # for x in range(4):
            #     for y in range(4):
            #         plt.text(30+ 60*x, 30+ 60*y, f"crop{4*x+y}", ha="center", va="center", size=10)
        
            # plt.imshow(data.squeeze().permute(1,2,0).detach().cpu().numpy())
            # plt.xticks([0, 60, 120, 180, 240])
            # plt.yticks([0, 60, 120, 180, 240])
            
            
            plt.grid()  # grid lines land on the tick positions set above
            plt.savefig(f"img{iter_}.png", dpi=600)
            plt.show()
            plt.close()

 

다른 폴더에서 import 하기

https://freedata.tistory.com/70

절대경로에서 import
sys.path.append('/workspace/workspace/DIST')

from lib.models.builder import build_model

 

로그파일 만들고 저장하기

#함수
def log_epoch(epoch, exp_name, metrics):
    """Append one line of epoch metrics to logs/<exp_name>/log and echo it.

    Args:
        epoch: current epoch number.
        exp_name: experiment (checkpoint folder) name; the log directory is
            logs/<exp_name>, created on demand.
        metrics: mapping of metric name -> numeric value (rendered as %.3f).
    """
    log_dir = os.path.join('logs', exp_name)
    # exist_ok=True makes the original's isdir pre-check redundant
    os.makedirs(log_dir, exist_ok=True)

    line = f'Epoch: {epoch:3}'
    for name, value in metrics.items():
        line += f'\t {name}: {value:3.3f}'

    with open(os.path.join(log_dir, "log"), "a") as f:
        f.write(line + "\n")
    print(line)
    
    #코드에 삽입 (사용 예시)
    metric = {'Epoch': epoch,
              'Average loss': test_loss / len(cifar100_test_loader.dataset),
              'Accuracy': correct.float() / len(cifar100_test_loader.dataset)}

    log_epoch(epoch, "ckpt폴더명", metric)

현재 경로 출력

path = os.getcwd()
print(path)

파이썬은 터미널 위치 기준이 현재 경로기준임..! 

(돌리는 .py 파일 위치가 아니라!)

LR schduler, optim (+LR visualize)

from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR, LambdaLR, MultiStepLR


def build_scheduler(sched_type, optimizer, min_lr=1e-5,
                    step_size=30, decay_rate=0.1,
                    total_steps=200, warmup_steps=0,
                    milestones=(150, 180, 210), gamma=0.1):
    """Create a learning-rate scheduler for `optimizer`.

    FIXES over the original: only the 'multi_step' branch returned (the rest
    fell through to None); step/cosine/linear read free variables that were
    never defined — they are now keyword parameters with sensible defaults;
    an unknown sched_type raises instead of silently returning None.

    Args:
        sched_type: one of 'step', 'cosine', 'linear', 'multi_step'.
        optimizer: the torch optimizer to schedule.
        min_lr: floor LR for cosine annealing.
        step_size, decay_rate: StepLR parameters.
        total_steps, warmup_steps: horizon for cosine/linear decay.
        milestones, gamma: MultiStepLR parameters.

    Raises:
        ValueError: on an unrecognized sched_type.
    """
    if sched_type == 'step':
        scheduler = StepLR(optimizer, step_size, gamma=decay_rate)
    elif sched_type == 'cosine':
        scheduler = CosineAnnealingLR(optimizer, T_max=total_steps - warmup_steps, eta_min=min_lr)
    elif sched_type == 'linear':
        scheduler = LambdaLR(optimizer, lambda epoch: (total_steps - warmup_steps - epoch) / (total_steps - warmup_steps))
    elif sched_type == 'multi_step':
        scheduler = MultiStepLR(optimizer, milestones=list(milestones), gamma=gamma)
    else:
        raise ValueError(f'unknown scheduler type: {sched_type!r}')
    return scheduler
  
  
 #이후 main문에서
 optimizer = optim.SGD
 scheduler = build_scheduler(args.sched,
                                optimizer)
   
   #미니배치 내에서
   scheduler.step()
 #에폭(배치 전체) 끝나고
 scheduler.step()
 
 #lr 보기
 {'lr' : scheduler.get_last_lr()}

ckpt 불러오기

(load_state_dict)

 

class CheckpointManager():
    """Saves/loads training checkpoints and appends metrics to a CSV file.

    NOTE(review): several attributes read below are never set in __init__ —
    self.rank, self.keep_num, self.ema_model, self.additions — so as excerpted
    this class only works if a caller/subclass assigns them first; confirm at
    the call site. External names DDP, shutil, logger, os and torch must be in
    scope in the enclosing file.
    """
    def __init__(self, model, optimizer=None, save_dir=''):
        self.model = model
        self.optimizer = optimizer
        self.save_dir = save_dir
        self.ckpts = []  # [path, score] pairs, intended best-score-first
        if self.rank == 0:  # only the main process touches the filesystem
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            # append mode so resumed runs keep their history
            self.metrics_fp = open(os.path.join(save_dir, 'metrics.csv'), 'a')
            self.metrics_fp.write('epoch,train_loss,test_loss,top1,top5\n')

    def update(self, epoch, save_freq, metrics, score_key='top1'):
        """Log this epoch's metrics and save a checkpoint when due.

        Returns the current list of tracked checkpoints.
        """
        if self.rank == 0:
            self.metrics_fp.write(f"Epoch : {epoch}, Train loss : {metrics['train_total_loss']:.2f}, Test loss : {metrics['test_total_loss']:.2f}, Top1 acc : {metrics['top1']}%, Top5 acc : {metrics['top5']}%\n")
            #self.metrics_fp.write('{},{},{},{},{}\n'.format(epoch, metrics['train_loss'], metrics['test_loss'], metrics['top1'], metrics['top5']))
            self.metrics_fp.flush()
            
    #         L1_distance' : torch.sum(L1_distance), 
    # 'L2_distance' : torch.sum(L2_distance), 'L1_distance_scaled':torch.sum(L1_distance_scaled), 'L2_distance_scaled'
    # :torch.sum(L2_distance_scaled)

        # Rank this epoch's score among the kept checkpoints.
        score = metrics[score_key]
        insert_idx = 0
        for ckpt_, score_ in self.ckpts:
            if score > score_:
                break
            insert_idx += 1
        if insert_idx < self.keep_num:
            save_path = os.path.join(self.save_dir, 'checkpoint-{}.pth.tar'.format(epoch))
            # NOTE(review): the insert below is commented out, so self.ckpts
            # never grows and the eviction branch can never fire — confirm
            # whether disabling the keep-N logic was intentional.
            #self.ckpts.insert(insert_idx, [save_path, score])
            if len(self.ckpts) > self.keep_num:
                remove_ckpt = self.ckpts.pop(-1)[0]
                if self.rank == 0:
                    if os.path.exists(remove_ckpt):
                        os.remove(remove_ckpt)
            if(epoch%save_freq==0):
                self._save(save_path, epoch, is_best=(insert_idx == 0))
        else:
            self._save(os.path.join(self.save_dir, 'last.pth.tar'), epoch)
        return self.ckpts

    def _save(self, save_path, epoch, is_best=False):
        """Serialize epoch/model/EMA/optimizer (+additions) to save_path; rank 0 only."""
        if self.rank != 0:
            return
        save_dict = {
            'epoch': epoch,
            # unwrap DistributedDataParallel so keys match a bare model
            'model': self.model.module.state_dict() if isinstance(self.model, DDP) else self.model.state_dict(),
            'ema_model': self.ema_model.state_dict() if self.ema_model else None,
            'optimizer': self.optimizer.state_dict() if self.optimizer else None,
        }
        for key, value in self.additions.items():
            save_dict[key] = value.state_dict() if hasattr(value, 'state_dict') else value

        torch.save(save_dict, save_path)
        # keep 'last' (and optionally 'best') copies in sync with the newest save
        if save_path != os.path.join(self.save_dir, 'last.pth.tar'):
            shutil.copy(save_path, os.path.join(self.save_dir, 'last.pth.tar'))
        if is_best:
            shutil.copy(save_path, os.path.join(self.save_dir, 'best.pth.tar'))

    def load(self, ckpt_path):
        """Restore model/EMA/optimizer/additions from ckpt_path; return its epoch."""
        save_dict = torch.load(ckpt_path, map_location='cpu')

        
        for key, value in self.additions.items():
            if hasattr(value, 'load_state_dict'):
                value.load_state_dict(save_dict[key])
            else:
                self.additions[key] = save_dict[key]

        missing_keys, unexpected_keys = \
            self.model.load_state_dict(save_dict['model'])
        if len(missing_keys) != 0:
            logger.info(f'Missing keys in source state dict: {missing_keys}')
        if len(unexpected_keys) != 0:
            logger.info(f'Unexpected keys in source state dict: {unexpected_keys}')
        
        if self.ema_model is not None:
            self.ema_model.load_state_dict(save_dict['ema_model'])
        if self.optimizer is not None:
            self.optimizer.load_state_dict(save_dict['optimizer'])
        epoch = save_dict['epoch']

        '''avoid memory leak'''
        del save_dict
        torch.cuda.empty_cache()

        return epoch
        
 #main문에서
# Instantiate the manager.
# BUG FIX: the original call ended with a stray '})' — a syntax error left
# over from a removed dict argument.
ckpt_manager = CheckpointManager(model,
                                 optimizer,
                                 save_dir=args.exp_dir)

# Resume from a checkpoint.
start_epoch = ckpt_manager.load(args.resume)

# Save a checkpoint / log metrics for this epoch.
ckpts = ckpt_manager.update(epoch, args.save_freq, metrics)

 

Classification계열 모델 빌드 총정리

 

def build_model(args, model_name, pretrained=False, pretrained_ckpt=''):
    """Build a classification model by (prefixed) name.

    NOTE(review): the original excerpt was truncated — it began with 'ef'
    and an orphan elif, so the first branch was restored as `if` and a
    `return model` added (the caller does `model = build_model(...)`).
    """
    if model_name.lower().startswith('resnet'):
        # resnet variants (the same as torchvision)
        model = getattr(resnet, model_name.lower())(num_classes=args.num_classes)

    elif model_name.lower() == 'mobilenet_v1':
        # mobilenet v1
        model = MobileNetV1(num_classes=args.num_classes)

    elif model_name.startswith('tv_'):
        # build model using torchvision; the zoo name follows the 'tv_' prefix
        import torchvision
        model = getattr(torchvision.models, model_name[3:])(pretrained=pretrained)

    elif model_name.startswith('timm_'):
        # build model using timm; the zoo name follows the 'timm_' prefix
        import timm
        model = timm.create_model(model_name[5:], pretrained=False, num_classes=args.num_classes)

    elif model_name.startswith('cifar_'):
        # CIFAR-specific architectures registered in the local model_dict
        from .cifar import model_dict
        model = model_dict[model_name](num_classes=args.num_classes)

    elif model_name == 'custom_resnet34':
        from . import custom_model
        model = custom_model.cm_resnet34()

    elif model_name == 'v2_resnet20':
        from . import resnet_v2
        model = resnet_v2.resnet20()

    else:
        raise ValueError(f'unknown model name: {model_name!r}')

    return model
        
 #main문에서
model = build_model(args, args.model)

 

 

심볼릭링크 사용법

(링크 걸 프로젝트 폴더에서)

ln -s [절대경로] [링크이름]

 

ln -s /mnt/hdd/dongkyun/afhq/train link_afhq

 

plt make_grid (여러 이미지 마진 없이 붙이기)

중요 : [batch, C, H , W] 순 맞추어주기, tensor가 input으로 들어감

# For each content image: stylize with every style, save per-style outputs,
# and write one 5-column comparison grid (content / style / output rows).
for image in os.listdir(args.content_dir):   
    print(image)

    path = os.path.join(args.content_dir, image)
    # content_tf is the preprocessing transform (defined elsewhere in the file)
    content = content_tf(Image.open(str(path))).squeeze()
    content = content.to(device).unsqueeze(0)
    
    
    # 3 rows x 5 styles = 15 tiles; assumes exactly 5 styles and 512x512
    # 3-channel tensors — TODO confirm
    output_tensor_array = torch.zeros(15, 3, 512, 512)

    for i, style in enumerate(styles):
        
        with torch.no_grad():
            output = style_transfer(vgg, decoder, content, style,
                                    args.alpha)
        output = torch.clamp(output, 0., 1.)
        
        # row 0: content, row 1: style, row 2: stylized output
        output_tensor_array[i] = content
        output_tensor_array[i+5] = styles[i]
        output_tensor_array[i+10] = output.unsqueeze(0)
        
        # CHW tensor -> HWC uint8 numpy for imsave
        output = output.squeeze().permute(1, 2, 0).cpu().numpy()

        output = (output * 255).astype(np.uint8)

        # per-content output folder named by file stem (assumes 3-char extension)
        out_path = os.path.join(args.output,str(image[:-4]))
        
        #import pdb; pdb.set_trace()
        if not os.path.isdir(out_path):
            os.makedirs(out_path)
    
        plt.imsave(str(os.path.join(args.output,str(image[:-4]),str(f"ref{i+1}") + "_" + str(image))), output)
    
    # stitch all 15 tiles into a single margin-free 5-column grid
    grid = torchvision.utils.make_grid(output_tensor_array, nrow = 5)
    #lt.figure(figsize = (300, 500))
    plt.imshow(grid.permute(1, 2, 0))
    plt.axis('off')
    # NOTE(review): saves into output_dir, but only out_path was created above —
    # confirm output_dir is defined and exists before this runs
    plt.savefig(os.path.join(output_dir,f"{image}"), dpi = 600)

 

tqdm 사용법

+ 토치 이미지 저장

 

from tqdm import tqdm

# Stylize every training batch with each style and save each image to its own
# folder, indexed globally across batches.
for iter_, (data,label) in tqdm(enumerate(trainloader), total=len(trainloader)):
    data = data.to(device)

    for i, style in enumerate(styles):
        
        with torch.no_grad():
            output = style_transfer(vgg, decoder, data, style,
                                    args.alpha)
        output = torch.clamp(output, 0., 1.)  # keep pixels in [0, 1] for save_image
        #output = output.squeeze().permute(1, 2, 0).cpu().numpy()
        
        for p in range(output.size(0)):
            # NOTE(review): the hard-coded 16 assumes batch_size == 16; the
            # global index breaks for any other loader batch size — confirm.
            output_dir = os.path.join(args.output, f'{p+16*iter_}')
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            
            torchvision.utils.save_image(output[p, :, :, :], os.path.join(output_dir, f'{p+16*iter_}_ref{i}.jpg'))

 

폴더에서 파일 랜덤 뽑아서 이동

import os
import random
import shutil

# Move a random sample of files from `source` into `dest`.
source = '../../dataset/dongkyun/afhq/train/fsmr_dataset/stylized_4000'
dest = '../../dataset/dongkyun/afhq/train/fsmr_dataset/cat_subfolder_stylized_1000'
no_of_files = 1000
print(no_of_files)

candidates = os.listdir(source)
if not os.path.isdir(dest):
    os.makedirs(dest)

for name in random.sample(candidates, no_of_files):
    shutil.move(os.path.join(source, name), dest)

 

1 10 100 indexed -> 1 2 3 순으로 바꾸기

 

 

이미지 resize (cv2)

# Resize every image under `path` to 256x256 and save as centered_<idx>.png
# into out_dir (both defined elsewhere).
# FIX: the last two lines were over-indented (syntax error), and os.listdir
# was called twice — the listing is now hoisted out of the loop header.
image_names = os.listdir(path)
for num, img in tqdm(enumerate(image_names), total=len(image_names)):
    filename = os.path.join(path, img)
    image = cv2.imread(filename)  # BGR ndarray; None if the file is not an image
    image = cv2.resize(image, (256, 256))
    loc = os.path.join(out_dir, f'centered_{num}.png')
    cv2.imwrite(loc, image)