Two Simple Techniques for Improving Deep Learning Model Performance - Input Mixup & Label Smoothing
In this post, I study and summarize two methods that are widely used in recent deep learning training and can improve model performance in a simple way, without changing the model architecture.
1. Mixup Training
During training, two samples are drawn at random, mixed together (mixup), and the mixed sample is used for training. With lambda set to 0.5 the two images are mixed in equal proportion, but usually one of the two images is given a larger weight.
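Concretely, the mixup formulation from the original paper (Zhang et al., 2018) builds each training example as a convex combination of two samples:

    x̃ = λ · x_i + (1 − λ) · x_j
    ỹ = λ · y_i + (1 − λ) · y_j,   with λ ~ Beta(α, α)

The hyperparameter α controls how strongly the two samples are mixed; the mixup_data function in the code section below samples λ in exactly this way.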
🤔 Why does using Mixup improve performance?
1️⃣ Data Augmentation
Performing data augmentation in a more randomized form has an effect similar to training the model on a much larger amount of data.
2️⃣ Preventing Over-Fitting
To some extent, it also acts as a form of regularization.
2. Label Smoothing
Label Smoothing is a method that smooths the labels to improve generalization performance. It is similar to Mixup Training, but the biggest difference is that the images are left untouched and only the labels are changed.
➡️ The correct label is not assigned 100% probability.
Instead of the hard-label approach, which assigns 1 to the correct label and 0 to all other labels, the soft-label approach uses a smoothing formula to assign a value close to 1 to the correct label and a value slightly greater than 0 to the remaining labels.
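Written as a formula (matching the implementation in the code section below), with smoothing coefficient ε and K classes, the smoothed target for class k is:

    y_k = 1 − ε          if k is the correct class
    y_k = ε / (K − 1)    otherwise

For example, with ε = 0.1 and K = 10 (the CIFAR-10 setting used below), the correct class receives 0.9 and every other class receives 0.1 / 9 ≈ 0.011.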
Because mislabeled examples can exist due to human error, forcing the model to have very high confidence in a single label can cause various problems.
💻 Code Practice - Input Mixup & Label Smoothing
Required libraries and implementation
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

mixup_alpha = 1.0

def mixup_data(x, y):
    # Sample the mixing coefficient lambda from Beta(alpha, alpha)
    lam = np.random.beta(mixup_alpha, mixup_alpha)
    batch_size = x.size()[0]
    # Random permutation of the batch: each sample is mixed with a random partner
    index = torch.randperm(batch_size).cuda()
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # Mix the two losses with the same coefficient used for the inputs
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self):
        super(LabelSmoothingCrossEntropy, self).__init__()

    def forward(self, y, targets, smoothing=0.1):
        confidence = 1. - smoothing
        log_probs = F.log_softmax(y, dim=-1)  # log predicted probabilities
        true_probs = torch.zeros_like(log_probs)
        true_probs.fill_(smoothing / (y.shape[1] - 1))  # spread the smoothing mass over the non-target classes
        true_probs.scatter_(1, targets.data.unsqueeze(1), confidence)  # set the target-class probability to confidence
        return torch.mean(torch.sum(true_probs * -log_probs, dim=-1))  # negative log likelihood
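As a quick sanity check (an illustrative snippet that is not part of the original post), the criterion can be called on dummy logits; with smoothing=0.1 and 10 classes, the smoothed target puts 0.9 on the correct class and roughly 0.011 on each of the others:

    # illustrative usage only; the tensors here are dummy values
    logits = torch.randn(4, 10)           # fake predictions for a batch of 4 samples, 10 classes
    targets = torch.tensor([0, 3, 7, 9])  # fake ground-truth labels
    criterion_example = LabelSmoothingCrossEntropy()
    print(criterion_example(logits, targets).item())  # scalar label-smoothed cross-entropy loss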
Environment setup and training function definition
import os
import torch.optim as optim

device = 'cuda'
# ResNet18() is assumed to be defined elsewhere (a standard ResNet-18 for CIFAR-10)
net = ResNet18()
net = net.to(device)

learning_rate = 0.1
file_name = 'resnet18_cifar10.pth'

criterion = LabelSmoothingCrossEntropy()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0002)
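The train and test functions below iterate over train_loader and test_loader, which are not defined in this post. The following is a minimal sketch of how they could be created for CIFAR-10; the transforms and batch sizes are assumptions, not values from the original code:

    # assumed CIFAR-10 data loaders (illustrative; settings are not from the original post)
    import torchvision
    import torchvision.transforms as transforms

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)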
def train(epoch):
    print('\n[ Train epoch: %d ]' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        # Apply input mixup: mixed inputs, the two original label sets, and the mixing coefficient
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets)
        optimizer.zero_grad()
        outputs = net(inputs)
        # Mixup loss: weighted combination of the losses against both label sets
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        # Accuracy on mixed samples: weighted agreement with both label sets
        current_correct = lam * predicted.eq(targets_a).sum().item() + (1 - lam) * predicted.eq(targets_b).sum().item()
        correct += current_correct

        if batch_idx % 100 == 0:
            print('\nCurrent batch:', str(batch_idx))
            print('Current batch average train accuracy:', current_correct / targets.size(0))
            print('Current batch average train loss:', loss.item() / targets.size(0))

    print('\nTotal average train accuracy:', correct / total)
    print('Total average train loss:', train_loss / total)
def test(epoch):
    print('\n[ Test epoch: %d ]' % epoch)
    net.eval()
    loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        total += targets.size(0)
        outputs = net(inputs)
        loss += criterion(outputs, targets).item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()

    print('\nTotal average test accuracy:', correct / total)
    print('Total average test loss:', loss / total)

    # Save the current model weights as a checkpoint
    state = {
        'net': net.state_dict()
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/' + file_name)
    print('Model Saved!')
Running training
import time

def adjust_learning_rate(optimizer, epoch):
    # Step schedule: 0.1 -> 0.01 at epoch 50 -> 0.001 at epoch 100
    lr = learning_rate
    if epoch >= 50:
        lr /= 10
    if epoch >= 100:
        lr /= 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

start_time = time.time()

for epoch in range(0, 150):
    adjust_learning_rate(optimizer, epoch)
    train(epoch)
    test(epoch)
    if epoch % 10 == 0:
        print('\nTime elapsed:', time.time() - start_time)
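After training finishes, the checkpoint saved by test() can be reloaded for later evaluation. This is a minimal sketch based only on the save format shown above, not code from the original post:

    # illustrative sketch: reload the checkpoint saved by test()
    checkpoint = torch.load('./checkpoint/' + file_name)
    net.load_state_dict(checkpoint['net'])
    net.eval()  # switch to evaluation mode before running inference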