CNN 구현 (MNIST)

학습 목표

이 레슨을 완료하면:

• PyTorch로 CNN 모델을 처음부터 구현합니다
• MNIST 데이터셋에서 99% 이상의 정확도를 달성합니다
• 학습 과정을 시각화하고 분석합니다
• MLP와 CNN의 성능을 비교합니다

핵심 메시지

"CNN은 이미지의 공간 구조를 이해합니다." MLP는 이미지를 일렬로 펼쳐서 처리하지만, CNN은 2D 구조를 유지하며 인접 픽셀 간의 관계를 학습합니다. 이것이 이미지 인식에서 CNN이 압도적인 이유입니다.

1. 데이터 준비

🔬 실습: MNIST 데이터 로딩

python⚠️ 로컬 실행 필요
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# ═══════════════════════════════════════════════════════════════
# 📊 MNIST 데이터셋 준비
# ═══════════════════════════════════════════════════════════════

# Preprocessing pipeline applied to every image: tensor conversion followed
# by normalization with the MNIST mean and standard deviation.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Datasets stored under ./data; the training split requests a download.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
)

# Loaders: shuffled batches of 64 for training, ordered batches of 1000 for
# testing.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# Print a short summary of the dataset, one item per line.
print(
    "=" * 50,
    "📊 MNIST 데이터셋 정보",
    "=" * 50,
    f"훈련 데이터: {len(train_dataset)}개",
    f"테스트 데이터: {len(test_dataset)}개",
    "이미지 크기: 28 × 28 × 1 (흑백)",
    "클래스 수: 10 (숫자 0-9)",
    sep="\n",
)

# Visualize the first ten training samples in a 2x5 grid, each titled with
# its label.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for sample_idx, panel in enumerate(axes.ravel()):
    sample_img, sample_label = train_dataset[sample_idx]
    # squeeze() drops the size-1 dimensions so imshow receives a 2-D array.
    panel.imshow(sample_img.squeeze(), cmap='gray')
    panel.set_title(f'Label: {sample_label}')
    panel.axis('off')
fig.suptitle('MNIST Sample Images', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

2. CNN 모델 설계

모델 구조

레이어	출력 크기	파라미터
입력	1×28×28	-
Conv1 (3×3, 32필터)	32×28×28	320
BatchNorm1	32×28×28	64
MaxPool (2×2)	32×14×14	-
Conv2 (3×3, 64필터)	64×14×14	18,496
BatchNorm2	64×14×14	128
MaxPool (2×2)	64×7×7	-
Flatten	3136	-
FC1	128	401,536
FC2	10	1,290

🔬 실습: CNN 모델 정의

python⚠️ 로컬 실행 필요
import torch
import torch.nn as nn

# ═══════════════════════════════════════════════════════════════
# 📊 CNN 모델 정의
# ═══════════════════════════════════════════════════════════════

class CNN(nn.Module):
    """Small convolutional classifier producing 10 logits per input image.

    Architecture: two blocks of Conv(3x3, padding=1) -> BatchNorm -> ReLU ->
    MaxPool(2x2), followed by two fully connected layers with dropout in
    between. The first linear layer expects 64 * 7 * 7 features, i.e. a
    single-channel 28x28 input after the two 2x2 poolings.
    """

    def __init__(self):
        super().__init__()

        # Convolutional layers (padding=1 keeps the spatial size for 3x3).
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Pooling layer, shared by both convolutional blocks.
        self.pool = nn.MaxPool2d(2, 2)

        # Batch normalization, one per convolutional block.
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)

        # Fully connected layers.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

        # Dropout applied after the first fully connected layer.
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for input ``x``.

        Raises:
            RuntimeError: if the flattened feature size does not match the
                64 * 7 * 7 inputs expected by ``fc1`` (for example when the
                spatial size of ``x`` is not 28x28).
        """
        # First convolutional block: Conv -> BN -> ReLU -> Pool.
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))

        # Second convolutional block.
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 7 * 7), this never folds surplus features into the
        # batch dimension, so a wrongly sized input fails loudly in fc1
        # instead of silently yielding extra rows.
        x = torch.flatten(x, start_dim=1)

        # Fully connected head.
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.fc2(x)

        return x

# Instantiate the model.
model = CNN()

# Print the model architecture.
print("=" * 50)
print("📊 CNN 모델 구조")
print("=" * 50)
print(model)

# Count all parameters and the subset that requires gradients.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n총 파라미터: {total_params:,}")
print(f"학습 가능: {trainable_params:,}")

# Input/output shape check on a random dummy image. The pass runs in eval
# mode and without autograd so the random input does not update the
# BatchNorm running statistics, dropout is inactive, and no graph is built.
x = torch.randn(1, 1, 28, 28)
model.eval()
with torch.no_grad():
    output = model(x)
model.train()  # restore the training mode a freshly built module starts in
print(f"\n입력 shape: {x.shape}")
print(f"출력 shape: {output.shape}")

3. 학습 루프

🔬 실습: 모델 학습

python⚠️ 로컬 실행 필요
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# ═══════════════════════════════════════════════════════════════
# 📊 CNN 모델 학습
# ═══════════════════════════════════════════════════════════════

# Device selection: use CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"사용 디바이스: {device}")

# Model, loss function, and optimizer.
model = CNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Per-epoch training history.
train_losses, train_accs, test_accs = [], [], []

# 학습 함수
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader`` and report loss and accuracy.

    The model is switched to training mode. Each batch is moved to
    ``device``, then a full zero_grad / forward / backward / step cycle runs.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the sum of the per-batch
        loss values divided by ``len(loader)``. ``accuracy`` is the
        percentage of samples whose argmax prediction equals the target.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Statistics use the logits computed before this step's update.
        running_loss += batch_loss.item()
        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_seen += labels.size(0)

    mean_loss = running_loss / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

# 평가 함수
def evaluate(model, loader, device):
    """Return the accuracy (in percent) of ``model`` over ``loader``.

    The model is switched to evaluation mode and every batch is processed
    with gradient tracking disabled. A sample counts as correct when the
    argmax of the model output equals its target.
    """
    model.eval()
    hits, seen = 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)

    return 100. * hits / seen

# Run training: one training pass and one test evaluation per epoch, with
# the results appended to the history lists.
epochs = 10
print("\n학습 시작!")
print("=" * 50)

for epoch in range(1, epochs + 1):
    train_loss, train_acc = train_epoch(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    test_acc = evaluate(model=model, loader=test_loader, device=device)

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    print(
        f"Epoch {epoch:2d}/{epochs} | Loss: {train_loss:.4f} | "
        f"Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%"
    )

print("=" * 50)
print(f"최종 테스트 정확도: {test_accs[-1]:.2f}%")

# Learning-curve visualization: training loss on the left, train/test
# accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Explicit 1-based x values so the "Epoch" axis matches the epoch numbers
# printed during training (plotting the lists alone would start at 0).
epoch_axis = range(1, len(train_losses) + 1)

axes[0].plot(epoch_axis, train_losses, 'b-', linewidth=2)
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training Loss')
axes[0].grid(True, alpha=0.3)

axes[1].plot(epoch_axis, train_accs, 'b-', label='Train', linewidth=2)
axes[1].plot(epoch_axis, test_accs, 'r--', label='Test', linewidth=2)
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy (%)')
axes[1].set_title('Accuracy')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.suptitle('CNN Training Results (MNIST)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

4. 예측 결과 분석

🔬 실습: 예측 시각화

python⚠️ 로컬 실행 필요
import torch
import matplotlib.pyplot as plt
import numpy as np

# ═══════════════════════════════════════════════════════════════
# 📊 예측 결과 분석
# ═══════════════════════════════════════════════════════════════

# Switch to evaluation mode before running inference.
model.eval()

# Take the first batch from the test loader and keep its first 16 samples.
test_images, test_labels = next(iter(test_loader))
test_images = test_images[:16].to(device)
test_labels = test_labels[:16].to(device)

# Predict with gradient tracking disabled; the class is the argmax over
# dimension 1 of the model output.
with torch.no_grad():
    outputs = model(test_images)
predictions = outputs.argmax(dim=1)

# Visualization: a 4x4 grid of the selected test images, each titled with
# the predicted and true label (green when they match, red otherwise).
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(10, 10))

for idx, panel in enumerate(axes.ravel()):
    expected = test_labels[idx].item()
    guessed = predictions[idx].item()

    # Move the image to the CPU and drop size-1 dimensions for imshow.
    panel.imshow(test_images[idx].cpu().squeeze(), cmap='gray')

    if expected == guessed:
        title_color = 'green'
    else:
        title_color = 'red'
    panel.set_title(f'Pred: {guessed} (True: {expected})', color=title_color)
    panel.axis('off')

fig.suptitle('CNN Predictions (Green=Correct, Red=Wrong)', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

# Count how many of the displayed samples were classified correctly.
correct = predictions.eq(test_labels).sum().item()
print(f"\n표시된 16개 중 {correct}개 정답")

핵심 요약

단계	내용	핵심 포인트
데이터	MNIST 로딩	transforms, DataLoader
모델	CNN 정의	Conv → BN → ReLU → Pool
학습	10 에포크	Adam, CrossEntropy
결과	99%+ 정확도	MLP보다 우수

학습 체크리스트

• CNN 모델을 직접 정의할 수 있다
• 학습 루프를 구현할 수 있다
• train/eval 모드의 차이를 안다
• 학습 곡선을 해석할 수 있다

다음 강의 예고

"CNN 구현 (CIFAR-10)" - 더 복잡한 컬러 이미지 분류에 도전합니다!

CNN 구현 (MNIST)

📓Google Colab에서 실습하기

학습 내용

CNN 구현 (MNIST)

학습 목표

핵심 메시지

1. 데이터 준비

🔬 실습: MNIST 데이터 로딩

2. CNN 모델 설계

모델 구조

🔬 실습: CNN 모델 정의

3. 학습 루프

🔬 실습: 모델 학습

4. 예측 결과 분석

🔬 실습: 예측 시각화

핵심 요약

학습 체크리스트

다음 강의 예고

레슨 정보

💡실습 환경 안내

이 레벨의 다른 레슨