# SGD implementation

from .base import BaseOptimizer


class SGDOptimizer(BaseOptimizer):
    """
    Stochastic Gradient Descent (SGD) optimizer with optional momentum.

    Each step moves a parameter against its gradient, scaled by the
    learning rate. With momentum > 0, a per-parameter velocity term
    accumulates past gradients to smooth the updates.
    """

    def __init__(self, params, lr=0.01, momentum=0.0):
        self.params = list(params)  # materialize so a generator can be iterated repeatedly
        self.lr = lr
        self.momentum = momentum
        self.state = {p: {'velocity': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue
            state = self.state[p]
            # Classic momentum update: v <- momentum * v - lr * grad, then p <- p + v.
            state['velocity'] = self.momentum * state['velocity'] - self.lr * p.grad
            p.data += state['velocity']

    def zero_grad(self):
        # Clear gradients to None (not 0) so step() skips parameters that
        # have not received a fresh gradient, matching the check above.
        for p in self.params:
            p.grad = None

    def __repr__(self):
        return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"

    def state_dict(self):
        return {p: {'velocity': state['velocity']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        for p in self.params:
            # Restore the saved velocity where available; otherwise start fresh.
            self.state[p] = state_dict.get(p, {'velocity': 0})