| | from typing import Dict, Any, Optional
|
| | import numpy as np
|
| |
|
class MemoryBlock:
    """Base class for GPU memory blocks.

    Wraps a fixed-size, zero-initialised ``bytearray`` and a bump pointer:
    ``allocate`` hands out consecutive regions and never reclaims them.
    """

    def __init__(self, size_bytes: int):
        """Create a block holding ``size_bytes`` bytes."""
        self.size = size_bytes
        self.data = bytearray(size_bytes)
        # Offset of the first byte not yet handed out by ``allocate``.
        self.offset = 0

    def allocate(self, size_bytes: int) -> Optional[int]:
        """Allocate memory and return offset.

        Returns the starting offset of the reserved region, or ``None``
        when fewer than ``size_bytes`` bytes remain.

        Raises:
            ValueError: if ``size_bytes`` is negative (it would otherwise
                move the bump pointer backwards over live allocations).
        """
        if size_bytes < 0:
            raise ValueError("Allocation size must be non-negative")
        if self.offset + size_bytes > self.size:
            return None
        current_offset = self.offset
        self.offset += size_bytes
        return current_offset

    def write(self, offset: int, data: bytes):
        """Write data at specified offset.

        Raises:
            ValueError: if ``offset`` is negative or the write would run
                past the end of the block.
        """
        # A negative offset passes the upper-bound check below, and the
        # resulting slice assignment can then resize the buffer.
        if offset < 0:
            raise ValueError("Write offset must be non-negative")
        end = offset + len(data)
        if end > self.size:
            raise ValueError("Write operation exceeds memory block size")
        self.data[offset:end] = data

    def read(self, offset: int, size: int) -> bytes:
        """Read data from specified offset.

        Returns an immutable copy of ``size`` bytes starting at ``offset``.

        Raises:
            ValueError: if ``offset`` or ``size`` is negative, or the read
                would run past the end of the block.
        """
        # Negative values would slice relative to the end of the buffer
        # and silently return the wrong (often empty) data.
        if offset < 0 or size < 0:
            raise ValueError("Read offset and size must be non-negative")
        end = offset + size
        if end > self.size:
            raise ValueError("Read operation exceeds memory block size")
        return bytes(self.data[offset:end])
|
| |
|
class SharedMemory(MemoryBlock):
    """Represents shared memory accessible by all threads in a block"""

    def __init__(self, size_bytes: int = 48*1024):
        """Create the block (48 KiB by default) with an empty lock table."""
        super().__init__(size_bytes)
        # NOTE(review): nothing in this class reads or updates ``locks``;
        # presumably reserved for per-offset locking - confirm with callers.
        self.locks: Dict[int, bool] = {}

    def atomic_add(self, offset: int, value: int) -> int:
        """Perform atomic addition on the 32-bit word at ``offset``.

        The word is read as an unsigned little-endian integer, ``value`` is
        added, and the sum is stored back wrapped modulo 2**32. A negative
        ``value`` or a sum above 0xFFFFFFFF therefore wraps around instead
        of raising ``OverflowError`` from ``int.to_bytes``.

        Returns:
            The value stored at ``offset`` before the addition (unsigned).

        Raises:
            ValueError: propagated from ``read``/``write`` when the 4-byte
                word does not fit inside the block.
        """
        # NOTE(review): this is a plain read-modify-write; no lock is taken
        # here, so atomicity relies on callers not interleaving accesses.
        current = int.from_bytes(self.read(offset, 4), 'little')
        new_value = (current + value) & 0xFFFFFFFF
        self.write(offset, new_value.to_bytes(4, 'little'))
        return current
|
| |
|
class L1Cache(MemoryBlock):
    """Represents L1 cache memory"""

    def __init__(self, size_bytes: int = 32*1024):
        """Create the cache block (32 KiB by default) with 128-byte lines."""
        super().__init__(size_bytes)
        # Cached lines keyed by their line-aligned start address.
        self.cache_lines: Dict[int, bytes] = {}
        self.line_size = 128

    def load_line(self, address: int) -> bytes:
        """Load a cache line.

        ``address`` is rounded down to a multiple of ``line_size``. If that
        line is not cached yet, a zero-filled line of ``line_size`` bytes
        is stored for it; the stored line is returned either way.
        """
        offset_in_line = address % self.line_size
        base = address - offset_in_line
        try:
            return self.cache_lines[base]
        except KeyError:
            # Miss: install an all-zero line (the block's own ``data``
            # buffer is not consulted).
            fresh = bytes(self.line_size)
            self.cache_lines[base] = fresh
            return fresh
|
| |
|
class L2Cache(MemoryBlock):
    """Represents L2 cache memory"""

    def __init__(self, size_bytes: int = 1024*1024):
        """Create the cache block (1 MiB by default) with 256-byte lines."""
        super().__init__(size_bytes)
        # Cached lines keyed by their line-aligned start address.
        self.cache_lines: Dict[int, bytes] = {}
        self.line_size = 256

    def load_line(self, address: int) -> bytes:
        """Load a cache line.

        ``address`` is rounded down to a multiple of ``line_size``. A line
        already present in ``cache_lines`` is returned as is; otherwise a
        zero-filled line of ``line_size`` bytes is stored and returned.
        """
        base = address - (address % self.line_size)
        lines = self.cache_lines
        if base in lines:
            return lines[base]
        # Miss: install an all-zero line (the block's own ``data`` buffer
        # is not consulted).
        lines[base] = bytes(self.line_size)
        return lines[base]
|
| |
|
class RegisterFile:
    """Represents per-thread registers.

    Registers are handed out sequentially by ``allocate``; only indices
    below ``used_registers`` may be read or written.
    """

    def __init__(self, num_registers: int = 255):
        """Create ``num_registers`` registers, all initialised to 0."""
        self.registers = [0] * num_registers
        # Number of registers handed out so far; also the first free index.
        self.used_registers = 0

    def allocate(self, num: int = 1) -> Optional[int]:
        """Allocate registers and return starting index.

        Returns the index of the first newly allocated register, or
        ``None`` when fewer than ``num`` registers remain.

        Raises:
            ValueError: if ``num`` is negative (it would otherwise shrink
                the allocated window and invalidate live registers).
        """
        if num < 0:
            raise ValueError("Register count must be non-negative")
        if self.used_registers + num > len(self.registers):
            return None
        start = self.used_registers
        self.used_registers += num
        return start

    def read(self, index: int) -> int:
        """Read from register.

        Raises:
            IndexError: if ``index`` is not an allocated register.
        """
        if not 0 <= index < self.used_registers:
            raise IndexError("Register index out of bounds")
        return self.registers[index]

    def write(self, index: int, value: int):
        """Write to register.

        Raises:
            IndexError: if ``index`` is not an allocated register.
        """
        if not 0 <= index < self.used_registers:
            raise IndexError("Register index out of bounds")
        self.registers[index] = value
|
| |
|