vikp
/

llama_coder

Text Generation

text-generation-inference

Model card Files Files and versions

llama_coder / llama_model.py

vikp's picture

Upload CodeLlamaForCausalLM

75dcc88 over 2 years ago

history blame contribute delete

2.28 kB

	from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaAttention, LlamaRotaryEmbedding
	from transformers.models.llama.configuration_llama import LlamaConfig
	import torch


	class CodeLlamaConfig(LlamaConfig):
		"""Llama configuration that pins ``rope_theta`` to a float.

		After the parent constructor has consumed ``kwargs``, ``rope_theta`` is
		reset to ``10000.0``. If the caller passed a truthy ``rope_theta`` keyword,
		it is converted with ``float()`` and stored instead; a missing or falsy
		value (``None``, ``0``) leaves the default in place.
		"""

		def __init__(self, **kwargs):
			"""Build the config from ``kwargs`` and normalise ``rope_theta``.

			A value that cannot be converted to ``float`` is not fatal: a message
			is printed and ``rope_theta`` stays at ``10000.0``.
			"""
			super().__init__(**kwargs)
			# Always start from the default, whatever the parent constructor stored.
			self.rope_theta = 10000.0

			requested_theta = kwargs.get("rope_theta")
			if not requested_theta:
				return

			try:
				self.rope_theta = float(requested_theta)
				print(f"Rope theta set to {self.rope_theta}")
			except Exception:
				# Best effort: report the problem and keep the default assigned above.
				print("Could not set rope theta properly, ensure it is a number")


	class CodeLlamaNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
		"""Rotary embedding whose frequency and cos/sin buffers are built from ``self.base``.

		NOTE(review): ``scaling_factor`` is stored on the instance but is not read
		anywhere in this class, so no scaling is applied by the code shown here.
		Confirm whether that is intentional.
		"""

		def __init__(self, dim, max_position_embeddings=2048, base=1000000.0, device=None, scaling_factor=1.0):
			"""Store ``base`` and ``scaling_factor``, then defer to the parent constructor.

			Args:
				dim: Forwarded to the parent; ``_set_cos_sin_cache`` later reads ``self.dim``.
				max_position_embeddings: Forwarded to the parent constructor.
				base: Base of the inverse-frequency power; defaults to ``1000000.0``.
				device: Forwarded to the parent constructor.
				scaling_factor: Stored as ``self.scaling_factor``; unused in this class.
			"""
			# Both attributes are assigned before super().__init__ runs, presumably
			# because the parent constructor calls _set_cos_sin_cache, which reads
			# self.base. TODO confirm against the installed transformers version.
			self.base = base
			self.scaling_factor = scaling_factor
			super().__init__(dim, max_position_embeddings, base, device)

		def _set_cos_sin_cache(self, seq_len, device, dtype):
			"""Recompute ``inv_freq`` and the cached cos/sin tables for ``seq_len`` positions.

			Registers three non-persistent buffers: ``inv_freq``, ``cos_cached`` and
			``sin_cached``. The cos/sin tables are cast to ``dtype`` and carry two
			leading singleton axes in front of the (position, feature) axes.
			"""
			self.max_seq_len_cached = seq_len

			# One inverse frequency for every second index in [0, dim).
			exponents = torch.arange(0, self.dim, 2).float().to(device) / self.dim
			self.register_buffer("inv_freq", 1.0 / (self.base ** exponents), persistent=False)

			positions = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)

			# Outer product: row i holds position i multiplied by every inverse frequency.
			angles = torch.einsum("i,j->ij", positions, self.inv_freq)
			# The table is concatenated with itself along the last axis. Original
			# note: this differs from the paper but uses a different permutation in
			# order to obtain the same calculation.
			table = torch.cat((angles, angles), dim=-1)

			cos_table = table.cos()[None, None, :, :]
			self.register_buffer("cos_cached", cos_table.to(dtype), persistent=False)
			sin_table = table.sin()[None, None, :, :]
			self.register_buffer("sin_cached", sin_table.to(dtype), persistent=False)

	class CodeLlamaForCausalLM(LlamaForCausalLM):
		"""Llama causal LM whose attention layers use ``CodeLlamaNTKScalingRotaryEmbedding``.

		Construction is delegated to ``LlamaForCausalLM``; afterwards the
		``rotary_emb`` of every layer's ``self_attn`` is replaced with an embedding
		built from ``config.rope_theta``.
		"""

		# Configuration class associated with this model.
		config_class = CodeLlamaConfig

		_tied_weights_keys = ["lm_head.weight"]

		def __init__(self, config):
			"""Build the base model, then swap in the custom rotary embedding per layer.

			Args:
				config: Model configuration; ``config.rope_theta`` becomes the
					rotary embedding base.
			"""
			super().__init__(config)
			for decoder_layer in self.model.layers:
				attention = decoder_layer.self_attn
				# Reuse the sizes the attention module already carries so the new
				# embedding matches the layer it is attached to.
				attention.rotary_emb = CodeLlamaNTKScalingRotaryEmbedding(
					attention.head_dim,
					attention.max_position_embeddings,
					base=config.rope_theta,
				)