Novaciano committed on
Commit
8d40d9d
verified
1 Parent(s): 3bce3a9

Update app.py

Files changed (1)
  1. app.py +84 -49
app.py CHANGED
@@ -4,18 +4,14 @@ import signal
 import tempfile
 from pathlib import Path
 from textwrap import dedent
-from typing import Optional
 from dataclasses import dataclass, field
 
 import gradio as gr
 from datasets import load_dataset
 from huggingface_hub import HfApi, ModelCard, whoami
-from gradio_huggingfacehub_search import HuggingfaceHubSearch
-from apscheduler.schedulers.background import BackgroundScheduler
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
-# ----------------- CONFIG DATACLASSES -----------------
 @dataclass
 class QuantizationConfig:
     method: str
@@ -35,7 +31,7 @@ class QuantizationConfig:
 class SplitConfig:
     enabled: bool = False
     max_tensors: int = 256
-    max_size: Optional[str] = None
+    max_size: str | None = None
 
 @dataclass
 class OutputConfig:
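These config dataclasses are wired together by run_full_pipeline further down in this diff. A minimal sketch of how they are constructed (field defaults not shown in these hunks are assumed to exist):

    # Hypothetical standalone construction, mirroring run_full_pipeline below.
    quant_config = QuantizationConfig(method="Q4_0")
    quant_config.fp16_model = "./outputs/my-model.f16"    # filled in after init
    quant_config.quantized_gguf = "./outputs/my-model.gguf"
    split_config = SplitConfig()                          # max_size stays None unless splitting
    output_config = OutputConfig(private_repo=False, repo_name="my-model-gguf")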
@@ -55,57 +51,46 @@ class ModelProcessingConfig:
     new_repo_url: str = field(default="", init=False)
     new_repo_id: str = field(default="", init=False)
 
-# ----------------- EXCEPTIONS -----------------
 class GGUFConverterError(Exception):
     pass
 
-# ----------------- PROCESSOR -----------------
 class HuggingFaceModelProcessor:
-    ERROR_LOGIN = "You must be logged in to use GGUF-my-repo."
+    QUANTIZE_TIMEOUT = 86400
+    HF_TO_GGUF_TIMEOUT = 3600
+    IMATRIX_TIMEOUT = 86400
+    SPLIT_TIMEOUT = 3600
+    KILL_TIMEOUT = 5
+
     DOWNLOAD_FOLDER = "./downloads"
     OUTPUT_FOLDER = "./outputs"
     CALIBRATION_FILE = "calibration_data_v5_rc.txt"
-    QUANTIZE_TIMEOUT=86400
-    HF_TO_GGUF_TIMEOUT=3600
-    IMATRIX_TIMEOUT=86400
-    SPLIT_TIMEOUT=3600
-    KILL_TIMEOUT=5
 
     def __init__(self):
         self.HF_TOKEN = os.environ.get("HF_TOKEN")
-        self.RUN_LOCALLY = os.environ.get("RUN_LOCALLY")
         self._create_folder(self.DOWNLOAD_FOLDER)
         self._create_folder(self.OUTPUT_FOLDER)
 
-    def _create_folder(self, folder_name: str) -> str:
+    def _create_folder(self, folder_name: str):
         if not os.path.exists(folder_name):
             os.makedirs(folder_name)
         return folder_name
 
-    # ----------------- DATASET -----------------
-    def load_dataset_from_hf(self, dataset_name: str):
-        print(f"Loading dataset: {dataset_name}")
-        ds = load_dataset(dataset_name, use_auth_token=self.HF_TOKEN)
-        print("Dataset loaded successfully")
-        return ds
-
-    # ----------------- MODEL -----------------
-    def _download_base_model(self, processing_config: ModelProcessingConfig) -> str:
-        """Download and convert Hugging Face -> GGUF FP16"""
+    def _download_dataset(self, dataset_name: str):
+        print(f"Loading dataset from the Hugging Face Hub: {dataset_name}")
+        dataset = load_dataset(dataset_name, use_auth_token=self.HF_TOKEN)
+        return dataset
+
+    def _download_model(self, processing_config: ModelProcessingConfig):
         print(f"Downloading model {processing_config.model_name}")
         if os.path.exists(processing_config.quant_config.fp16_model):
-            print("FP16 already exists, skipping download")
+            print("FP16 already exists, skipping conversion.")
             return processing_config.quant_config.fp16_model
 
         with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
             local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
             api = HfApi(token=processing_config.token)
-            pattern = "*.safetensors" if any(
-                file.path.endswith(".safetensors")
-                for file in api.list_repo_tree(repo_id=processing_config.model_id, recursive=True)
-            ) else "*.bin"
-            dl_pattern = ["*.md", "*.json", "*.model"] + [pattern]
-            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=dl_pattern)
+            pattern = "*.safetensors"
+            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=[pattern])
             convert_command = [
                 "python3", "/app/convert_hf_to_gguf.py", local_dir,
                 "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
@@ -119,13 +104,13 @@ class HuggingFaceModelProcessor:
             process.wait(timeout=self.KILL_TIMEOUT)
         except subprocess.TimeoutExpired:
             process.kill()
-            raise GGUFConverterError("Error converting to fp16: Timeout")
+            raise GGUFConverterError("Error converting to FP16: timeout")
         if process.returncode != 0:
-            raise GGUFConverterError(f"Error converting to fp16: {process.returncode}")
+            raise GGUFConverterError(f"Error converting to FP16: code={process.returncode}")
+        print("Model converted to FP16 successfully")
         return processing_config.quant_config.fp16_model
 
-    # ----------------- QUANTIZATION -----------------
-    def _quantize_model(self, quant_config: QuantizationConfig) -> str:
+    def _quantize_model(self, quant_config: QuantizationConfig):
         quantize_cmd = ["llama-quantize"]
         if quant_config.quant_embedding:
             quantize_cmd.extend(["--token-embedding-type", quant_config.embedding_tensor_method])
@@ -135,11 +120,11 @@ class HuggingFaceModelProcessor:
         if quant_config.quant_output:
             quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
         if quant_config.use_imatrix:
-            self._generate_importance_matrix(quant_config)
-            quantize_cmd.extend(["--imatrix", quant_config.imatrix_file])
+            raise NotImplementedError("imatrix is not implemented in this automated demo")
         quantize_cmd.append(quant_config.fp16_model)
         quantize_cmd.append(quant_config.quantized_gguf)
-        quantize_cmd.append(quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method)
+        quantize_cmd.append(quant_config.method)
+
         process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
         try:
             process.wait(timeout=self.QUANTIZE_TIMEOUT)
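With the defaults run_full_pipeline uses below (method="Q4_0", no imatrix, no tensor-type overrides), the assembled command reduces to (paths illustrative):

    # Equivalent of the command _quantize_model builds for the default config.
    quantize_cmd = ["llama-quantize", "./outputs/model.f16", "./outputs/model.gguf", "Q4_0"]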
@@ -149,21 +134,71 @@ class HuggingFaceModelProcessor:
             process.wait(timeout=self.KILL_TIMEOUT)
         except subprocess.TimeoutExpired:
             process.kill()
-            raise GGUFConverterError("Error quantizing: Timeout")
+            raise GGUFConverterError("Error quantizing: timeout")
         if process.returncode != 0:
-            raise GGUFConverterError(f"Error quantizing: {process.returncode}")
+            raise GGUFConverterError(f"Error quantizing: code={process.returncode}")
+        print("Quantization finished")
         return quant_config.quantized_gguf
 
-# ----------------- GRADIO -----------------
-def app_interface(dataset_name: str):
-    processor = HuggingFaceModelProcessor()
-    ds = processor.load_dataset_from_hf(dataset_name)
-    return f"Dataset loaded: {dataset_name}\nNumber of splits: {len(ds)}"
+    def _create_repo(self, processing_config: ModelProcessingConfig):
+        api = HfApi(token=processing_config.token)
+        new_repo_url = api.create_repo(repo_id=processing_config.output_config.repo_name, exist_ok=True, private=processing_config.output_config.private_repo)
+        processing_config.new_repo_url = new_repo_url.url
+        processing_config.new_repo_id = new_repo_url.repo_id
+        print("Repository created:", processing_config.new_repo_url)
+        return new_repo_url
+
+    def run_full_pipeline(self, token, model_id, model_name, dataset_name):
+        logs = []
+        try:
+            # 1. Load the dataset
+            dataset = self._download_dataset(dataset_name)
+            logs.append(f"Dataset loaded: {dataset_name}")
+
+            # 2. Initial configuration
+            outdir = self.OUTPUT_FOLDER
+            quant_config = QuantizationConfig(method="Q4_0")
+            quant_config.fp16_model = f"{outdir}/{model_name}.f16"
+            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
+            split_config = SplitConfig()
+            output_config = OutputConfig(private_repo=False, repo_name=f"{model_name}-gguf")
+            processing_config = ModelProcessingConfig(
+                token=token, model_id=model_id, model_name=model_name, outdir=outdir,
+                quant_config=quant_config, split_config=split_config, output_config=output_config
+            )
+
+            # 3. Download the model
+            self._download_model(processing_config)
+            logs.append("Model downloaded and converted to FP16")
+
+            # 4. Quantize the model
+            self._quantize_model(quant_config)
+            logs.append("Model quantized to GGUF")
+
+            # 5. Create the repo
+            self._create_repo(processing_config)
+            logs.append(f"Repository created: {processing_config.new_repo_url}")
+
+        except Exception as e:
+            logs.append(f"ERROR: {e}")
+
+        return "\n".join(logs)
+
+# ----------------- Gradio interface -----------------
+processor = HuggingFaceModelProcessor()
 
 with gr.Blocks() as demo:
-    gr.Markdown("## Select a Hugging Face dataset")
-    dataset_input = gr.Textbox(label="Dataset name (e.g. openerotica/erotiquant3)")
-    output = gr.Textbox(label="Result", interactive=False)
-    dataset_input.submit(app_interface, inputs=dataset_input, outputs=output)
+    gr.Markdown("## Automated GGUF pipeline from the Hugging Face Hub")
+    dataset_input = gr.Textbox(label="Hugging Face dataset name", placeholder="openerotica/erotiquant3")
+    model_input = gr.Textbox(label="HF model ID", placeholder="ochoa/your-model")
+    token_input = gr.Textbox(label="Your HF token (optional; may be left empty if HF_TOKEN is set)", type="password")
+    run_button = gr.Button("Run automated pipeline")
+    output_logs = gr.Textbox(label="Logs", lines=20)
+
+    run_button.click(
+        fn=lambda token, model_id, model_name, dataset_name: processor.run_full_pipeline(token, model_id, model_name, dataset_name),
+        inputs=[token_input, model_input, model_input, dataset_input],
+        outputs=[output_logs]
+    )
 
 demo.launch()
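A minimal headless usage sketch of the new pipeline, bypassing the Gradio UI (model and dataset names are illustrative):

    import os

    processor = HuggingFaceModelProcessor()
    print(processor.run_full_pipeline(
        token=os.environ.get("HF_TOKEN"),
        model_id="some-org/some-model",   # hypothetical repo ID
        model_name="some-model",
        dataset_name="openerotica/erotiquant3",
    ))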
 
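As committed, the click handler passes model_input for both model_id and model_name. A sketch of keeping a single textbox and deriving the name from the repo ID instead (not part of this commit):

    def run(token, model_id, dataset_name):
        # Derive the output name from the repo ID rather than reusing the ID verbatim.
        model_name = model_id.split("/")[-1]
        return processor.run_full_pipeline(token, model_id, model_name, dataset_name)

    run_button.click(fn=run, inputs=[token_input, model_input, dataset_input], outputs=[output_logs])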