Spaces:

Novaciano
/

Train-With-Erotiquant3

Running

App Files Community

Novaciano committed on Oct 20

Commit

8d40d9d

verified ·

1 Parent(s): 3bce3a9

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -49

app.py CHANGED Viewed

@@ -4,18 +4,14 @@ import signal
 import tempfile
 from pathlib import Path
 from textwrap import dedent
-from typing import Optional
 from dataclasses import dataclass, field
 import gradio as gr
 from datasets import load_dataset
 from huggingface_hub import HfApi, ModelCard, whoami
-from gradio_huggingfacehub_search import HuggingfaceHubSearch
-from apscheduler.schedulers.background import BackgroundScheduler
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
-# ----------------- CONFIG DATACLASSES -----------------
 @dataclass
 class QuantizationConfig:
     method: str
@@ -35,7 +31,7 @@ class QuantizationConfig:
 class SplitConfig:
     enabled: bool = False
     max_tensors: int = 256
-    max_size: Optional[str] = None
 @dataclass
 class OutputConfig:
@@ -55,57 +51,46 @@ class ModelProcessingConfig:
     new_repo_url: str = field(default="", init=False)
     new_repo_id: str = field(default="", init=False)
-# ----------------- EXCEPTIONS -----------------
 class GGUFConverterError(Exception):
     pass
-# ----------------- PROCESSOR -----------------
 class HuggingFaceModelProcessor:
-    ERROR_LOGIN = "You must be logged in to use GGUF-my-repo."
     DOWNLOAD_FOLDER = "./downloads"
     OUTPUT_FOLDER = "./outputs"
     CALIBRATION_FILE = "calibration_data_v5_rc.txt"
-    QUANTIZE_TIMEOUT=86400
-    HF_TO_GGUF_TIMEOUT=3600
-    IMATRIX_TIMEOUT=86400
-    SPLIT_TIMEOUT=3600
-    KILL_TIMEOUT=5
     def __init__(self):
         self.HF_TOKEN = os.environ.get("HF_TOKEN")
-        self.RUN_LOCALLY = os.environ.get("RUN_LOCALLY")
         self._create_folder(self.DOWNLOAD_FOLDER)
         self._create_folder(self.OUTPUT_FOLDER)
-    def _create_folder(self, folder_name: str) -> str:
         if not os.path.exists(folder_name):
             os.makedirs(folder_name)
         return folder_name
-    # ----------------- DATASET -----------------
-    def load_dataset_from_hf(self, dataset_name: str):
-        print(f"Cargando dataset: {dataset_name}")
-        ds = load_dataset(dataset_name, use_auth_token=self.HF_TOKEN)
-        print("Dataset cargado correctamente")
-        return ds
-    # ----------------- MODELO -----------------
-    def _download_base_model(self, processing_config: ModelProcessingConfig) -> str:
-        """Descarga y convierte HuggingFace -> GGUF FP16"""
         print(f"Descargando modelo {processing_config.model_name}")
         if os.path.exists(processing_config.quant_config.fp16_model):
-            print("FP16 ya existe, omitiendo descarga")
             return processing_config.quant_config.fp16_model
         with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
             local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
             api = HfApi(token=processing_config.token)
-            pattern = "*.safetensors" if any(
-                file.path.endswith(".safetensors")
-                for file in api.list_repo_tree(repo_id=processing_config.model_id, recursive=True)
-            ) else "*.bin"
-            dl_pattern = ["*.md", "*.json", "*.model"] + [pattern]
-            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=dl_pattern)
             convert_command = [
                 "python3", "/app/convert_hf_to_gguf.py", local_dir,
                 "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
@@ -119,13 +104,13 @@ class HuggingFaceModelProcessor:
                     process.wait(timeout=self.KILL_TIMEOUT)
                 except subprocess.TimeoutExpired:
                     process.kill()
-                raise GGUFConverterError("Error convirtiendo a fp16: Timeout")
             if process.returncode != 0:
-                raise GGUFConverterError(f"Error convirtiendo a fp16: {process.returncode}")
             return processing_config.quant_config.fp16_model
-    # ----------------- CUANTIZACION -----------------
-    def _quantize_model(self, quant_config: QuantizationConfig) -> str:
         quantize_cmd = ["llama-quantize"]
         if quant_config.quant_embedding:
             quantize_cmd.extend(["--token-embedding-type", quant_config.embedding_tensor_method])
@@ -135,11 +120,11 @@ class HuggingFaceModelProcessor:
             if quant_config.quant_output:
                 quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
         if quant_config.use_imatrix:
-            self._generate_importance_matrix(quant_config)
-            quantize_cmd.extend(["--imatrix", quant_config.imatrix_file])
         quantize_cmd.append(quant_config.fp16_model)
         quantize_cmd.append(quant_config.quantized_gguf)
-        quantize_cmd.append(quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method)
         process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
         try:
             process.wait(timeout=self.QUANTIZE_TIMEOUT)
@@ -149,21 +134,71 @@ class HuggingFaceModelProcessor:
                 process.wait(timeout=self.KILL_TIMEOUT)
             except subprocess.TimeoutExpired:
                 process.kill()
-            raise GGUFConverterError("Error cuantizando: Timeout")
         if process.returncode != 0:
-            raise GGUFConverterError(f"Error cuantizando: {process.returncode}")
         return quant_config.quantized_gguf
-# ----------------- GRADIO -----------------
-def app_interface(dataset_name: str):
-    processor = HuggingFaceModelProcessor()
-    ds = processor.load_dataset_from_hf(dataset_name)
-    return f"Dataset cargado: {dataset_name}\nNúmero de splits: {len(ds)}"
 with gr.Blocks() as demo:
-    gr.Markdown("## Selecciona dataset de HuggingFace")
-    dataset_input = gr.Textbox(label="Nombre del dataset (ej: openerotica/erotiquant3)")
-    output = gr.Textbox(label="Resultado", interactive=False)
-    dataset_input.submit(app_interface, inputs=dataset_input, outputs=output)
 demo.launch()

 import tempfile
 from pathlib import Path
 from textwrap import dedent
 from dataclasses import dataclass, field
 import gradio as gr
 from datasets import load_dataset
 from huggingface_hub import HfApi, ModelCard, whoami
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 @dataclass
 class QuantizationConfig:
     method: str
 class SplitConfig:
     enabled: bool = False
     max_tensors: int = 256
+    max_size: str = None
 @dataclass
 class OutputConfig:
     new_repo_url: str = field(default="", init=False)
     new_repo_id: str = field(default="", init=False)
 class GGUFConverterError(Exception):
     pass
 class HuggingFaceModelProcessor:
+    QUANTIZE_TIMEOUT = 86400
+    HF_TO_GGUF_TIMEOUT = 3600
+    IMATRIX_TIMEOUT = 86400
+    SPLIT_TIMEOUT = 3600
+    KILL_TIMEOUT = 5
     DOWNLOAD_FOLDER = "./downloads"
     OUTPUT_FOLDER = "./outputs"
     CALIBRATION_FILE = "calibration_data_v5_rc.txt"
     def __init__(self):
         self.HF_TOKEN = os.environ.get("HF_TOKEN")
         self._create_folder(self.DOWNLOAD_FOLDER)
         self._create_folder(self.OUTPUT_FOLDER)
+    def _create_folder(self, folder_name: str):
         if not os.path.exists(folder_name):
             os.makedirs(folder_name)
         return folder_name
+    def _download_dataset(self, dataset_name: str):
+        print(f"Cargando dataset desde HuggingFace Hub: {dataset_name}")
+        dataset = load_dataset(dataset_name, use_auth_token=self.HF_TOKEN)
+        return dataset
+    def _download_model(self, processing_config: ModelProcessingConfig):
         print(f"Descargando modelo {processing_config.model_name}")
         if os.path.exists(processing_config.quant_config.fp16_model):
+            print("FP16 ya existe, omitiendo conversión.")
             return processing_config.quant_config.fp16_model
         with tempfile.TemporaryDirectory(dir=self.DOWNLOAD_FOLDER) as tmpdir:
             local_dir = f"{Path(tmpdir)}/{processing_config.model_name}"
             api = HfApi(token=processing_config.token)
+            pattern = "*.safetensors"
+            api.snapshot_download(repo_id=processing_config.model_id, local_dir=local_dir, allow_patterns=[pattern])
             convert_command = [
                 "python3", "/app/convert_hf_to_gguf.py", local_dir,
                 "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
                     process.wait(timeout=self.KILL_TIMEOUT)
                 except subprocess.TimeoutExpired:
                     process.kill()
+                raise GGUFConverterError("Error convirtiendo a FP16: timeout")
             if process.returncode != 0:
+                raise GGUFConverterError(f"Error convirtiendo a FP16: code={process.returncode}")
+            print("Modelo convertido a FP16 correctamente")
             return processing_config.quant_config.fp16_model
+    def _quantize_model(self, quant_config: QuantizationConfig):
         quantize_cmd = ["llama-quantize"]
         if quant_config.quant_embedding:
             quantize_cmd.extend(["--token-embedding-type", quant_config.embedding_tensor_method])
             if quant_config.quant_output:
                 quantize_cmd.extend(["--output-tensor-type", quant_config.output_tensor_method])
         if quant_config.use_imatrix:
+            raise NotImplementedError("imatrix no implementado para esta demo automática")
         quantize_cmd.append(quant_config.fp16_model)
         quantize_cmd.append(quant_config.quantized_gguf)
+        quantize_cmd.append(quant_config.method)
         process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
         try:
             process.wait(timeout=self.QUANTIZE_TIMEOUT)
                 process.wait(timeout=self.KILL_TIMEOUT)
             except subprocess.TimeoutExpired:
                 process.kill()
+            raise GGUFConverterError("Error cuantizando: timeout")
         if process.returncode != 0:
+            raise GGUFConverterError(f"Error cuantizando: code={process.returncode}")
+        print("Cuantización completada")
         return quant_config.quantized_gguf
+    def _create_repo(self, processing_config: ModelProcessingConfig):
+        api = HfApi(token=processing_config.token)
+        new_repo_url = api.create_repo(repo_id=processing_config.output_config.repo_name, exist_ok=True, private=processing_config.output_config.private_repo)
+        processing_config.new_repo_url = new_repo_url.url
+        processing_config.new_repo_id = new_repo_url.repo_id
+        print("Repositorio creado:", processing_config.new_repo_url)
+        return new_repo_url
+    def run_full_pipeline(self, token, model_id, model_name, dataset_name):
+        logs = []
+        try:
+            # 1. Cargar dataset
+            dataset = self._download_dataset(dataset_name)
+            logs.append(f"Dataset cargado: {dataset_name}")
+            # 2. Configuración inicial
+            outdir = self.OUTPUT_FOLDER
+            quant_config = QuantizationConfig(method="Q4_0")
+            quant_config.fp16_model = f"{outdir}/{model_name}.f16"
+            quant_config.quantized_gguf = f"{outdir}/{model_name}.gguf"
+            split_config = SplitConfig()
+            output_config = OutputConfig(private_repo=False, repo_name=f"{model_name}-gguf")
+            processing_config = ModelProcessingConfig(
+                token=token, model_id=model_id, model_name=model_name, outdir=outdir,
+                quant_config=quant_config, split_config=split_config, output_config=output_config
+            )
+            # 3. Descargar modelo
+            self._download_model(processing_config)
+            logs.append("Modelo descargado y convertido a FP16")
+            # 4. Cuantizar modelo
+            self._quantize_model(quant_config)
+            logs.append("Modelo cuantizado a GGUF")
+            # 5. Crear repo
+            self._create_repo(processing_config)
+            logs.append(f"Repositorio creado: {processing_config.new_repo_url}")
+        except Exception as e:
+            logs.append(f"ERROR: {e}")
+        return "\n".join(logs)
+# ----------------- Interfaz Gradio -----------------
+processor = HuggingFaceModelProcessor()
 with gr.Blocks() as demo:
+    gr.Markdown("## Pipeline Automática GGUF desde HuggingFace Hub")
+    dataset_input = gr.Textbox(label="Nombre del dataset HuggingFace", placeholder="openerotica/erotiquant3")
+    model_input = gr.Textbox(label="ID del modelo HF", placeholder="ochoa/your-model")
+    token_input = gr.Textbox(label="Tu token HF (opcional, si está en HF_TOKEN puede dejarse vacío)", type="password")
+    run_button = gr.Button("Ejecutar pipeline automática")
+    output_logs = gr.Textbox(label="Logs", lines=20)
+    run_button.click(
+        fn=lambda token, model_id, model_name, dataset_name: processor.run_full_pipeline(token, model_id, model_name, dataset_name),
+        inputs=[token_input, model_input, model_input, dataset_input],
+        outputs=[output_logs]
+    )
 demo.launch()