Text Classification
Transformers
ONNX
Safetensors
English
distilbert
intent-classification
multitask
iab
conversational-ai
adtech
calibrated-confidence
text-embeddings-inference
Instructions to use admesh/agentic-intent-classifier with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use admesh/agentic-intent-classifier with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="admesh/agentic-intent-classifier")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("admesh/agentic-intent-classifier", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| from __future__ import annotations | |
| import csv | |
| import os | |
| from dataclasses import dataclass | |
| from functools import lru_cache | |
| from pathlib import Path | |
# Version string for this project.
PROJECT_VERSION = "0.6.0-phase4"

# Every path below is anchored at the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent

# Artifact directories.
ARTIFACTS_DIR = BASE_DIR / "artifacts"
CALIBRATION_ARTIFACTS_DIR = ARTIFACTS_DIR / "calibration"
EVALUATION_ARTIFACTS_DIR = ARTIFACTS_DIR / "evaluation"
IAB_ARTIFACTS_DIR = ARTIFACTS_DIR / "iab"

# Dataset directories and benchmark files under ``data/``.
FULL_INTENT_TAXONOMY_DATA_DIR = BASE_DIR / "data" / "full_intent_taxonomy"
INTENT_TYPE_DIFFICULTY_DATA_DIR = BASE_DIR / "data" / "intent_type_difficulty"
INTENT_TYPE_BENCHMARK_PATH = BASE_DIR / "data" / "intent_type_benchmark.jsonl"
DECISION_PHASE_DIFFICULTY_DATA_DIR = BASE_DIR / "data" / "decision_phase_difficulty"
DECISION_PHASE_BENCHMARK_PATH = BASE_DIR / "data" / "decision_phase_benchmark.jsonl"
SUBTYPE_DIFFICULTY_DATA_DIR = BASE_DIR / "data" / "subtype_difficulty"
SUBTYPE_BENCHMARK_PATH = BASE_DIR / "data" / "subtype_benchmark.jsonl"
IAB_DIFFICULTY_DATA_DIR = BASE_DIR / "data" / "iab_difficulty"
IAB_BENCHMARK_PATH = BASE_DIR / "data" / "iab_benchmark.jsonl"
IAB_CROSS_VERTICAL_BENCHMARK_PATH = BASE_DIR / "data" / "iab_cross_vertical_benchmark.jsonl"

# Default API host and port.
DEFAULT_API_HOST = "127.0.0.1"
DEFAULT_API_PORT = 8008

# Example prompt suites under ``examples/``.
DEFAULT_BENCHMARK_PATH = BASE_DIR / "examples" / "demo_prompt_suite.json"
KNOWN_FAILURE_CASES_PATH = BASE_DIR / "examples" / "known_failure_cases.json"

# The taxonomy version is part of the TSV file name; the environment may
# override it via IAB_TAXONOMY_VERSION_OVERRIDE.
IAB_TAXONOMY_VERSION = os.environ.get("IAB_TAXONOMY_VERSION_OVERRIDE", "3.0")
_DEFAULT_IAB_TAXONOMY_PATH = (
    BASE_DIR / "data" / "iab-content" / f"Content Taxonomy {IAB_TAXONOMY_VERSION}.tsv"
)
def _resolve_iab_taxonomy_path() -> Path:
    """Pick the IAB content taxonomy TSV to load.

    Candidates are tried in this order:

    1. ``IAB_TAXONOMY_PATH_OVERRIDE`` from the environment, when non-blank.
    2. The default file inside this repo, when it exists on disk.
    3. The same file fetched from the Hugging Face Hub model repo, where
       ``ADMESH_MODEL_REPO_ID`` / ``ADMESH_MODEL_REVISION`` select the repo
       and revision.

    Returns:
        The chosen path. If the Hub fetch fails for any reason, the default
        local path is returned even though it did not exist when checked.
    """
    explicit_path = os.environ.get("IAB_TAXONOMY_PATH_OVERRIDE", "").strip()
    if explicit_path:
        return Path(explicit_path)

    if _DEFAULT_IAB_TAXONOMY_PATH.exists():
        return _DEFAULT_IAB_TAXONOMY_PATH

    # HF trust_remote_code fallback: the dynamic module cache may not include
    # data files, so ask the Hub for the taxonomy file directly.
    hub_repo = (
        os.environ.get("ADMESH_MODEL_REPO_ID", "admesh/agentic-intent-classifier").strip()
        or "admesh/agentic-intent-classifier"
    )
    hub_revision = os.environ.get("ADMESH_MODEL_REVISION", "").strip() or None
    hub_filename = f"data/iab-content/Content Taxonomy {IAB_TAXONOMY_VERSION}.tsv"
    try:
        from huggingface_hub import hf_hub_download

        return Path(
            hf_hub_download(
                repo_id=hub_repo,
                repo_type="model",
                filename=hub_filename,
                revision=hub_revision,
            )
        )
    except Exception:
        # Deliberate best effort: hand back the default path so that downstream
        # code raises a clear file-not-found when neither source is available.
        return _DEFAULT_IAB_TAXONOMY_PATH
# Resolved once, at import time.
IAB_TAXONOMY_PATH = _resolve_iab_taxonomy_path()

# Files derived from the taxonomy, stored in the IAB artifact directory.
IAB_TAXONOMY_GRAPH_PATH = IAB_ARTIFACTS_DIR / "taxonomy_graph.json"
IAB_TAXONOMY_NODES_PATH = IAB_ARTIFACTS_DIR / "taxonomy_nodes.json"
IAB_TAXONOMY_EMBEDDINGS_PATH = IAB_ARTIFACTS_DIR / "taxonomy_embeddings.pt"
IAB_DATASET_SUMMARY_PATH = IAB_ARTIFACTS_DIR / "dataset_summary.json"

# Model output directories.
MULTITASK_INTENT_MODEL_DIR = BASE_DIR / "multitask_intent_model_output"
IAB_CLASSIFIER_MODEL_DIR = BASE_DIR / "iab_classifier_model_output"
IAB_RETRIEVAL_LOCAL_MODEL_DIR = BASE_DIR / "iab_embedding_model_output"

# IAB example case files under ``examples/``.
IAB_QUALITY_TARGET_CASES_PATH = BASE_DIR / "examples" / "iab_mapping_cases.json"
IAB_CROSS_VERTICAL_QUALITY_TARGET_CASES_PATH = (
    BASE_DIR / "examples" / "iab_cross_vertical_mapping_cases.json"
)
IAB_BEHAVIOR_LOCK_CASES_PATH = BASE_DIR / "examples" / "iab_behavior_lock_cases.json"
IAB_CROSS_VERTICAL_BEHAVIOR_LOCK_CASES_PATH = (
    BASE_DIR / "examples" / "iab_cross_vertical_behavior_lock_cases.json"
)

# Train/val/test splits for the IAB data.
IAB_RETRIEVAL_SPLIT_PATHS = {
    "train": BASE_DIR / "data" / "iab" / "train.jsonl",
    "val": BASE_DIR / "data" / "iab" / "val.jsonl",
    "test": BASE_DIR / "data" / "iab" / "test.jsonl",
}

# Extra evaluation suites, keyed by suite name.
IAB_RETRIEVAL_STRESS_SUITE_PATHS = {
    "hard_cases": BASE_DIR / "data" / "iab" / "hard_cases.jsonl",
    "extended_cases": BASE_DIR / "data" / "iab" / "extended_cases.jsonl",
    "difficulty_benchmark": IAB_BENCHMARK_PATH,
    "cross_vertical_benchmark": IAB_CROSS_VERTICAL_BENCHMARK_PATH,
}

# Retrieval settings.
IAB_RETRIEVAL_FALLBACK_MODEL_NAME = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
IAB_RETRIEVAL_MODEL_MAX_LENGTH = 2048
IAB_RETRIEVAL_TOP_K = 16
IAB_RETRIEVAL_DEPTH_BONUS = 0.01
# NOTE(review): keys 1-4 presumably are taxonomy tier depths - confirm against
# the retrieval code that consumes this table.
IAB_RETRIEVAL_PREFIX_CONFIDENCE_THRESHOLDS = {
    1: 0.5,
    2: 0.54,
    3: 0.58,
    4: 0.62,
}
IAB_PARENT_FALLBACK_CONFIDENCE_FLOOR = 0.3
_DEFAULT_MODEL_REPO_ID = "admesh/agentic-intent-classifier"


def _hf_repo_id() -> str:
    """Return the Hub repo id to pull files from.

    ``ADMESH_MODEL_REPO_ID`` wins when it is set to a non-blank value;
    otherwise the built-in default repo id is returned.
    """
    configured = os.environ.get("ADMESH_MODEL_REPO_ID", _DEFAULT_MODEL_REPO_ID).strip()
    if configured:
        return configured
    return _DEFAULT_MODEL_REPO_ID
def _hf_revision() -> str | None:
    """Return the revision from ``ADMESH_MODEL_REVISION``, or None when unset/blank."""
    pinned = os.environ.get("ADMESH_MODEL_REVISION", "").strip()
    return pinned if pinned else None
def _is_hf_dynamic_module_runtime() -> bool:
    """Report whether this file runs from the HF `trust_remote_code` module cache.

    Detection is a substring check: the text ``transformers_modules`` must
    appear somewhere in ``BASE_DIR``.
    """
    base_dir_text = str(BASE_DIR)
    return base_dir_text.find("transformers_modules") != -1
def _resolve_repo_subdir(local_dir: Path, repo_subdir: str) -> Path:
    """Locate an artifact/model directory for local dev or HF trust_remote_code.

    Args:
        local_dir: Expected on-disk location of the directory.
        repo_subdir: The same directory as a path relative to the Hub repo root.

    Returns:
        ``local_dir`` when it exists, when not running from the HF dynamic
        module cache, or when the Hub download fails in any way. Otherwise the
        ``repo_subdir`` folder inside the downloaded Hub snapshot.
    """
    # Critical guard: during local/Colab training a missing directory must
    # never be silently redirected to a Hub snapshot cache path. The Hub is
    # consulted only inside HF dynamic modules (`trust_remote_code`).
    if local_dir.exists() or not _is_hf_dynamic_module_runtime():
        return local_dir

    try:
        from huggingface_hub import snapshot_download
    except Exception:
        return local_dir

    # Restrict the download to this one subdirectory.
    download_args: dict = {
        "repo_id": _hf_repo_id(),
        "repo_type": "model",
        "allow_patterns": [f"{repo_subdir}/**"],
    }
    pinned_revision = _hf_revision()
    if pinned_revision:
        download_args["revision"] = pinned_revision

    try:
        hub_dir = Path(snapshot_download(**download_args)) / repo_subdir
        return hub_dir if hub_dir.exists() else local_dir
    except Exception:
        # Best effort: any download problem falls back to the local path.
        return local_dir
# Re-resolve critical artifact/model dirs now that the helpers are defined.
# Each call returns the local directory unless the Hub fallback applies.
CALIBRATION_ARTIFACTS_DIR = _resolve_repo_subdir(
    ARTIFACTS_DIR / "calibration",
    "artifacts/calibration",
)
IAB_ARTIFACTS_DIR = _resolve_repo_subdir(ARTIFACTS_DIR / "iab", "artifacts/iab")

# These files live inside IAB_ARTIFACTS_DIR, so rebuild them from the
# re-resolved directory.
IAB_TAXONOMY_GRAPH_PATH = IAB_ARTIFACTS_DIR / "taxonomy_graph.json"
IAB_TAXONOMY_NODES_PATH = IAB_ARTIFACTS_DIR / "taxonomy_nodes.json"
IAB_TAXONOMY_EMBEDDINGS_PATH = IAB_ARTIFACTS_DIR / "taxonomy_embeddings.pt"
IAB_DATASET_SUMMARY_PATH = IAB_ARTIFACTS_DIR / "dataset_summary.json"

MULTITASK_INTENT_MODEL_DIR = _resolve_repo_subdir(
    BASE_DIR / "multitask_intent_model_output",
    "multitask_intent_model_output",
)
IAB_CLASSIFIER_MODEL_DIR = _resolve_repo_subdir(
    BASE_DIR / "iab_classifier_model_output",
    "iab_classifier_model_output",
)
# Label vocabularies. Order matters: build_label_maps assigns each label its
# position in the tuple as its id.
INTENT_TYPE_LABELS = (
    "informational",
    "exploratory",
    "commercial",
    "transactional",
    "support",
    "personal_reflection",
    "creative_generation",
    "chit_chat",
    "ambiguous",
    "prohibited",
)

DECISION_PHASE_LABELS = (
    "awareness",
    "research",
    "consideration",
    "decision",
    "action",
    "post_purchase",
    "support",
)

SUBTYPE_LABELS = (
    "education",
    "product_discovery",
    "comparison",
    "evaluation",
    "deal_seeking",
    "provider_selection",
    "signup",
    "purchase",
    "booking",
    "download",
    "contact_sales",
    "task_execution",
    "onboarding_setup",
    "troubleshooting",
    "account_help",
    "billing_help",
    "follow_up",
    "emotional_reflection",
)
def build_label_maps(labels: tuple[str, ...]) -> tuple[dict[str, int], dict[int, str]]:
    """Build the forward and reverse lookup tables for an ordered label set.

    Args:
        labels: Label names; each label's id is its position in the tuple.

    Returns:
        ``(label2id, id2label)``. The reverse table is derived from the
        forward one, so a repeated label keeps only its last position.
    """
    label2id = dict(zip(labels, range(len(labels))))
    id2label = dict(zip(label2id.values(), label2id.keys()))
    return label2id, id2label
def _looks_like_local_hf_model_dir(path: Path) -> bool:
    """Check whether ``path`` is a directory holding a config and a weights file.

    The directory must contain ``config.json`` plus at least one of the
    recognized weight files.
    """
    if not (path.is_dir() and (path / "config.json").exists()):
        return False
    weight_files = ("model.safetensors", "pytorch_model.bin", "iab_weights.safetensors")
    return any((path / file_name).exists() for file_name in weight_files)
def _load_iab_path_labels(path: Path) -> tuple[str, ...]:
    """Read the IAB content taxonomy TSV and return one label per taxonomy row.

    The first row of the file is skipped and the second row is used as the
    column header. For every following row, the non-blank ``Tier 1`` to
    ``Tier 4`` cells are stripped and joined with ``" > "``. Rows whose tier
    cells are all blank produce no label.

    Args:
        path: Tab-separated taxonomy file, read as UTF-8.

    Returns:
        The labels, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the file has fewer than two rows, so there is no
            header row to read column names from.
    """
    tier_columns = ("Tier 1", "Tier 2", "Tier 3", "Tier 4")
    labels: list[str] = []
    # newline="" leaves line endings untouched so the csv module can handle
    # them itself; without it, newlines inside quoted fields are altered.
    with path.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle, delimiter="\t")
        next(reader, None)  # the row above the header is not used
        header = next(reader, None)
        if header is None:
            raise IndexError(f"IAB taxonomy file has no header row: {path}")
        for row in reader:
            # Short rows are padded so that every header column has a cell.
            padded = row + [""] * (len(header) - len(row))
            item = dict(zip(header, padded))
            stripped = (item.get(column, "").strip() for column in tier_columns)
            path_parts = [part for part in stripped if part]
            if path_parts:
                labels.append(" > ".join(path_parts))
    return tuple(labels)
# Read from the resolved taxonomy file once, at import time.
IAB_PATH_LABELS = _load_iab_path_labels(path=IAB_TAXONOMY_PATH)
@dataclass(frozen=True)
class HeadConfig:
    """Static configuration for one classification head.

    Declared as a dataclass so that instances can be built with keyword
    arguments, one per field below. Instances are immutable.
    """

    # NOTE(review): the accessors below are plain methods and must be called
    # with parentheses. If callers read them as attributes, they need
    # @property instead - confirm against the call sites.

    slug: str  # short identifier; also names the calibration file
    task_name: str
    model_name: str
    model_dir: Path
    data_dir: Path  # directory holding train.jsonl / val.jsonl / test.jsonl
    label_field: str
    labels: tuple[str, ...]  # ordered; a label's position is its id
    max_length: int
    default_confidence_threshold: float
    target_accept_precision: float
    min_calibrated_confidence_threshold: float
    stress_suite_paths: dict[str, Path]  # suite name -> file

    def label2id(self) -> dict[str, int]:
        """Return the label -> id table built from ``labels``."""
        return build_label_maps(self.labels)[0]

    def id2label(self) -> dict[int, str]:
        """Return the id -> label table built from ``labels``."""
        return build_label_maps(self.labels)[1]

    def calibration_path(self) -> Path:
        """Return ``<slug>.json`` inside the calibration artifact directory."""
        return CALIBRATION_ARTIFACTS_DIR / f"{self.slug}.json"

    def split_paths(self) -> dict[str, Path]:
        """Return the train/val/test JSONL paths inside ``data_dir``."""
        return {
            "train": self.data_dir / "train.jsonl",
            "val": self.data_dir / "val.jsonl",
            "test": self.data_dir / "test.jsonl",
        }
# One HeadConfig per classification head.

# Intent type head.
INTENT_HEAD_CONFIG = HeadConfig(
    slug="intent_type",
    task_name="intent.type",
    model_name="distilbert-base-uncased",
    model_dir=BASE_DIR / "model_output",
    data_dir=BASE_DIR / "data",
    label_field="intent_type",
    labels=INTENT_TYPE_LABELS,
    max_length=64,
    default_confidence_threshold=0.7,
    target_accept_precision=0.8,
    min_calibrated_confidence_threshold=0.4,
    stress_suite_paths={
        "hard_cases": BASE_DIR / "data" / "hard_cases.jsonl",
        "third_wave_cases": BASE_DIR / "data" / "third_wave_cases.jsonl",
        "difficulty_benchmark": INTENT_TYPE_BENCHMARK_PATH,
    },
)

# Decision phase head.
DECISION_PHASE_HEAD_CONFIG = HeadConfig(
    slug="decision_phase",
    task_name="intent.decision_phase",
    model_name="distilbert-base-uncased",
    model_dir=BASE_DIR / "decision_phase_model_output",
    data_dir=BASE_DIR / "data" / "decision_phase",
    label_field="decision_phase",
    labels=DECISION_PHASE_LABELS,
    max_length=64,
    default_confidence_threshold=0.5,
    target_accept_precision=0.75,
    min_calibrated_confidence_threshold=0.22,
    stress_suite_paths={
        "hard_cases": BASE_DIR / "data" / "decision_phase" / "hard_cases.jsonl",
        "final_wave_cases": BASE_DIR / "data" / "decision_phase" / "final_wave_cases.jsonl",
        "difficulty_benchmark": DECISION_PHASE_BENCHMARK_PATH,
    },
)

# Intent subtype head.
SUBTYPE_HEAD_CONFIG = HeadConfig(
    slug="intent_subtype",
    task_name="intent.subtype",
    model_name="distilbert-base-uncased",
    model_dir=BASE_DIR / "subtype_model_output",
    data_dir=BASE_DIR / "data" / "subtype",
    label_field="intent_subtype",
    labels=SUBTYPE_LABELS,
    max_length=72,
    default_confidence_threshold=0.45,
    target_accept_precision=0.75,
    min_calibrated_confidence_threshold=0.25,
    stress_suite_paths={
        "hard_cases": BASE_DIR / "data" / "subtype" / "hard_cases.jsonl",
        "extended_cases": BASE_DIR / "data" / "subtype" / "extended_cases.jsonl",
        "difficulty_benchmark": SUBTYPE_BENCHMARK_PATH,
    },
)

# IAB content head; its labels come from the taxonomy file and its model
# directory is the resolved IAB classifier directory.
IAB_HEAD_CONFIG = HeadConfig(
    slug="iab_content",
    task_name="iab.content",
    model_name="distilbert-base-uncased",
    model_dir=IAB_CLASSIFIER_MODEL_DIR,
    data_dir=BASE_DIR / "data" / "iab",
    label_field="iab_path",
    labels=IAB_PATH_LABELS,
    max_length=96,
    default_confidence_threshold=0.2,
    target_accept_precision=0.7,
    min_calibrated_confidence_threshold=0.12,
    stress_suite_paths=IAB_RETRIEVAL_STRESS_SUITE_PATHS,
)
# Retrieval model name. IAB_RETRIEVAL_MODEL_NAME_OVERRIDE wins when set;
# otherwise the local model directory is used when it looks like a saved
# model, else the fallback model name.
IAB_RETRIEVAL_MODEL_NAME = os.environ.get(
    "IAB_RETRIEVAL_MODEL_NAME_OVERRIDE",
    (
        str(IAB_RETRIEVAL_LOCAL_MODEL_DIR)
        if _looks_like_local_hf_model_dir(IAB_RETRIEVAL_LOCAL_MODEL_DIR)
        else IAB_RETRIEVAL_FALLBACK_MODEL_NAME
    ),
)

# Lookup table from head slug to its configuration.
HEAD_CONFIGS = {
    head_config.slug: head_config
    for head_config in (
        INTENT_HEAD_CONFIG,
        SUBTYPE_HEAD_CONFIG,
        DECISION_PHASE_HEAD_CONFIG,
        IAB_HEAD_CONFIG,
    )
}
# NOTE(review): the names suggest the *_SCORE_WEIGHTS tables feed a commercial
# score and the *_TRAINING_WEIGHTS tables are per-class training weights -
# confirm against the code that consumes them.
COMMERCIAL_SCORE_MIN = 0.6

SAFE_FALLBACK_INTENTS = {
    "ambiguous",
    "support",
    "personal_reflection",
    "chit_chat",
    "prohibited",
}

# Intent type tables: one entry per intent type label.
INTENT_SCORE_WEIGHTS = {
    "informational": 0.15,
    "exploratory": 0.35,
    "commercial": 0.75,
    "transactional": 0.95,
    "support": 0.0,
    "personal_reflection": 0.0,
    "creative_generation": 0.0,
    "chit_chat": 0.0,
    "ambiguous": 0.1,
    "prohibited": 0.0,
}

INTENT_TYPE_TRAINING_WEIGHTS = {
    "informational": 1.0,
    "exploratory": 1.0,
    "commercial": 1.7,
    "transactional": 1.9,
    "support": 1.6,
    "personal_reflection": 0.85,
    "creative_generation": 0.75,
    "chit_chat": 0.7,
    "ambiguous": 1.1,
    "prohibited": 2.2,
}

# Decision phase tables: one entry per decision phase label.
PHASE_SCORE_WEIGHTS = {
    "awareness": 0.1,
    "research": 0.35,
    "consideration": 0.7,
    "decision": 0.85,
    "action": 1.0,
    "post_purchase": 0.15,
    "support": 0.0,
}

DECISION_PHASE_TRAINING_WEIGHTS = {
    "awareness": 0.9,
    "research": 1.0,
    "consideration": 1.35,
    "decision": 1.8,
    "action": 1.55,
    "post_purchase": 1.15,
    "support": 1.5,
}
# Subtype tables: each of the three dicts below has one entry per subtype label.
SUBTYPE_TRAINING_WEIGHTS = {
    "education": 0.95,
    "product_discovery": 1.55,
    "comparison": 1.65,
    "evaluation": 1.1,
    "deal_seeking": 1.7,
    "provider_selection": 1.75,
    "signup": 1.6,
    "purchase": 1.9,
    "booking": 1.45,
    "download": 1.1,
    "contact_sales": 1.55,
    "task_execution": 1.0,
    "onboarding_setup": 1.05,
    "troubleshooting": 1.4,
    "account_help": 1.55,
    "billing_help": 1.6,
    "follow_up": 0.9,
    "emotional_reflection": 0.85,
}

SUBTYPE_SCORE_WEIGHTS = {
    "education": 0.05,
    "product_discovery": 0.58,
    "comparison": 0.74,
    "evaluation": 0.68,
    "deal_seeking": 0.71,
    "provider_selection": 0.9,
    "signup": 0.92,
    "purchase": 1.0,
    "booking": 0.94,
    "download": 0.46,
    "contact_sales": 0.95,
    "task_execution": 0.22,
    "onboarding_setup": 0.18,
    "troubleshooting": 0.0,
    "account_help": 0.0,
    "billing_help": 0.0,
    "follow_up": 0.05,
    "emotional_reflection": 0.0,
}

# Maps each subtype to a family name.
SUBTYPE_FAMILY_MAP = {
    "education": "informational",
    "product_discovery": "commercial",
    "comparison": "commercial",
    "evaluation": "commercial",
    "deal_seeking": "commercial",
    "provider_selection": "commercial",
    "signup": "transactional",
    "purchase": "transactional",
    "booking": "transactional",
    "download": "transactional",
    "contact_sales": "transactional",
    "task_execution": "transactional",
    "onboarding_setup": "post_purchase",
    "troubleshooting": "support",
    "account_help": "support",
    "billing_help": "support",
    "follow_up": "ambiguous",
    "emotional_reflection": "reflection",
}

# Family names (values of SUBTYPE_FAMILY_MAP) treated as safe fallbacks.
SAFE_FALLBACK_SUBTYPE_FAMILIES = {"support", "ambiguous", "reflection"}

# Named groups of subtypes.
HIGH_INTENT_SUBTYPES = {
    "provider_selection",
    "signup",
    "purchase",
    "booking",
    "contact_sales",
}
CAUTIONARY_SUBTYPES = {"comparison", "evaluation", "deal_seeking", "download"}
LOW_SIGNAL_SUBTYPES = {"education", "follow_up", "onboarding_setup", "task_execution"}
def ensure_artifact_dirs() -> None:
    """Create the calibration, evaluation and IAB artifact directories.

    Missing parent directories are created too, and directories that already
    exist are left alone.
    """
    for artifact_dir in (
        CALIBRATION_ARTIFACTS_DIR,
        EVALUATION_ARTIFACTS_DIR,
        IAB_ARTIFACTS_DIR,
    ):
        artifact_dir.mkdir(parents=True, exist_ok=True)