Commit
·
3b1e6c7
1
Parent(s):
d92e2aa
UI fixed and DetectGPT renamed
Browse files- README.md +37 -17
- config/model_config.py +10 -10
- config/settings.py +11 -11
- config/threshold_config.py +151 -151
- detector/attribution.py +74 -70
- detector/ensemble.py +171 -153
- detector/highlighter.py +12 -12
- detector/orchestrator.py +6 -7
- metrics/{detect_gpt.py → multi_perturbation_stability.py} +42 -34
- processors/language_detector.py +244 -65
- processors/text_processor.py +1 -1
- reporter/reasoning_generator.py +8 -8
- reporter/report_generator.py +6 -6
- text_auth_app.py +17 -12
- ui/static/index.html +306 -299
README.md
CHANGED
|
@@ -5,7 +5,6 @@
|
|
| 5 |
|
| 6 |

|
| 7 |

|
| 8 |
-

|
| 9 |

|
| 10 |

|
| 11 |
|
|
@@ -52,7 +51,7 @@ This README is research‑grade (detailed math, methodology, and benchmarks) whi
|
|
| 52 |
|
| 53 |
**Problem.** AI generation tools increasingly produce publishable text, creating integrity and verification challenges in education, hiring, publishing, and enterprise content systems.
|
| 54 |
|
| 55 |
-
**Solution.** A domain‑aware detector combining six orthogonal metrics (Perplexity, Entropy, Structural, Semantic, Linguistic,
|
| 56 |
|
| 57 |
**MVP Scope.** End‑to‑end FastAPI backend, lightweight HTML UI, modular metrics, Hugging Face model auto‑download, and a prototype ensemble classifier. Model weights are not committed to the repo; they are fetched at first run.
|
| 58 |
|
|
@@ -99,7 +98,7 @@ flowchart LR
|
|
| 99 |
P3[Structural]
|
| 100 |
P4[Linguistic]
|
| 101 |
P5[Semantic]
|
| 102 |
-
P6[
|
| 103 |
end
|
| 104 |
|
| 105 |
G[Ensemble Classifier]
|
|
@@ -157,12 +156,14 @@ This section provides the exact metric definitions implemented in `metrics/` and
|
|
| 157 |
- Structural — 15%
|
| 158 |
- Semantic — 15%
|
| 159 |
- Linguistic — 15%
|
| 160 |
-
-
|
| 161 |
|
| 162 |
### 1) Perplexity (25% weight)
|
| 163 |
|
| 164 |
**Definition**
|
| 165 |
-
|
|
|
|
|
|
|
| 166 |
|
| 167 |
**Implementation sketch**
|
| 168 |
```python
|
|
@@ -187,7 +188,9 @@ elif domain == Domain.SOCIAL_MEDIA:
|
|
| 187 |
### 2) Entropy (20% weight)
|
| 188 |
|
| 189 |
**Shannon entropy (token level)**
|
| 190 |
-
|
|
|
|
|
|
|
| 191 |
|
| 192 |
**Implementation sketch**
|
| 193 |
```python
|
|
@@ -203,10 +206,23 @@ def calculate_text_entropy(text):
|
|
| 203 |
### 3) Structural Metric (15% weight)
|
| 204 |
|
| 205 |
**Burstiness**
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
**Uniformity**
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
**Sketch**
|
| 212 |
```python
|
|
@@ -223,7 +239,9 @@ def calculate_burstiness(text):
|
|
| 223 |
### 4) Semantic Analysis (15% weight)
|
| 224 |
|
| 225 |
**Coherence (sentence embedding cosine similarity)**
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
|
| 228 |
**Sketch**
|
| 229 |
```python
|
|
@@ -247,13 +265,15 @@ def calculate_linguistic_features(text, nlp_model):
|
|
| 247 |
return {'pos_diversity': pos_diversity, 'mean_tree_depth': np.mean(depths)}
|
| 248 |
```
|
| 249 |
|
| 250 |
-
### 6)
|
| 251 |
|
| 252 |
**Stability under perturbation** (curvature principle)
|
| 253 |
-
|
|
|
|
|
|
|
| 254 |
|
| 255 |
```python
|
| 256 |
-
def
|
| 257 |
original = model.get_log_probability(text)
|
| 258 |
diffs = []
|
| 259 |
for _ in range(num_perturbations):
|
|
@@ -298,10 +318,10 @@ Domain weights and thresholds are configurable. Example weights (in `config/thre
|
|
| 298 |
|
| 299 |
```python
|
| 300 |
DOMAIN_WEIGHTS = {
|
| 301 |
-
'academic': {'perplexity':0.22,'entropy':0.18,'structural':0.15,'linguistic':0.20,'semantic':0.15,'
|
| 302 |
-
'technical': {'perplexity':0.20,'entropy':0.18,'structural':0.12,'linguistic':0.18,'semantic':0.22,'
|
| 303 |
-
'creative': {'perplexity':0.25,'entropy':0.25,'structural':0.20,'linguistic':0.12,'semantic':0.10,'
|
| 304 |
-
'social_media': {'perplexity':0.30,'entropy':0.22,'structural':0.15,'linguistic':0.10,'semantic':0.13,'
|
| 305 |
}
|
| 306 |
```
|
| 307 |
|
|
@@ -348,7 +368,7 @@ text_auth/
|
|
| 348 |
│ └── orchestrator.py
|
| 349 |
├── metrics/
|
| 350 |
│ ├── base_metric.py
|
| 351 |
-
│ ├──
|
| 352 |
│ ├── entropy.py
|
| 353 |
│ ├── linguistic.py
|
| 354 |
│ ├── perplexity.py
|
|
|
|
| 5 |
|
| 6 |

|
| 7 |

|
|
|
|
| 8 |

|
| 9 |

|
| 10 |
|
|
|
|
| 51 |
|
| 52 |
**Problem.** AI generation tools increasingly produce publishable text, creating integrity and verification challenges in education, hiring, publishing, and enterprise content systems.
|
| 53 |
|
| 54 |
+
**Solution.** A domain‑aware detector combining six orthogonal metrics (Perplexity, Entropy, Structural, Semantic, Linguistic, Multi-perturbation stability) into a confidence‑calibrated ensemble. Outputs are explainable with sentence‑level highlighting, attribution probabilities, and downloadable reports (JSON/PDF).
|
| 55 |
|
| 56 |
**MVP Scope.** End‑to‑end FastAPI backend, lightweight HTML UI, modular metrics, Hugging Face model auto‑download, and a prototype ensemble classifier. Model weights are not committed to the repo; they are fetched at first run.
|
| 57 |
|
|
|
|
| 98 |
P3[Structural]
|
| 99 |
P4[Linguistic]
|
| 100 |
P5[Semantic]
|
| 101 |
+
P6[MultiPerturbationStability]
|
| 102 |
end
|
| 103 |
|
| 104 |
G[Ensemble Classifier]
|
|
|
|
| 156 |
- Structural — 15%
|
| 157 |
- Semantic — 15%
|
| 158 |
- Linguistic — 15%
|
| 159 |
+
- Multi-perturbation Stability — 10%
|
| 160 |
|
| 161 |
### 1) Perplexity (25% weight)
|
| 162 |
|
| 163 |
**Definition**
|
| 164 |
+
```math
|
| 165 |
+
Perplexity = \exp\left(-\frac{1}{N}\sum_{i=1}^N \log P(w_i\mid context)\right)
|
| 166 |
+
```
|
| 167 |
|
| 168 |
**Implementation sketch**
|
| 169 |
```python
|
|
|
|
| 188 |
### 2) Entropy (20% weight)
|
| 189 |
|
| 190 |
**Shannon entropy (token level)**
|
| 191 |
+
```math
|
| 192 |
+
H(X) = -\sum_{i=1}^{n} p(x_i) \log_2 p(x_i)
|
| 193 |
+
```
|
| 194 |
|
| 195 |
**Implementation sketch**
|
| 196 |
```python
|
|
|
|
| 206 |
### 3) Structural Metric (15% weight)
|
| 207 |
|
| 208 |
**Burstiness**
|
| 209 |
+
```math
|
| 210 |
+
Burstiness = \frac{\sigma - \mu}{\sigma + \mu}
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
where:
|
| 214 |
+
- μ = mean sentence length
|
| 215 |
+
- σ = standard deviation of sentence length
|
| 216 |
|
| 217 |
**Uniformity**
|
| 218 |
+
```math
|
| 219 |
+
Uniformity = 1 - \frac{\sigma}{\mu}
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
where:
|
| 223 |
+
- μ = mean sentence length
|
| 224 |
+
- σ = standard deviation of sentence length
|
| 225 |
+
|
| 226 |
|
| 227 |
**Sketch**
|
| 228 |
```python
|
|
|
|
| 239 |
### 4) Semantic Analysis (15% weight)
|
| 240 |
|
| 241 |
**Coherence (sentence embedding cosine similarity)**
|
| 242 |
+
```math
|
| 243 |
+
Coherence = \frac{1}{n-1} \sum_{i=1}^{n-1} \cos(e_i, e_{i+1})
|
| 244 |
+
```
|
| 245 |
|
| 246 |
**Sketch**
|
| 247 |
```python
|
|
|
|
| 265 |
return {'pos_diversity': pos_diversity, 'mean_tree_depth': np.mean(depths)}
|
| 266 |
```
|
| 267 |
|
| 268 |
+
### 6) MultiPerturbationStability (10% weight)
|
| 269 |
|
| 270 |
**Stability under perturbation** (curvature principle)
|
| 271 |
+
```math
|
| 272 |
+
Stability = \frac{1}{n} \sum_{j=1}^{n} \left| \log P(x) - \log P(x_{\text{perturbed},\,j}) \right|
|
| 273 |
+
```
|
| 274 |
|
| 275 |
```python
|
| 276 |
+
def multi_perturbation_stability_score(text, model, num_perturbations=20):
|
| 277 |
original = model.get_log_probability(text)
|
| 278 |
diffs = []
|
| 279 |
for _ in range(num_perturbations):
|
|
|
|
| 318 |
|
| 319 |
```python
|
| 320 |
DOMAIN_WEIGHTS = {
|
| 321 |
+
'academic': {'perplexity':0.22,'entropy':0.18,'structural':0.15,'linguistic':0.20,'semantic':0.15,'multi_perturbation_stability':0.10},
|
| 322 |
+
'technical': {'perplexity':0.20,'entropy':0.18,'structural':0.12,'linguistic':0.18,'semantic':0.22,'multi_perturbation_stability':0.10},
|
| 323 |
+
'creative': {'perplexity':0.25,'entropy':0.25,'structural':0.20,'linguistic':0.12,'semantic':0.10,'multi_perturbation_stability':0.08},
|
| 324 |
+
'social_media': {'perplexity':0.30,'entropy':0.22,'structural':0.15,'linguistic':0.10,'semantic':0.13,'multi_perturbation_stability':0.10}
|
| 325 |
}
|
| 326 |
```
|
| 327 |
|
|
|
|
| 368 |
│ └── orchestrator.py
|
| 369 |
├── metrics/
|
| 370 |
│ ├── base_metric.py
|
| 371 |
+
│ ├── multi_perturbation_stability.py
|
| 372 |
│ ├── entropy.py
|
| 373 |
│ ├── linguistic.py
|
| 374 |
│ ├── perplexity.py
|
config/model_config.py
CHANGED
|
@@ -98,16 +98,16 @@ MODEL_REGISTRY : Dict[str, ModelConfig] = {"perplexity_gpt2" : ModelC
|
|
| 98 |
batch_size = 16,
|
| 99 |
quantizable = True,
|
| 100 |
),
|
| 101 |
-
"
|
| 102 |
model_type = ModelType.GPTMASK,
|
| 103 |
-
description = "
|
| 104 |
size_mb = 0,
|
| 105 |
required = True,
|
| 106 |
download_priority = 4,
|
| 107 |
max_length = 1024,
|
| 108 |
batch_size = 4,
|
| 109 |
),
|
| 110 |
-
"
|
| 111 |
model_type = ModelType.TRANSFORMER,
|
| 112 |
description = "Masked LM for text perturbation",
|
| 113 |
size_mb = 330,
|
|
@@ -131,18 +131,18 @@ MODEL_REGISTRY : Dict[str, ModelConfig] = {"perplexity_gpt2" : ModelC
|
|
| 131 |
# MODEL GROUPS FOR BATCH DOWNLOADING
|
| 132 |
MODEL_GROUPS = {"minimal" : ["perplexity_gpt2", "domain_classifier"],
|
| 133 |
"essential" : ["perplexity_gpt2", "semantic_primary", "linguistic_spacy", "domain_classifier"],
|
| 134 |
-
"extended" : ["semantic_secondary", "
|
| 135 |
"optional" : ["language_detector"],
|
| 136 |
}
|
| 137 |
|
| 138 |
|
| 139 |
# MODEL WEIGHTS FOR ENSEMBLE : For 6 metrics implemented
|
| 140 |
-
DEFAULT_MODEL_WEIGHTS = {"statistical"
|
| 141 |
-
"perplexity"
|
| 142 |
-
"entropy"
|
| 143 |
-
"semantic_analysis"
|
| 144 |
-
"linguistic"
|
| 145 |
-
"
|
| 146 |
}
|
| 147 |
|
| 148 |
|
|
|
|
| 98 |
batch_size = 16,
|
| 99 |
quantizable = True,
|
| 100 |
),
|
| 101 |
+
"multi_perturbation_base" : ModelConfig(model_id = "gpt2",
|
| 102 |
model_type = ModelType.GPTMASK,
|
| 103 |
+
description = "MultiPerturbationStability model (reuses gpt2)",
|
| 104 |
size_mb = 0,
|
| 105 |
required = True,
|
| 106 |
download_priority = 4,
|
| 107 |
max_length = 1024,
|
| 108 |
batch_size = 4,
|
| 109 |
),
|
| 110 |
+
"multi_perturbation_mask" : ModelConfig(model_id = "distilroberta-base",
|
| 111 |
model_type = ModelType.TRANSFORMER,
|
| 112 |
description = "Masked LM for text perturbation",
|
| 113 |
size_mb = 330,
|
|
|
|
| 131 |
# MODEL GROUPS FOR BATCH DOWNLOADING
|
| 132 |
MODEL_GROUPS = {"minimal" : ["perplexity_gpt2", "domain_classifier"],
|
| 133 |
"essential" : ["perplexity_gpt2", "semantic_primary", "linguistic_spacy", "domain_classifier"],
|
| 134 |
+
"extended" : ["semantic_secondary", "multi_perturbation_mask", "domain_classifier_fallback"],
|
| 135 |
"optional" : ["language_detector"],
|
| 136 |
}
|
| 137 |
|
| 138 |
|
| 139 |
# MODEL WEIGHTS FOR ENSEMBLE : For 6 metrics implemented
|
| 140 |
+
DEFAULT_MODEL_WEIGHTS = {"statistical" : 0.20, # No model needed
|
| 141 |
+
"perplexity" : 0.20, # gpt2
|
| 142 |
+
"entropy" : 0.15, # gpt2 (reused)
|
| 143 |
+
"semantic_analysis" : 0.20, # all-MiniLM-L6-v2
|
| 144 |
+
"linguistic" : 0.15, # spacy
|
| 145 |
+
"multi_perturbation_stability" : 0.10, # gpt2 + distilroberta (optional)
|
| 146 |
}
|
| 147 |
|
| 148 |
|
config/settings.py
CHANGED
|
@@ -34,13 +34,13 @@ class Settings(BaseSettings):
|
|
| 34 |
|
| 35 |
# File Upload Settings
|
| 36 |
MAX_UPLOAD_SIZE : int = 10 * 1024 * 1024 # 10MB
|
| 37 |
-
ALLOWED_EXTENSIONS : list = [".txt", ".pdf", ".docx", ".doc"]
|
| 38 |
|
| 39 |
# Processing Settings
|
| 40 |
-
MAX_TEXT_LENGTH : int =
|
| 41 |
-
MIN_TEXT_LENGTH : int = 50
|
| 42 |
-
CHUNK_SIZE : int = 512
|
| 43 |
-
CHUNK_OVERLAP : int = 50
|
| 44 |
|
| 45 |
# Model Settings
|
| 46 |
DEVICE : str = Field(default = "cpu", env = "DEVICE") # "cuda" or "cpu"
|
|
@@ -87,12 +87,12 @@ class Settings(BaseSettings):
|
|
| 87 |
REQUEST_TIMEOUT : int = 300 # seconds (5 minutes)
|
| 88 |
|
| 89 |
# Metrics Configuration
|
| 90 |
-
METRICS_ENABLED : dict = {"semantic_analysis"
|
| 91 |
-
"
|
| 92 |
-
"perplexity"
|
| 93 |
-
"statistical"
|
| 94 |
-
"entropy"
|
| 95 |
-
"linguistic"
|
| 96 |
}
|
| 97 |
|
| 98 |
class Config:
|
|
|
|
| 34 |
|
| 35 |
# File Upload Settings
|
| 36 |
MAX_UPLOAD_SIZE : int = 10 * 1024 * 1024 # 10MB
|
| 37 |
+
ALLOWED_EXTENSIONS : list = [".txt", ".pdf", ".docx", ".doc", ".md"]
|
| 38 |
|
| 39 |
# Processing Settings
|
| 40 |
+
MAX_TEXT_LENGTH : int = 500000 # Maximum characters to process
|
| 41 |
+
MIN_TEXT_LENGTH : int = 50 # Minimum characters for analysis
|
| 42 |
+
CHUNK_SIZE : int = 512 # Tokens per chunk
|
| 43 |
+
CHUNK_OVERLAP : int = 50 # Overlap between chunks
|
| 44 |
|
| 45 |
# Model Settings
|
| 46 |
DEVICE : str = Field(default = "cpu", env = "DEVICE") # "cuda" or "cpu"
|
|
|
|
| 87 |
REQUEST_TIMEOUT : int = 300 # seconds (5 minutes)
|
| 88 |
|
| 89 |
# Metrics Configuration
|
| 90 |
+
METRICS_ENABLED : dict = {"semantic_analysis" : True,
|
| 91 |
+
"multi_perturbation_stability" : True,
|
| 92 |
+
"perplexity" : True,
|
| 93 |
+
"statistical" : True,
|
| 94 |
+
"entropy" : True,
|
| 95 |
+
"linguistic" : True,
|
| 96 |
}
|
| 97 |
|
| 98 |
class Config:
|
config/threshold_config.py
CHANGED
|
@@ -55,191 +55,191 @@ class DomainThresholds:
|
|
| 55 |
"""
|
| 56 |
Thresholds for 6 metrics in a specific domain
|
| 57 |
"""
|
| 58 |
-
domain
|
| 59 |
-
structural
|
| 60 |
-
perplexity
|
| 61 |
-
entropy
|
| 62 |
-
semantic_analysis
|
| 63 |
-
linguistic
|
| 64 |
-
|
| 65 |
-
ensemble_threshold
|
| 66 |
|
| 67 |
|
| 68 |
# ==================== DOMAIN-SPECIFIC THRESHOLDS ====================
|
| 69 |
# GENERAL (Default fallback)
|
| 70 |
-
DEFAULT_THRESHOLDS = DomainThresholds(domain
|
| 71 |
-
structural
|
| 72 |
-
perplexity
|
| 73 |
-
entropy
|
| 74 |
-
semantic_analysis
|
| 75 |
-
linguistic
|
| 76 |
-
|
| 77 |
-
ensemble_threshold
|
| 78 |
)
|
| 79 |
|
| 80 |
# ACADEMIC
|
| 81 |
-
ACADEMIC_THRESHOLDS = DomainThresholds(domain
|
| 82 |
-
structural
|
| 83 |
-
perplexity
|
| 84 |
-
entropy
|
| 85 |
-
semantic_analysis
|
| 86 |
-
linguistic
|
| 87 |
-
|
| 88 |
-
ensemble_threshold
|
| 89 |
)
|
| 90 |
|
| 91 |
# CREATIVE WRITING
|
| 92 |
-
CREATIVE_THRESHOLDS = DomainThresholds(domain
|
| 93 |
-
structural
|
| 94 |
-
perplexity
|
| 95 |
-
entropy
|
| 96 |
-
semantic_analysis
|
| 97 |
-
linguistic
|
| 98 |
-
|
| 99 |
-
ensemble_threshold
|
| 100 |
)
|
| 101 |
|
| 102 |
# AI/ML/DATA SCIENCE
|
| 103 |
-
AI_ML_THRESHOLDS = DomainThresholds(domain
|
| 104 |
-
structural
|
| 105 |
-
perplexity
|
| 106 |
-
entropy
|
| 107 |
-
semantic_analysis
|
| 108 |
-
linguistic
|
| 109 |
-
|
| 110 |
-
ensemble_threshold
|
| 111 |
)
|
| 112 |
|
| 113 |
# SOFTWARE DEVELOPMENT
|
| 114 |
-
SOFTWARE_DEV_THRESHOLDS = DomainThresholds(domain
|
| 115 |
-
structural
|
| 116 |
-
perplexity
|
| 117 |
-
entropy
|
| 118 |
-
semantic_analysis
|
| 119 |
-
linguistic
|
| 120 |
-
|
| 121 |
-
ensemble_threshold
|
| 122 |
)
|
| 123 |
|
| 124 |
-
# TECHNICAL DOCUMENTATION
|
| 125 |
-
TECHNICAL_DOC_THRESHOLDS = DomainThresholds(domain
|
| 126 |
-
structural
|
| 127 |
-
perplexity
|
| 128 |
-
entropy
|
| 129 |
-
semantic_analysis
|
| 130 |
-
linguistic
|
| 131 |
-
|
| 132 |
-
ensemble_threshold
|
| 133 |
)
|
| 134 |
|
| 135 |
# ENGINEERING
|
| 136 |
-
ENGINEERING_THRESHOLDS = DomainThresholds(domain
|
| 137 |
-
structural
|
| 138 |
-
perplexity
|
| 139 |
-
entropy
|
| 140 |
-
semantic_analysis
|
| 141 |
-
linguistic
|
| 142 |
-
|
| 143 |
-
ensemble_threshold
|
| 144 |
)
|
| 145 |
|
| 146 |
# SCIENCE (Physics, Chemistry, Biology)
|
| 147 |
-
SCIENCE_THRESHOLDS = DomainThresholds(domain
|
| 148 |
-
structural
|
| 149 |
-
perplexity
|
| 150 |
-
entropy
|
| 151 |
-
semantic_analysis
|
| 152 |
-
linguistic
|
| 153 |
-
|
| 154 |
-
ensemble_threshold
|
| 155 |
)
|
| 156 |
|
| 157 |
# BUSINESS
|
| 158 |
-
BUSINESS_THRESHOLDS = DomainThresholds(domain
|
| 159 |
-
structural
|
| 160 |
-
perplexity
|
| 161 |
-
entropy
|
| 162 |
-
semantic_analysis
|
| 163 |
-
linguistic
|
| 164 |
-
|
| 165 |
-
ensemble_threshold
|
| 166 |
)
|
| 167 |
|
| 168 |
# LEGAL
|
| 169 |
-
LEGAL_THRESHOLDS = DomainThresholds(domain
|
| 170 |
-
structural
|
| 171 |
-
perplexity
|
| 172 |
-
entropy
|
| 173 |
-
semantic_analysis
|
| 174 |
-
linguistic
|
| 175 |
-
|
| 176 |
-
ensemble_threshold
|
| 177 |
)
|
| 178 |
|
| 179 |
# MEDICAL
|
| 180 |
-
MEDICAL_THRESHOLDS = DomainThresholds(domain
|
| 181 |
-
structural
|
| 182 |
-
perplexity
|
| 183 |
-
entropy
|
| 184 |
-
semantic_analysis
|
| 185 |
-
linguistic
|
| 186 |
-
|
| 187 |
-
ensemble_threshold
|
| 188 |
)
|
| 189 |
|
| 190 |
# JOURNALISM
|
| 191 |
-
JOURNALISM_THRESHOLDS = DomainThresholds(domain
|
| 192 |
-
structural
|
| 193 |
-
perplexity
|
| 194 |
-
entropy
|
| 195 |
-
semantic_analysis
|
| 196 |
-
linguistic
|
| 197 |
-
|
| 198 |
-
ensemble_threshold
|
| 199 |
)
|
| 200 |
|
| 201 |
# MARKETING
|
| 202 |
-
MARKETING_THRESHOLDS = DomainThresholds(domain
|
| 203 |
-
structural
|
| 204 |
-
perplexity
|
| 205 |
-
entropy
|
| 206 |
-
semantic_analysis
|
| 207 |
-
linguistic
|
| 208 |
-
|
| 209 |
-
ensemble_threshold
|
| 210 |
)
|
| 211 |
|
| 212 |
# SOCIAL MEDIA
|
| 213 |
-
SOCIAL_MEDIA_THRESHOLDS = DomainThresholds(domain
|
| 214 |
-
structural
|
| 215 |
-
perplexity
|
| 216 |
-
entropy
|
| 217 |
-
semantic_analysis
|
| 218 |
-
linguistic
|
| 219 |
-
|
| 220 |
-
ensemble_threshold
|
| 221 |
)
|
| 222 |
|
| 223 |
# PERSONAL BLOG
|
| 224 |
-
BLOG_PERSONAL_THRESHOLDS = DomainThresholds(domain
|
| 225 |
-
structural
|
| 226 |
-
perplexity
|
| 227 |
-
entropy
|
| 228 |
-
semantic_analysis
|
| 229 |
-
linguistic
|
| 230 |
-
|
| 231 |
-
ensemble_threshold
|
| 232 |
)
|
| 233 |
|
| 234 |
# TUTORIAL/HOW-TO
|
| 235 |
-
TUTORIAL_THRESHOLDS = DomainThresholds(domain
|
| 236 |
-
structural
|
| 237 |
-
perplexity
|
| 238 |
-
entropy
|
| 239 |
-
semantic_analysis
|
| 240 |
-
linguistic
|
| 241 |
-
|
| 242 |
-
ensemble_threshold
|
| 243 |
)
|
| 244 |
|
| 245 |
|
|
@@ -322,14 +322,14 @@ def interpolate_thresholds(domain1: Domain, domain2: Domain, weight1: float = 0.
|
|
| 322 |
weight = m1.weight * weight1 + m2.weight * weight2,
|
| 323 |
)
|
| 324 |
|
| 325 |
-
return DomainThresholds(domain
|
| 326 |
-
structural
|
| 327 |
-
perplexity
|
| 328 |
-
entropy
|
| 329 |
-
semantic_analysis
|
| 330 |
-
linguistic
|
| 331 |
-
|
| 332 |
-
ensemble_threshold
|
| 333 |
)
|
| 334 |
|
| 335 |
|
|
@@ -339,12 +339,12 @@ def get_active_metric_weights(domain: Domain, enabled_metrics: Dict[str, bool])
|
|
| 339 |
"""
|
| 340 |
thresholds = get_threshold_for_domain(domain = domain)
|
| 341 |
|
| 342 |
-
metric_mapping = {"structural"
|
| 343 |
-
"perplexity"
|
| 344 |
-
"entropy"
|
| 345 |
-
"semantic_analysis"
|
| 346 |
-
"linguistic"
|
| 347 |
-
"
|
| 348 |
}
|
| 349 |
|
| 350 |
active_weights = dict()
|
|
|
|
| 55 |
"""
|
| 56 |
Thresholds for 6 metrics in a specific domain
|
| 57 |
"""
|
| 58 |
+
domain : Domain
|
| 59 |
+
structural : MetricThresholds
|
| 60 |
+
perplexity : MetricThresholds
|
| 61 |
+
entropy : MetricThresholds
|
| 62 |
+
semantic_analysis : MetricThresholds
|
| 63 |
+
linguistic : MetricThresholds
|
| 64 |
+
multi_perturbation_stability : MetricThresholds
|
| 65 |
+
ensemble_threshold : float = 0.5
|
| 66 |
|
| 67 |
|
| 68 |
# ==================== DOMAIN-SPECIFIC THRESHOLDS ====================
|
| 69 |
# GENERAL (Default fallback)
|
| 70 |
+
DEFAULT_THRESHOLDS = DomainThresholds(domain = Domain.GENERAL,
|
| 71 |
+
structural = MetricThresholds(ai_threshold = 0.55, human_threshold = 0.45, weight = 0.20),
|
| 72 |
+
perplexity = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.25),
|
| 73 |
+
entropy = MetricThresholds(ai_threshold = 0.48, human_threshold = 0.52, weight = 0.15),
|
| 74 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.55, human_threshold = 0.45, weight = 0.18),
|
| 75 |
+
linguistic = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.12),
|
| 76 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.10),
|
| 77 |
+
ensemble_threshold = 0.40,
|
| 78 |
)
|
| 79 |
|
| 80 |
# ACADEMIC
|
| 81 |
+
ACADEMIC_THRESHOLDS = DomainThresholds(domain = Domain.ACADEMIC,
|
| 82 |
+
structural = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.18),
|
| 83 |
+
perplexity = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.45, weight = 0.26),
|
| 84 |
+
entropy = MetricThresholds(ai_threshold = 0.45, human_threshold = 0.50, weight = 0.14),
|
| 85 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.20),
|
| 86 |
+
linguistic = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.14),
|
| 87 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.65, human_threshold = 0.35, weight = 0.08),
|
| 88 |
+
ensemble_threshold = 0.42,
|
| 89 |
)
|
| 90 |
|
| 91 |
# CREATIVE WRITING
|
| 92 |
+
CREATIVE_THRESHOLDS = DomainThresholds(domain = Domain.CREATIVE,
|
| 93 |
+
structural = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.18),
|
| 94 |
+
perplexity = MetricThresholds(ai_threshold = 0.55, human_threshold = 0.50, weight = 0.22),
|
| 95 |
+
entropy = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.55, weight = 0.16),
|
| 96 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.20),
|
| 97 |
+
linguistic = MetricThresholds(ai_threshold = 0.55, human_threshold = 0.45, weight = 0.16),
|
| 98 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.08),
|
| 99 |
+
ensemble_threshold = 0.38,
|
| 100 |
)
|
| 101 |
|
| 102 |
# AI/ML/DATA SCIENCE
|
| 103 |
+
AI_ML_THRESHOLDS = DomainThresholds(domain = Domain.AI_ML,
|
| 104 |
+
structural = MetricThresholds(ai_threshold = 0.57, human_threshold = 0.43, weight = 0.18),
|
| 105 |
+
perplexity = MetricThresholds(ai_threshold = 0.51, human_threshold = 0.46, weight = 0.26),
|
| 106 |
+
entropy = MetricThresholds(ai_threshold = 0.47, human_threshold = 0.50, weight = 0.14),
|
| 107 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.57, human_threshold = 0.43, weight = 0.20),
|
| 108 |
+
linguistic = MetricThresholds(ai_threshold = 0.61, human_threshold = 0.39, weight = 0.14),
|
| 109 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.64, human_threshold = 0.36, weight = 0.08),
|
| 110 |
+
ensemble_threshold = 0.41,
|
| 111 |
)
|
| 112 |
|
| 113 |
# SOFTWARE DEVELOPMENT
|
| 114 |
+
SOFTWARE_DEV_THRESHOLDS = DomainThresholds(domain = Domain.SOFTWARE_DEV,
|
| 115 |
+
structural = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.17),
|
| 116 |
+
perplexity = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.45, weight = 0.27),
|
| 117 |
+
entropy = MetricThresholds(ai_threshold = 0.46, human_threshold = 0.50, weight = 0.14),
|
| 118 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.20),
|
| 119 |
+
linguistic = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.14),
|
| 120 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.63, human_threshold = 0.37, weight = 0.08),
|
| 121 |
+
ensemble_threshold = 0.41,
|
| 122 |
)
|
| 123 |
|
| 124 |
+
# TECHNICAL DOCUMENTATION
|
| 125 |
+
TECHNICAL_DOC_THRESHOLDS = DomainThresholds(domain = Domain.TECHNICAL_DOC,
|
| 126 |
+
structural = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.18),
|
| 127 |
+
perplexity = MetricThresholds(ai_threshold = 0.49, human_threshold = 0.44, weight = 0.27),
|
| 128 |
+
entropy = MetricThresholds(ai_threshold = 0.45, human_threshold = 0.49, weight = 0.13),
|
| 129 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.20),
|
| 130 |
+
linguistic = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.14),
|
| 131 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.65, human_threshold = 0.35, weight = 0.08),
|
| 132 |
+
ensemble_threshold = 0.42,
|
| 133 |
)
|
| 134 |
|
| 135 |
# ENGINEERING
|
| 136 |
+
ENGINEERING_THRESHOLDS = DomainThresholds(domain = Domain.ENGINEERING,
|
| 137 |
+
structural = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.18),
|
| 138 |
+
perplexity = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.45, weight = 0.26),
|
| 139 |
+
entropy = MetricThresholds(ai_threshold = 0.46, human_threshold = 0.50, weight = 0.14),
|
| 140 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.20),
|
| 141 |
+
linguistic = MetricThresholds(ai_threshold = 0.61, human_threshold = 0.39, weight = 0.14),
|
| 142 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.64, human_threshold = 0.36, weight = 0.08),
|
| 143 |
+
ensemble_threshold = 0.41,
|
| 144 |
)
|
| 145 |
|
| 146 |
# SCIENCE (Physics, Chemistry, Biology)
|
| 147 |
+
SCIENCE_THRESHOLDS = DomainThresholds(domain = Domain.SCIENCE,
|
| 148 |
+
structural = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.18),
|
| 149 |
+
perplexity = MetricThresholds(ai_threshold = 0.51, human_threshold = 0.46, weight = 0.26),
|
| 150 |
+
entropy = MetricThresholds(ai_threshold = 0.46, human_threshold = 0.50, weight = 0.14),
|
| 151 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.20),
|
| 152 |
+
linguistic = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.14),
|
| 153 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.64, human_threshold = 0.36, weight = 0.08),
|
| 154 |
+
ensemble_threshold = 0.42,
|
| 155 |
)
|
| 156 |
|
| 157 |
# BUSINESS
|
| 158 |
+
BUSINESS_THRESHOLDS = DomainThresholds(domain = Domain.BUSINESS,
|
| 159 |
+
structural = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.18),
|
| 160 |
+
perplexity = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.24),
|
| 161 |
+
entropy = MetricThresholds(ai_threshold = 0.48, human_threshold = 0.52, weight = 0.15),
|
| 162 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.19),
|
| 163 |
+
linguistic = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.15),
|
| 164 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.09),
|
| 165 |
+
ensemble_threshold = 0.40,
|
| 166 |
)
|
| 167 |
|
| 168 |
# LEGAL
|
| 169 |
+
LEGAL_THRESHOLDS = DomainThresholds(domain = Domain.LEGAL,
|
| 170 |
+
structural = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.17),
|
| 171 |
+
perplexity = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.44, weight = 0.27),
|
| 172 |
+
entropy = MetricThresholds(ai_threshold = 0.44, human_threshold = 0.48, weight = 0.13),
|
| 173 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.20),
|
| 174 |
+
linguistic = MetricThresholds(ai_threshold = 0.63, human_threshold = 0.37, weight = 0.15),
|
| 175 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.66, human_threshold = 0.34, weight = 0.08),
|
| 176 |
+
ensemble_threshold = 0.43,
|
| 177 |
)
|
| 178 |
|
| 179 |
# MEDICAL
|
| 180 |
+
MEDICAL_THRESHOLDS = DomainThresholds(domain = Domain.MEDICAL,
|
| 181 |
+
structural = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.17),
|
| 182 |
+
perplexity = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.45, weight = 0.27),
|
| 183 |
+
entropy = MetricThresholds(ai_threshold = 0.45, human_threshold = 0.49, weight = 0.13),
|
| 184 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.20),
|
| 185 |
+
linguistic = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.15),
|
| 186 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.65, human_threshold = 0.35, weight = 0.08),
|
| 187 |
+
ensemble_threshold = 0.43,
|
| 188 |
)
|
| 189 |
|
| 190 |
# JOURNALISM
|
| 191 |
+
JOURNALISM_THRESHOLDS = DomainThresholds(domain = Domain.JOURNALISM,
|
| 192 |
+
structural = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.18),
|
| 193 |
+
perplexity = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.24),
|
| 194 |
+
entropy = MetricThresholds(ai_threshold = 0.48, human_threshold = 0.52, weight = 0.15),
|
| 195 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.20),
|
| 196 |
+
linguistic = MetricThresholds(ai_threshold = 0.58, human_threshold = 0.42, weight = 0.15),
|
| 197 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.08),
|
| 198 |
+
ensemble_threshold = 0.40,
|
| 199 |
)
|
| 200 |
|
| 201 |
# MARKETING
|
| 202 |
+
MARKETING_THRESHOLDS = DomainThresholds(domain = Domain.MARKETING,
|
| 203 |
+
structural = MetricThresholds(ai_threshold = 0.54, human_threshold = 0.46, weight = 0.19),
|
| 204 |
+
perplexity = MetricThresholds(ai_threshold = 0.53, human_threshold = 0.49, weight = 0.23),
|
| 205 |
+
entropy = MetricThresholds(ai_threshold = 0.49, human_threshold = 0.53, weight = 0.15),
|
| 206 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.54, human_threshold = 0.46, weight = 0.19),
|
| 207 |
+
linguistic = MetricThresholds(ai_threshold = 0.57, human_threshold = 0.43, weight = 0.16),
|
| 208 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.61, human_threshold = 0.39, weight = 0.08),
|
| 209 |
+
ensemble_threshold = 0.39,
|
| 210 |
)
|
| 211 |
|
| 212 |
# SOCIAL MEDIA
|
| 213 |
+
SOCIAL_MEDIA_THRESHOLDS = DomainThresholds(domain = Domain.SOCIAL_MEDIA,
|
| 214 |
+
structural = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.18),
|
| 215 |
+
perplexity = MetricThresholds(ai_threshold = 0.54, human_threshold = 0.50, weight = 0.20),
|
| 216 |
+
entropy = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.54, weight = 0.17),
|
| 217 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.18),
|
| 218 |
+
linguistic = MetricThresholds(ai_threshold = 0.55, human_threshold = 0.45, weight = 0.18),
|
| 219 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.60, human_threshold = 0.40, weight = 0.09),
|
| 220 |
+
ensemble_threshold = 0.36,
|
| 221 |
)
|
| 222 |
|
| 223 |
# PERSONAL BLOG
|
| 224 |
+
BLOG_PERSONAL_THRESHOLDS = DomainThresholds(domain = Domain.BLOG_PERSONAL,
|
| 225 |
+
structural = MetricThresholds(ai_threshold = 0.53, human_threshold = 0.47, weight = 0.19),
|
| 226 |
+
perplexity = MetricThresholds(ai_threshold = 0.54, human_threshold = 0.50, weight = 0.22),
|
| 227 |
+
entropy = MetricThresholds(ai_threshold = 0.50, human_threshold = 0.54, weight = 0.16),
|
| 228 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.53, human_threshold = 0.47, weight = 0.19),
|
| 229 |
+
linguistic = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.16),
|
| 230 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.08),
|
| 231 |
+
ensemble_threshold = 0.38,
|
| 232 |
)
|
| 233 |
|
| 234 |
# TUTORIAL/HOW-TO
|
| 235 |
+
TUTORIAL_THRESHOLDS = DomainThresholds(domain = Domain.TUTORIAL,
|
| 236 |
+
structural = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.18),
|
| 237 |
+
perplexity = MetricThresholds(ai_threshold = 0.52, human_threshold = 0.48, weight = 0.25),
|
| 238 |
+
entropy = MetricThresholds(ai_threshold = 0.48, human_threshold = 0.52, weight = 0.15),
|
| 239 |
+
semantic_analysis = MetricThresholds(ai_threshold = 0.56, human_threshold = 0.44, weight = 0.19),
|
| 240 |
+
linguistic = MetricThresholds(ai_threshold = 0.59, human_threshold = 0.41, weight = 0.15),
|
| 241 |
+
multi_perturbation_stability = MetricThresholds(ai_threshold = 0.62, human_threshold = 0.38, weight = 0.08),
|
| 242 |
+
ensemble_threshold = 0.40,
|
| 243 |
)
|
| 244 |
|
| 245 |
|
|
|
|
| 322 |
weight = m1.weight * weight1 + m2.weight * weight2,
|
| 323 |
)
|
| 324 |
|
| 325 |
+
return DomainThresholds(domain = domain1,
|
| 326 |
+
structural = interpolate_metric(thresh1.structural, thresh2.structural),
|
| 327 |
+
perplexity = interpolate_metric(thresh1.perplexity, thresh2.perplexity),
|
| 328 |
+
entropy = interpolate_metric(thresh1.entropy, thresh2.entropy),
|
| 329 |
+
semantic_analysis = interpolate_metric(thresh1.semantic_analysis, thresh2.semantic_analysis),
|
| 330 |
+
linguistic = interpolate_metric(thresh1.linguistic, thresh2.linguistic),
|
| 331 |
+
multi_perturbation_stability = interpolate_metric(thresh1.multi_perturbation_stability, thresh2.multi_perturbation_stability),
|
| 332 |
+
ensemble_threshold = thresh1.ensemble_threshold * weight1 + thresh2.ensemble_threshold * weight2,
|
| 333 |
)
|
| 334 |
|
| 335 |
|
|
|
|
| 339 |
"""
|
| 340 |
thresholds = get_threshold_for_domain(domain = domain)
|
| 341 |
|
| 342 |
+
metric_mapping = {"structural" : thresholds.structural,
|
| 343 |
+
"perplexity" : thresholds.perplexity,
|
| 344 |
+
"entropy" : thresholds.entropy,
|
| 345 |
+
"semantic_analysis" : thresholds.semantic_analysis,
|
| 346 |
+
"linguistic" : thresholds.linguistic,
|
| 347 |
+
"multi_perturbation_stability" : thresholds.multi_perturbation_stability,
|
| 348 |
}
|
| 349 |
|
| 350 |
active_weights = dict()
|
detector/attribution.py
CHANGED
|
@@ -78,12 +78,12 @@ class ModelAttributor:
|
|
| 78 |
- Explainable reasoning
|
| 79 |
"""
|
| 80 |
# DOCUMENT-ALIGNED: Metric weights from technical specification
|
| 81 |
-
METRIC_WEIGHTS = {"perplexity"
|
| 82 |
-
"structural"
|
| 83 |
-
"semantic_analysis": 0.15,
|
| 84 |
-
"entropy"
|
| 85 |
-
"linguistic"
|
| 86 |
-
"
|
| 87 |
}
|
| 88 |
|
| 89 |
# DOMAIN-AWARE model patterns for ALL 16 DOMAINS
|
|
@@ -441,13 +441,17 @@ class ModelAttributor:
|
|
| 441 |
domain_preferences = self.DOMAIN_MODEL_PREFERENCES.get(domain, [AIModel.GPT_4, AIModel.CLAUDE_3_SONNET])
|
| 442 |
|
| 443 |
# Fingerprint analysis
|
| 444 |
-
fingerprint_scores = self._calculate_fingerprint_scores(text,
|
|
|
|
|
|
|
| 445 |
|
| 446 |
# Statistical pattern analysis
|
| 447 |
-
statistical_scores = self._analyze_statistical_patterns(text,
|
|
|
|
|
|
|
| 448 |
|
| 449 |
# Metric-based attribution using all 6 metrics
|
| 450 |
-
metric_scores = self._analyze_metric_patterns(metric_results, domain) if metric_results else {}
|
| 451 |
|
| 452 |
# Ensemble Combination
|
| 453 |
combined_scores, metric_contributions = self._combine_attribution_scores(fingerprint_scores = fingerprint_scores,
|
|
@@ -462,7 +466,7 @@ class ModelAttributor:
|
|
| 462 |
domain_preferences = domain_preferences,
|
| 463 |
)
|
| 464 |
|
| 465 |
-
# Reasoning with domain context
|
| 466 |
reasoning = self._generate_detailed_reasoning(predicted_model = predicted_model,
|
| 467 |
confidence = confidence,
|
| 468 |
domain = domain,
|
|
@@ -650,22 +654,22 @@ class ModelAttributor:
|
|
| 650 |
return scores
|
| 651 |
|
| 652 |
# DOMAIN-AWARE: Adjust metric sensitivity based on domain
|
| 653 |
-
domain_metric_weights = {Domain.GENERAL : {"perplexity": 1.0, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.0, "linguistic": 1.0, "
|
| 654 |
-
Domain.ACADEMIC : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.3, "
|
| 655 |
-
Domain.TECHNICAL_DOC : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.1, "
|
| 656 |
-
Domain.AI_ML : {"perplexity": 1.3, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.2, "
|
| 657 |
-
Domain.SOFTWARE_DEV : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.0, "
|
| 658 |
-
Domain.ENGINEERING : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.2, "
|
| 659 |
-
Domain.SCIENCE : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.3, "
|
| 660 |
-
Domain.BUSINESS : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.2, "linguistic": 1.1, "
|
| 661 |
-
Domain.LEGAL : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.3, "linguistic": 1.3, "
|
| 662 |
-
Domain.MEDICAL : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.2, "
|
| 663 |
-
Domain.JOURNALISM : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.1, "linguistic": 1.1, "
|
| 664 |
-
Domain.CREATIVE : {"perplexity": 0.9, "structural": 0.9, "entropy": 1.2, "semantic_analysis": 1.0, "linguistic": 1.3, "
|
| 665 |
-
Domain.MARKETING : {"perplexity": 1.0, "structural": 1.0, "entropy": 1.1, "semantic_analysis": 1.1, "linguistic": 1.2, "
|
| 666 |
-
Domain.SOCIAL_MEDIA : {"perplexity": 1.0, "structural": 0.8, "entropy": 1.3, "semantic_analysis": 0.9, "linguistic": 0.9, "
|
| 667 |
-
Domain.BLOG_PERSONAL : {"perplexity": 1.0, "structural": 0.9, "entropy": 1.2, "semantic_analysis": 1.0, "linguistic": 1.1, "
|
| 668 |
-
Domain.TUTORIAL : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.1, "linguistic": 1.1, "
|
| 669 |
}
|
| 670 |
|
| 671 |
domain_weights = domain_metric_weights.get(domain, domain_metric_weights[Domain.GENERAL])
|
|
@@ -733,16 +737,16 @@ class ModelAttributor:
|
|
| 733 |
scores[AIModel.CLAUDE_3_OPUS] += 0.5 * self.METRIC_WEIGHTS["linguistic"] * domain_weight
|
| 734 |
scores[AIModel.GPT_4] += 0.4 * self.METRIC_WEIGHTS["linguistic"] * domain_weight
|
| 735 |
|
| 736 |
-
#
|
| 737 |
-
if ("
|
| 738 |
-
|
| 739 |
-
stability
|
| 740 |
-
curvature
|
| 741 |
|
| 742 |
# Specific stability patterns for different model families
|
| 743 |
if (0.4 <= stability <= 0.6):
|
| 744 |
-
scores[AIModel.MIXTRAL] += 0.4 * self.METRIC_WEIGHTS["
|
| 745 |
-
scores[AIModel.LLAMA_3] += 0.3 * self.METRIC_WEIGHTS["
|
| 746 |
|
| 747 |
# Normalize scores
|
| 748 |
for model in scores:
|
|
@@ -823,7 +827,7 @@ class ModelAttributor:
|
|
| 823 |
|
| 824 |
# FIXED: Only return UNKNOWN if the best score is very low
|
| 825 |
# Use a more reasonable threshold for attribution
|
| 826 |
-
if best_score < 0.
|
| 827 |
return AIModel.UNKNOWN, best_score
|
| 828 |
|
| 829 |
# FIXED: Don't override with domain preferences if there's a clear winner
|
|
@@ -868,62 +872,62 @@ class ModelAttributor:
|
|
| 868 |
def _generate_detailed_reasoning(self, predicted_model: AIModel, confidence: float, domain: Domain, metric_contributions: Dict[str, float],
|
| 869 |
combined_scores: Dict[str, float]) -> List[str]:
|
| 870 |
"""
|
| 871 |
-
Generate Explainable reasoning - FIXED to show proper
|
| 872 |
"""
|
| 873 |
-
reasoning =
|
| 874 |
|
| 875 |
-
reasoning.append("
|
|
|
|
| 876 |
reasoning.append(f"**Domain**: {domain.value.replace('_', ' ').title()}")
|
|
|
|
| 877 |
|
|
|
|
| 878 |
if (predicted_model == AIModel.UNKNOWN):
|
| 879 |
-
reasoning.append("**Most Likely**:
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
if sorted_models and sorted_models[0][1] > 0:
|
| 884 |
-
top_model_name = sorted_models[0][0].replace("-", " ").replace("_", " ").title()
|
| 885 |
-
top_score = sorted_models[0][1] * 100
|
| 886 |
-
reasoning.append(f"**{top_model_name}**")
|
| 887 |
-
reasoning.append(f"{top_score:.1f}%")
|
| 888 |
else:
|
| 889 |
model_name = predicted_model.value.replace("-", " ").replace("_", " ").title()
|
| 890 |
-
reasoning.append(f"**
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
reasoning.append(f"{score:.1f}%")
|
| 896 |
-
|
| 897 |
-
# Show top model candidates with ACTUAL percentages in proper order
|
| 898 |
reasoning.append("")
|
|
|
|
|
|
|
| 899 |
if combined_scores:
|
| 900 |
-
sorted_models = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
|
| 901 |
|
| 902 |
-
for model_name, score in sorted_models[:6]:
|
| 903 |
-
|
|
|
|
| 904 |
continue
|
| 905 |
-
|
| 906 |
display_name = model_name.replace("-", " ").replace("_", " ").title()
|
| 907 |
-
|
| 908 |
-
percentage = score * 100
|
| 909 |
|
| 910 |
-
#
|
| 911 |
-
reasoning.append(f"**{display_name}
|
| 912 |
-
reasoning.append(f"{percentage:.1f}%")
|
| 913 |
-
reasoning.append("")
|
| 914 |
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
|
|
|
|
|
|
| 918 |
|
| 919 |
if (domain in [Domain.ACADEMIC, Domain.TECHNICAL_DOC, Domain.AI_ML, Domain.SOFTWARE_DEV, Domain.ENGINEERING, Domain.SCIENCE]):
|
| 920 |
-
reasoning.append("Higher weight
|
| 921 |
|
| 922 |
elif (domain in [Domain.CREATIVE, Domain.MARKETING, Domain.SOCIAL_MEDIA, Domain.BLOG_PERSONAL]):
|
| 923 |
-
reasoning.append("
|
| 924 |
|
| 925 |
elif (domain in [Domain.LEGAL, Domain.MEDICAL]):
|
| 926 |
-
reasoning.append("
|
|
|
|
|
|
|
|
|
|
| 927 |
|
| 928 |
return reasoning
|
| 929 |
|
|
|
|
| 78 |
- Explainable reasoning
|
| 79 |
"""
|
| 80 |
# DOCUMENT-ALIGNED: Metric weights from technical specification
|
| 81 |
+
METRIC_WEIGHTS = {"perplexity" : 0.25,
|
| 82 |
+
"structural" : 0.15,
|
| 83 |
+
"semantic_analysis" : 0.15,
|
| 84 |
+
"entropy" : 0.20,
|
| 85 |
+
"linguistic" : 0.15,
|
| 86 |
+
"multi_perturbation_stability" : 0.10,
|
| 87 |
}
|
| 88 |
|
| 89 |
# DOMAIN-AWARE model patterns for ALL 16 DOMAINS
|
|
|
|
| 441 |
domain_preferences = self.DOMAIN_MODEL_PREFERENCES.get(domain, [AIModel.GPT_4, AIModel.CLAUDE_3_SONNET])
|
| 442 |
|
| 443 |
# Fingerprint analysis
|
| 444 |
+
fingerprint_scores = self._calculate_fingerprint_scores(text = text,
|
| 445 |
+
domain = domain,
|
| 446 |
+
)
|
| 447 |
|
| 448 |
# Statistical pattern analysis
|
| 449 |
+
statistical_scores = self._analyze_statistical_patterns(text = text,
|
| 450 |
+
domain = domain,
|
| 451 |
+
)
|
| 452 |
|
| 453 |
# Metric-based attribution using all 6 metrics
|
| 454 |
+
metric_scores = self._analyze_metric_patterns(metric_results = metric_results, domain = domain) if metric_results else {}
|
| 455 |
|
| 456 |
# Ensemble Combination
|
| 457 |
combined_scores, metric_contributions = self._combine_attribution_scores(fingerprint_scores = fingerprint_scores,
|
|
|
|
| 466 |
domain_preferences = domain_preferences,
|
| 467 |
)
|
| 468 |
|
| 469 |
+
# Reasoning with domain context - FIXED
|
| 470 |
reasoning = self._generate_detailed_reasoning(predicted_model = predicted_model,
|
| 471 |
confidence = confidence,
|
| 472 |
domain = domain,
|
|
|
|
| 654 |
return scores
|
| 655 |
|
| 656 |
# DOMAIN-AWARE: Adjust metric sensitivity based on domain
|
| 657 |
+
domain_metric_weights = {Domain.GENERAL : {"perplexity": 1.0, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.0, "linguistic": 1.0, "multi_perturbation_stability": 1.0},
|
| 658 |
+
Domain.ACADEMIC : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.3, "multi_perturbation_stability": 0.8},
|
| 659 |
+
Domain.TECHNICAL_DOC : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.1, "multi_perturbation_stability": 0.8},
|
| 660 |
+
Domain.AI_ML : {"perplexity": 1.3, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.2, "multi_perturbation_stability": 0.8},
|
| 661 |
+
Domain.SOFTWARE_DEV : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.0, "multi_perturbation_stability": 0.9},
|
| 662 |
+
Domain.ENGINEERING : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.1, "linguistic": 1.2, "multi_perturbation_stability": 0.8},
|
| 663 |
+
Domain.SCIENCE : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.3, "multi_perturbation_stability": 0.8},
|
| 664 |
+
Domain.BUSINESS : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.2, "linguistic": 1.1, "multi_perturbation_stability": 0.9},
|
| 665 |
+
Domain.LEGAL : {"perplexity": 1.2, "structural": 1.1, "entropy": 0.9, "semantic_analysis": 1.3, "linguistic": 1.3, "multi_perturbation_stability": 0.8},
|
| 666 |
+
Domain.MEDICAL : {"perplexity": 1.2, "structural": 1.0, "entropy": 0.9, "semantic_analysis": 1.2, "linguistic": 1.2, "multi_perturbation_stability": 0.8},
|
| 667 |
+
Domain.JOURNALISM : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.1, "linguistic": 1.1, "multi_perturbation_stability": 0.9},
|
| 668 |
+
Domain.CREATIVE : {"perplexity": 0.9, "structural": 0.9, "entropy": 1.2, "semantic_analysis": 1.0, "linguistic": 1.3, "multi_perturbation_stability": 0.9},
|
| 669 |
+
Domain.MARKETING : {"perplexity": 1.0, "structural": 1.0, "entropy": 1.1, "semantic_analysis": 1.1, "linguistic": 1.2, "multi_perturbation_stability": 0.8},
|
| 670 |
+
Domain.SOCIAL_MEDIA : {"perplexity": 1.0, "structural": 0.8, "entropy": 1.3, "semantic_analysis": 0.9, "linguistic": 0.9, "multi_perturbation_stability": 0.9},
|
| 671 |
+
Domain.BLOG_PERSONAL : {"perplexity": 1.0, "structural": 0.9, "entropy": 1.2, "semantic_analysis": 1.0, "linguistic": 1.1, "multi_perturbation_stability": 0.8},
|
| 672 |
+
Domain.TUTORIAL : {"perplexity": 1.1, "structural": 1.0, "entropy": 1.0, "semantic_analysis": 1.1, "linguistic": 1.1, "multi_perturbation_stability": 0.9},
|
| 673 |
}
|
| 674 |
|
| 675 |
domain_weights = domain_metric_weights.get(domain, domain_metric_weights[Domain.GENERAL])
|
|
|
|
| 737 |
scores[AIModel.CLAUDE_3_OPUS] += 0.5 * self.METRIC_WEIGHTS["linguistic"] * domain_weight
|
| 738 |
scores[AIModel.GPT_4] += 0.4 * self.METRIC_WEIGHTS["linguistic"] * domain_weight
|
| 739 |
|
| 740 |
+
# MULTI-PERTURBATION STABILITY ANALYSIS (10% weight)
|
| 741 |
+
if ("multi_perturbation_stability" in metric_results):
|
| 742 |
+
multi_perturbation_stability_result = metric_results["multi_perturbation_stability"]
|
| 743 |
+
stability = multi_perturbation_stability_result.details.get("stability_score", 0.5)
|
| 744 |
+
curvature = multi_perturbation_stability_result.details.get("curvature_score", 0.5)
|
| 745 |
|
| 746 |
# Specific stability patterns for different model families
|
| 747 |
if (0.4 <= stability <= 0.6):
|
| 748 |
+
scores[AIModel.MIXTRAL] += 0.4 * self.METRIC_WEIGHTS["multi_perturbation_stability"]
|
| 749 |
+
scores[AIModel.LLAMA_3] += 0.3 * self.METRIC_WEIGHTS["multi_perturbation_stability"]
|
| 750 |
|
| 751 |
# Normalize scores
|
| 752 |
for model in scores:
|
|
|
|
| 827 |
|
| 828 |
# FIXED: Only return UNKNOWN if the best score is very low
|
| 829 |
# Use a more reasonable threshold for attribution
|
| 830 |
+
if best_score < 0.05: # Changed from 0.08 to 0.05 to be less restrictive
|
| 831 |
return AIModel.UNKNOWN, best_score
|
| 832 |
|
| 833 |
# FIXED: Don't override with domain preferences if there's a clear winner
|
|
|
|
| 872 |
def _generate_detailed_reasoning(self, predicted_model: AIModel, confidence: float, domain: Domain, metric_contributions: Dict[str, float],
|
| 873 |
combined_scores: Dict[str, float]) -> List[str]:
|
| 874 |
"""
|
| 875 |
+
Generate Explainable reasoning - FIXED to show proper formatting
|
| 876 |
"""
|
| 877 |
+
reasoning = []
|
| 878 |
|
| 879 |
+
reasoning.append("**AI Model Attribution Analysis**")
|
| 880 |
+
reasoning.append("")
|
| 881 |
reasoning.append(f"**Domain**: {domain.value.replace('_', ' ').title()}")
|
| 882 |
+
reasoning.append("")
|
| 883 |
|
| 884 |
+
# Show prediction with confidence
|
| 885 |
if (predicted_model == AIModel.UNKNOWN):
|
| 886 |
+
reasoning.append("**Most Likely**: Unable to determine with high confidence")
|
| 887 |
+
reasoning.append("")
|
| 888 |
+
reasoning.append("**Top Candidates:**")
|
| 889 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 890 |
else:
|
| 891 |
model_name = predicted_model.value.replace("-", " ").replace("_", " ").title()
|
| 892 |
+
reasoning.append(f"**Predicted Model**: {model_name}")
|
| 893 |
+
reasoning.append(f"**Confidence**: {confidence*100:.1f}%")
|
| 894 |
+
reasoning.append("")
|
| 895 |
+
reasoning.append("**Model Probability Distribution:**")
|
| 896 |
+
|
|
|
|
|
|
|
|
|
|
| 897 |
reasoning.append("")
|
| 898 |
+
|
| 899 |
+
# Show top candidates in proper format
|
| 900 |
if combined_scores:
|
| 901 |
+
sorted_models = sorted(combined_scores.items(), key = lambda x: x[1], reverse = True)
|
| 902 |
|
| 903 |
+
for i, (model_name, score) in enumerate(sorted_models[:6]):
|
| 904 |
+
# Skip very low probability models
|
| 905 |
+
if (score < 0.01):
|
| 906 |
continue
|
| 907 |
+
|
| 908 |
display_name = model_name.replace("-", " ").replace("_", " ").title()
|
| 909 |
+
percentage = score * 100
|
|
|
|
| 910 |
|
| 911 |
+
# Single line format: "• Model Name: XX.X%"
|
| 912 |
+
reasoning.append(f"• **{display_name}**: {percentage:.1f}%")
|
|
|
|
|
|
|
| 913 |
|
| 914 |
+
reasoning.append("")
|
| 915 |
+
|
| 916 |
+
# Domain-specific insights - FIXED: Removed duplicate header
|
| 917 |
+
reasoning.append("**Analysis Notes:**")
|
| 918 |
+
reasoning.append(f"• Calibrated for {domain.value.replace('_', ' ')} domain")
|
| 919 |
|
| 920 |
if (domain in [Domain.ACADEMIC, Domain.TECHNICAL_DOC, Domain.AI_ML, Domain.SOFTWARE_DEV, Domain.ENGINEERING, Domain.SCIENCE]):
|
| 921 |
+
reasoning.append("• Higher weight on structural coherence and technical patterns")
|
| 922 |
|
| 923 |
elif (domain in [Domain.CREATIVE, Domain.MARKETING, Domain.SOCIAL_MEDIA, Domain.BLOG_PERSONAL]):
|
| 924 |
+
reasoning.append("• Emphasis on linguistic diversity and stylistic variation")
|
| 925 |
|
| 926 |
elif (domain in [Domain.LEGAL, Domain.MEDICAL]):
|
| 927 |
+
reasoning.append("• Focus on formal language and specialized terminology")
|
| 928 |
+
|
| 929 |
+
elif (domain in [Domain.BUSINESS, Domain.JOURNALISM, Domain.TUTORIAL]):
|
| 930 |
+
reasoning.append("• Balanced analysis across multiple attribution factors")
|
| 931 |
|
| 932 |
return reasoning
|
| 933 |
|
detector/ensemble.py
CHANGED
|
@@ -103,7 +103,7 @@ class EnsembleClassifier:
|
|
| 103 |
|
| 104 |
Returns:
|
| 105 |
--------
|
| 106 |
-
{ EnsembleResult }
|
| 107 |
"""
|
| 108 |
try:
|
| 109 |
# Filter and validate metrics
|
|
@@ -114,76 +114,94 @@ class EnsembleClassifier:
|
|
| 114 |
return self._create_fallback_result(domain, metric_results, "insufficient_metrics")
|
| 115 |
|
| 116 |
# Get domain-specific base weights
|
| 117 |
-
enabled_metrics
|
| 118 |
-
base_weights
|
| 119 |
|
| 120 |
-
# Try primary aggregation method
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
try:
|
| 122 |
if (self.primary_method == "confidence_calibrated"):
|
| 123 |
-
aggregated,
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
|
| 128 |
elif (self.primary_method == "domain_adaptive"):
|
| 129 |
-
aggregated,
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
|
| 134 |
elif (self.primary_method == "consensus_based"):
|
| 135 |
-
aggregated,
|
| 136 |
-
|
| 137 |
-
|
|
|
|
| 138 |
|
| 139 |
elif ((self.primary_method == "ml_ensemble") and self.use_ml_ensemble):
|
| 140 |
-
aggregated,
|
| 141 |
-
|
| 142 |
-
|
|
|
|
| 143 |
|
| 144 |
else:
|
| 145 |
# Fallback to domain weighted
|
| 146 |
-
aggregated,
|
| 147 |
-
|
| 148 |
-
|
|
|
|
| 149 |
|
| 150 |
except Exception as e:
|
| 151 |
logger.warning(f"Primary aggregation failed: {e}, using fallback")
|
| 152 |
-
aggregated,
|
| 153 |
-
|
| 154 |
-
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
-
uncertainty_score
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
|
| 167 |
-
consensus_level
|
| 168 |
|
| 169 |
# Apply domain-specific threshold with uncertainty consideration
|
| 170 |
-
domain_thresholds
|
| 171 |
-
final_verdict
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
|
| 176 |
-
# Generate detailed reasoning
|
| 177 |
-
reasoning
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
|
| 185 |
-
# Calculate weighted scores
|
| 186 |
-
weighted_scores
|
| 187 |
|
| 188 |
return EnsembleResult(final_verdict = final_verdict,
|
| 189 |
ai_probability = aggregated["ai_probability"],
|
|
@@ -192,7 +210,7 @@ class EnsembleClassifier:
|
|
| 192 |
overall_confidence = overall_confidence,
|
| 193 |
domain = domain,
|
| 194 |
metric_results = metric_results,
|
| 195 |
-
metric_weights =
|
| 196 |
weighted_scores = weighted_scores,
|
| 197 |
reasoning = reasoning,
|
| 198 |
uncertainty_score = uncertainty_score,
|
|
@@ -202,8 +220,8 @@ class EnsembleClassifier:
|
|
| 202 |
except Exception as e:
|
| 203 |
logger.error(f"Error in advanced ensemble prediction: {e}")
|
| 204 |
return self._create_fallback_result(domain, metric_results, str(e))
|
| 205 |
-
|
| 206 |
|
|
|
|
| 207 |
def _validate_metrics(self, results: Dict[str, MetricResult]) -> tuple:
|
| 208 |
"""
|
| 209 |
Validate metrics and return quality information
|
|
@@ -454,126 +472,126 @@ class EnsembleClassifier:
|
|
| 454 |
Get domain-specific performance weights (would come from validation data)
|
| 455 |
"""
|
| 456 |
# Placeholder - in practice, this would be based on historical performance per domain : FUTURE WORK
|
| 457 |
-
performance_weights = {'structural'
|
| 458 |
-
'entropy'
|
| 459 |
-
'semantic_analysis'
|
| 460 |
-
'linguistic'
|
| 461 |
-
'perplexity'
|
| 462 |
-
'
|
| 463 |
}
|
| 464 |
|
| 465 |
# Domain-specific adjustments for all 16 domains
|
| 466 |
-
domain_adjustments = {Domain.GENERAL : {'structural'
|
| 467 |
-
'perplexity'
|
| 468 |
-
'entropy'
|
| 469 |
-
'semantic_analysis'
|
| 470 |
-
'linguistic'
|
| 471 |
-
'
|
| 472 |
},
|
| 473 |
-
Domain.ACADEMIC : {'structural'
|
| 474 |
-
'perplexity'
|
| 475 |
-
'entropy'
|
| 476 |
-
'semantic_analysis'
|
| 477 |
-
'linguistic'
|
| 478 |
-
'
|
| 479 |
},
|
| 480 |
-
Domain.CREATIVE : {'structural'
|
| 481 |
-
'perplexity'
|
| 482 |
-
'entropy'
|
| 483 |
-
'semantic_analysis'
|
| 484 |
-
'linguistic'
|
| 485 |
-
'
|
| 486 |
},
|
| 487 |
-
Domain.AI_ML : {'structural'
|
| 488 |
-
'perplexity'
|
| 489 |
-
'entropy'
|
| 490 |
-
'semantic_analysis'
|
| 491 |
-
'linguistic'
|
| 492 |
-
'
|
| 493 |
},
|
| 494 |
-
Domain.SOFTWARE_DEV : {'structural'
|
| 495 |
-
'perplexity'
|
| 496 |
-
'entropy'
|
| 497 |
-
'semantic_analysis'
|
| 498 |
-
'linguistic'
|
| 499 |
-
'
|
| 500 |
},
|
| 501 |
-
Domain.TECHNICAL_DOC : {'structural'
|
| 502 |
-
'perplexity'
|
| 503 |
-
'entropy'
|
| 504 |
-
'semantic_analysis'
|
| 505 |
-
'linguistic'
|
| 506 |
-
'
|
| 507 |
},
|
| 508 |
-
Domain.ENGINEERING : {'structural'
|
| 509 |
-
'perplexity'
|
| 510 |
-
'entropy'
|
| 511 |
-
'semantic_analysis'
|
| 512 |
-
'linguistic'
|
| 513 |
-
'
|
| 514 |
},
|
| 515 |
-
Domain.SCIENCE : {'structural'
|
| 516 |
-
'perplexity'
|
| 517 |
-
'entropy'
|
| 518 |
-
'semantic_analysis'
|
| 519 |
-
'linguistic'
|
| 520 |
-
'
|
| 521 |
},
|
| 522 |
-
Domain.BUSINESS : {'structural'
|
| 523 |
-
'perplexity'
|
| 524 |
-
'entropy'
|
| 525 |
-
'semantic_analysis'
|
| 526 |
-
'linguistic'
|
| 527 |
-
'
|
| 528 |
},
|
| 529 |
-
Domain.LEGAL : {'structural'
|
| 530 |
-
'perplexity'
|
| 531 |
-
'entropy'
|
| 532 |
-
'semantic_analysis'
|
| 533 |
-
'linguistic'
|
| 534 |
-
'
|
| 535 |
},
|
| 536 |
-
Domain.MEDICAL : {'structural'
|
| 537 |
-
'perplexity'
|
| 538 |
-
'entropy'
|
| 539 |
-
'semantic_analysis'
|
| 540 |
-
'linguistic'
|
| 541 |
-
'
|
| 542 |
},
|
| 543 |
-
Domain.JOURNALISM : {'structural'
|
| 544 |
-
'perplexity'
|
| 545 |
-
'entropy'
|
| 546 |
-
'semantic_analysis'
|
| 547 |
-
'linguistic'
|
| 548 |
-
'
|
| 549 |
},
|
| 550 |
-
Domain.MARKETING : {'structural'
|
| 551 |
-
'perplexity'
|
| 552 |
-
'entropy'
|
| 553 |
-
'semantic_analysis'
|
| 554 |
-
'linguistic'
|
| 555 |
-
'
|
| 556 |
},
|
| 557 |
-
Domain.SOCIAL_MEDIA : {'structural'
|
| 558 |
-
'perplexity'
|
| 559 |
-
'entropy'
|
| 560 |
-
'semantic_analysis'
|
| 561 |
-
'linguistic'
|
| 562 |
-
'
|
| 563 |
},
|
| 564 |
-
Domain.BLOG_PERSONAL : {'structural'
|
| 565 |
-
'perplexity'
|
| 566 |
-
'entropy'
|
| 567 |
-
'semantic_analysis'
|
| 568 |
-
'linguistic'
|
| 569 |
-
'
|
| 570 |
},
|
| 571 |
-
Domain.TUTORIAL : {'structural'
|
| 572 |
-
'perplexity'
|
| 573 |
-
'entropy'
|
| 574 |
-
'semantic_analysis'
|
| 575 |
-
'linguistic'
|
| 576 |
-
'
|
| 577 |
},
|
| 578 |
}
|
| 579 |
|
|
|
|
| 103 |
|
| 104 |
Returns:
|
| 105 |
--------
|
| 106 |
+
{ EnsembleResult } : EnsembleResult object with final prediction
|
| 107 |
"""
|
| 108 |
try:
|
| 109 |
# Filter and validate metrics
|
|
|
|
| 114 |
return self._create_fallback_result(domain, metric_results, "insufficient_metrics")
|
| 115 |
|
| 116 |
# Get domain-specific base weights
|
| 117 |
+
enabled_metrics = {name: True for name in valid_results.keys()}
|
| 118 |
+
base_weights = get_active_metric_weights(domain, enabled_metrics)
|
| 119 |
|
| 120 |
+
# Try primary aggregation method : Initialize in case all methods fail unexpectedly
|
| 121 |
+
calculated_weights = dict()
|
| 122 |
+
aggregated = {"ai_probability" : 0.5,
|
| 123 |
+
"human_probability" : 0.5,
|
| 124 |
+
"mixed_probability" : 0.0,
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
try:
|
| 128 |
if (self.primary_method == "confidence_calibrated"):
|
| 129 |
+
aggregated, calculated_weights = self._confidence_calibrated_aggregation(results = valid_results,
|
| 130 |
+
base_weights = base_weights,
|
| 131 |
+
domain = domain,
|
| 132 |
+
)
|
| 133 |
|
| 134 |
elif (self.primary_method == "domain_adaptive"):
|
| 135 |
+
aggregated, calculated_weights = self._domain_adaptive_aggregation(results = valid_results,
|
| 136 |
+
base_weights = base_weights,
|
| 137 |
+
domain = domain,
|
| 138 |
+
)
|
| 139 |
|
| 140 |
elif (self.primary_method == "consensus_based"):
|
| 141 |
+
aggregated, calculated_weights = self._consensus_based_aggregation(results = valid_results,
|
| 142 |
+
base_weights = base_weights,
|
| 143 |
+
domain = domain,
|
| 144 |
+
)
|
| 145 |
|
| 146 |
elif ((self.primary_method == "ml_ensemble") and self.use_ml_ensemble):
|
| 147 |
+
aggregated, calculated_weights = self._ml_ensemble_aggregation(results = valid_results,
|
| 148 |
+
base_weights = base_weights,
|
| 149 |
+
domain = domain,
|
| 150 |
+
)
|
| 151 |
|
| 152 |
else:
|
| 153 |
# Fallback to domain weighted
|
| 154 |
+
aggregated, calculated_weights = self._domain_weighted_aggregation(results = valid_results,
|
| 155 |
+
base_weights = base_weights,
|
| 156 |
+
domain = domain,
|
| 157 |
+
)
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
logger.warning(f"Primary aggregation failed: {e}, using fallback")
|
| 161 |
+
aggregated, calculated_weights = self._apply_fallback_aggregation(results = valid_results,
|
| 162 |
+
base_weights = base_weights,
|
| 163 |
+
)
|
| 164 |
|
| 165 |
+
# Start with the calculated weights (from valid_results)
|
| 166 |
+
final_metric_weights = calculated_weights.copy()
|
| 167 |
+
|
| 168 |
+
# Iterate through the *original* metric_results input to the ensemble
|
| 169 |
+
for original_metric_name in metric_results.keys():
|
| 170 |
+
# If a metric from the original input wasn't included in calculated_weights :assign it a weight of 0.0.
|
| 171 |
+
if original_metric_name not in final_metric_weights:
|
| 172 |
+
final_metric_weights[original_metric_name] = 0.0
|
| 173 |
+
|
| 174 |
+
# Calculate advanced metrics using the CALCULATED weights (from valid_results), not the final ones
|
| 175 |
+
overall_confidence = self._calculate_advanced_confidence(results = valid_results,
|
| 176 |
+
weights = calculated_weights,
|
| 177 |
+
aggregated = aggregated,
|
| 178 |
+
)
|
| 179 |
|
| 180 |
+
uncertainty_score = self._calculate_uncertainty(results = valid_results,
|
| 181 |
+
weights = calculated_weights,
|
| 182 |
+
aggregated = aggregated,
|
| 183 |
+
)
|
| 184 |
|
| 185 |
+
consensus_level = self._calculate_consensus_level(results = valid_results)
|
| 186 |
|
| 187 |
# Apply domain-specific threshold with uncertainty consideration
|
| 188 |
+
domain_thresholds = get_threshold_for_domain(domain = domain)
|
| 189 |
+
final_verdict = self._apply_adaptive_threshold(aggregated = aggregated,
|
| 190 |
+
base_threshold = domain_thresholds.ensemble_threshold,
|
| 191 |
+
uncertainty = uncertainty_score,
|
| 192 |
+
)
|
| 193 |
|
| 194 |
+
# Generate detailed reasoning using the CALCULATED weights
|
| 195 |
+
reasoning = self._generate_detailed_reasoning(results = valid_results,
|
| 196 |
+
weights = calculated_weights,
|
| 197 |
+
aggregated = aggregated,
|
| 198 |
+
verdict = final_verdict,
|
| 199 |
+
uncertainty = uncertainty_score,
|
| 200 |
+
consensus = consensus_level,
|
| 201 |
+
)
|
| 202 |
|
| 203 |
+
# Calculate weighted scores based on the CALCULATED weights (from valid_results)
|
| 204 |
+
weighted_scores = {name: result.ai_probability * calculated_weights.get(name, 0.0) for name, result in valid_results.items()}
|
| 205 |
|
| 206 |
return EnsembleResult(final_verdict = final_verdict,
|
| 207 |
ai_probability = aggregated["ai_probability"],
|
|
|
|
| 210 |
overall_confidence = overall_confidence,
|
| 211 |
domain = domain,
|
| 212 |
metric_results = metric_results,
|
| 213 |
+
metric_weights = final_metric_weights,
|
| 214 |
weighted_scores = weighted_scores,
|
| 215 |
reasoning = reasoning,
|
| 216 |
uncertainty_score = uncertainty_score,
|
|
|
|
| 220 |
except Exception as e:
|
| 221 |
logger.error(f"Error in advanced ensemble prediction: {e}")
|
| 222 |
return self._create_fallback_result(domain, metric_results, str(e))
|
|
|
|
| 223 |
|
| 224 |
+
|
| 225 |
def _validate_metrics(self, results: Dict[str, MetricResult]) -> tuple:
|
| 226 |
"""
|
| 227 |
Validate metrics and return quality information
|
|
|
|
| 472 |
Get domain-specific performance weights (would come from validation data)
|
| 473 |
"""
|
| 474 |
# Placeholder - in practice, this would be based on historical performance per domain : FUTURE WORK
|
| 475 |
+
performance_weights = {'structural' : 1.0,
|
| 476 |
+
'entropy' : 1.0,
|
| 477 |
+
'semantic_analysis' : 1.0,
|
| 478 |
+
'linguistic' : 1.0,
|
| 479 |
+
'perplexity' : 1.0,
|
| 480 |
+
'multi_perturbation_stability' : 1.0,
|
| 481 |
}
|
| 482 |
|
| 483 |
# Domain-specific adjustments for all 16 domains
|
| 484 |
+
domain_adjustments = {Domain.GENERAL : {'structural' : 1.0,
|
| 485 |
+
'perplexity' : 1.0,
|
| 486 |
+
'entropy' : 1.0,
|
| 487 |
+
'semantic_analysis' : 1.0,
|
| 488 |
+
'linguistic' : 1.0,
|
| 489 |
+
'multi_perturbation_stability' : 1.0,
|
| 490 |
},
|
| 491 |
+
Domain.ACADEMIC : {'structural' : 1.2,
|
| 492 |
+
'perplexity' : 1.3,
|
| 493 |
+
'entropy' : 0.9,
|
| 494 |
+
'semantic_analysis' : 1.1,
|
| 495 |
+
'linguistic' : 1.3,
|
| 496 |
+
'multi_perturbation_stability' : 0.8,
|
| 497 |
},
|
| 498 |
+
Domain.CREATIVE : {'structural' : 0.9,
|
| 499 |
+
'perplexity' : 1.1,
|
| 500 |
+
'entropy' : 1.2,
|
| 501 |
+
'semantic_analysis' : 1.0,
|
| 502 |
+
'linguistic' : 1.1,
|
| 503 |
+
'multi_perturbation_stability' : 0.9,
|
| 504 |
},
|
| 505 |
+
Domain.AI_ML : {'structural' : 1.2,
|
| 506 |
+
'perplexity' : 1.3,
|
| 507 |
+
'entropy' : 0.9,
|
| 508 |
+
'semantic_analysis' : 1.1,
|
| 509 |
+
'linguistic' : 1.2,
|
| 510 |
+
'multi_perturbation_stability' : 0.8,
|
| 511 |
},
|
| 512 |
+
Domain.SOFTWARE_DEV : {'structural' : 1.2,
|
| 513 |
+
'perplexity' : 1.3,
|
| 514 |
+
'entropy' : 0.9,
|
| 515 |
+
'semantic_analysis' : 1.1,
|
| 516 |
+
'linguistic' : 1.2,
|
| 517 |
+
'multi_perturbation_stability' : 0.8,
|
| 518 |
},
|
| 519 |
+
Domain.TECHNICAL_DOC : {'structural' : 1.3,
|
| 520 |
+
'perplexity' : 1.3,
|
| 521 |
+
'entropy' : 0.9,
|
| 522 |
+
'semantic_analysis' : 1.2,
|
| 523 |
+
'linguistic' : 1.2,
|
| 524 |
+
'multi_perturbation_stability' : 0.8,
|
| 525 |
},
|
| 526 |
+
Domain.ENGINEERING : {'structural' : 1.2,
|
| 527 |
+
'perplexity' : 1.3,
|
| 528 |
+
'entropy' : 0.9,
|
| 529 |
+
'semantic_analysis' : 1.1,
|
| 530 |
+
'linguistic' : 1.2,
|
| 531 |
+
'multi_perturbation_stability' : 0.8,
|
| 532 |
},
|
| 533 |
+
Domain.SCIENCE : {'structural' : 1.2,
|
| 534 |
+
'perplexity' : 1.3,
|
| 535 |
+
'entropy' : 0.9,
|
| 536 |
+
'semantic_analysis' : 1.1,
|
| 537 |
+
'linguistic' : 1.2,
|
| 538 |
+
'multi_perturbation_stability' : 0.8,
|
| 539 |
},
|
| 540 |
+
Domain.BUSINESS : {'structural' : 1.1,
|
| 541 |
+
'perplexity' : 1.2,
|
| 542 |
+
'entropy' : 1.0,
|
| 543 |
+
'semantic_analysis' : 1.1,
|
| 544 |
+
'linguistic' : 1.1,
|
| 545 |
+
'multi_perturbation_stability' : 0.9,
|
| 546 |
},
|
| 547 |
+
Domain.LEGAL : {'structural' : 1.3,
|
| 548 |
+
'perplexity' : 1.3,
|
| 549 |
+
'entropy' : 0.9,
|
| 550 |
+
'semantic_analysis' : 1.2,
|
| 551 |
+
'linguistic' : 1.3,
|
| 552 |
+
'multi_perturbation_stability' : 0.8,
|
| 553 |
},
|
| 554 |
+
Domain.MEDICAL : {'structural' : 1.2,
|
| 555 |
+
'perplexity' : 1.3,
|
| 556 |
+
'entropy' : 0.9,
|
| 557 |
+
'semantic_analysis' : 1.2,
|
| 558 |
+
'linguistic' : 1.2,
|
| 559 |
+
'multi_perturbation_stability' : 0.8,
|
| 560 |
},
|
| 561 |
+
Domain.JOURNALISM : {'structural' : 1.1,
|
| 562 |
+
'perplexity' : 1.2,
|
| 563 |
+
'entropy' : 1.0,
|
| 564 |
+
'semantic_analysis' : 1.1,
|
| 565 |
+
'linguistic' : 1.1,
|
| 566 |
+
'multi_perturbation_stability' : 0.8,
|
| 567 |
},
|
| 568 |
+
Domain.MARKETING : {'structural' : 1.0,
|
| 569 |
+
'perplexity' : 1.1,
|
| 570 |
+
'entropy' : 1.1,
|
| 571 |
+
'semantic_analysis' : 1.0,
|
| 572 |
+
'linguistic' : 1.2,
|
| 573 |
+
'multi_perturbation_stability' : 0.8,
|
| 574 |
},
|
| 575 |
+
Domain.SOCIAL_MEDIA : {'structural' : 0.8,
|
| 576 |
+
'perplexity' : 1.0,
|
| 577 |
+
'entropy' : 1.3,
|
| 578 |
+
'semantic_analysis' : 0.9,
|
| 579 |
+
'linguistic' : 0.7,
|
| 580 |
+
'multi_perturbation_stability' : 0.9,
|
| 581 |
},
|
| 582 |
+
Domain.BLOG_PERSONAL : {'structural' : 0.9,
|
| 583 |
+
'perplexity' : 1.1,
|
| 584 |
+
'entropy' : 1.2,
|
| 585 |
+
'semantic_analysis' : 1.0,
|
| 586 |
+
'linguistic' : 1.0,
|
| 587 |
+
'multi_perturbation_stability' : 0.8,
|
| 588 |
},
|
| 589 |
+
Domain.TUTORIAL : {'structural' : 1.1,
|
| 590 |
+
'perplexity' : 1.2,
|
| 591 |
+
'entropy' : 1.0,
|
| 592 |
+
'semantic_analysis' : 1.1,
|
| 593 |
+
'linguistic' : 1.1,
|
| 594 |
+
'multi_perturbation_stability' : 0.8,
|
| 595 |
},
|
| 596 |
}
|
| 597 |
|
detector/highlighter.py
CHANGED
|
@@ -48,7 +48,7 @@ class TextHighlighter:
|
|
| 48 |
- Explainable tooltips
|
| 49 |
- Highlighting metrics calculation
|
| 50 |
"""
|
| 51 |
-
# Color thresholds with mixed content support
|
| 52 |
COLOR_THRESHOLDS = [(0.00, 0.10, "very-high-human", "#dcfce7", "Very likely human-written"),
|
| 53 |
(0.10, 0.25, "high-human", "#bbf7d0", "Likely human-written"),
|
| 54 |
(0.25, 0.40, "medium-human", "#86efac", "Possibly human-written"),
|
|
@@ -61,7 +61,7 @@ class TextHighlighter:
|
|
| 61 |
# Mixed content pattern
|
| 62 |
MIXED_THRESHOLD = 0.25
|
| 63 |
|
| 64 |
-
#
|
| 65 |
RISK_WEIGHTS = {'very-high-ai' : 1.0,
|
| 66 |
'high-ai' : 0.8,
|
| 67 |
'medium-ai' : 0.6,
|
|
@@ -189,7 +189,7 @@ class TextHighlighter:
|
|
| 189 |
def _calculate_sentence_ensemble_probability(self, sentence: str, metric_results: Dict[str, MetricResult], weights: Dict[str, float],
|
| 190 |
ensemble_result: Optional[EnsembleResult] = None) -> Tuple[float, float, float, float, Dict[str, float]]:
|
| 191 |
"""
|
| 192 |
-
Calculate sentence probabilities using ensemble methods with domain calibration
|
| 193 |
"""
|
| 194 |
sentence_length = len(sentence.split())
|
| 195 |
|
|
@@ -289,7 +289,7 @@ class TextHighlighter:
|
|
| 289 |
|
| 290 |
def _calculate_weighted_probability(self, metric_results: Dict[str, MetricResult], weights: Dict[str, float], breakdown: Dict[str, float]) -> Tuple[float, float, float, float, Dict[str, float]]:
|
| 291 |
"""
|
| 292 |
-
Fallback weighted probability calculation
|
| 293 |
"""
|
| 294 |
weighted_ai_probs = list()
|
| 295 |
weighted_human_probs = list()
|
|
@@ -456,8 +456,8 @@ class TextHighlighter:
|
|
| 456 |
if self._has_repetition(sentence):
|
| 457 |
return min(1.0, base_prob * 1.2)
|
| 458 |
|
| 459 |
-
elif (metric_name == "
|
| 460 |
-
#
|
| 461 |
if (sentence_length > 15):
|
| 462 |
return min(1.0, base_prob * 1.1)
|
| 463 |
|
|
@@ -466,7 +466,7 @@ class TextHighlighter:
|
|
| 466 |
|
| 467 |
def _get_color_for_probability(self, probability: float, is_mixed_content: bool = False, mixed_prob: float = 0.0) -> Tuple[str, str, str]:
|
| 468 |
"""
|
| 469 |
-
Get color class with mixed content support
|
| 470 |
"""
|
| 471 |
# Check mixed content first
|
| 472 |
if (is_mixed_content and (mixed_prob > self.MIXED_THRESHOLD)):
|
|
@@ -687,7 +687,7 @@ class TextHighlighter:
|
|
| 687 |
|
| 688 |
include_legend { bool } : Whether to include legend (set to False to avoid duplicates)
|
| 689 |
|
| 690 |
-
include_metrics { bool } : Whether to include
|
| 691 |
|
| 692 |
Returns:
|
| 693 |
--------
|
|
@@ -732,7 +732,7 @@ class TextHighlighter:
|
|
| 732 |
|
| 733 |
def _generate_enhanced_css(self) -> str:
|
| 734 |
"""
|
| 735 |
-
Generate CSS for highlighting
|
| 736 |
"""
|
| 737 |
return """
|
| 738 |
<style>
|
|
@@ -863,7 +863,7 @@ class TextHighlighter:
|
|
| 863 |
|
| 864 |
def _generate_metrics_summary(self, sentences: List[HighlightedSentence]) -> str:
|
| 865 |
"""
|
| 866 |
-
Generate summary statistics for highlighted sentences
|
| 867 |
"""
|
| 868 |
if not sentences:
|
| 869 |
return ""
|
|
@@ -893,7 +893,7 @@ class TextHighlighter:
|
|
| 893 |
avg_ai_prob = sum(s.ai_probability for s in sentences) / total_sentences
|
| 894 |
avg_human_prob = sum(s.human_probability for s in sentences) / total_sentences
|
| 895 |
|
| 896 |
-
#
|
| 897 |
ai_sentences = very_high_ai + high_ai + medium_ai
|
| 898 |
human_sentences = very_high_human + high_human + medium_human
|
| 899 |
|
|
@@ -994,7 +994,7 @@ class TextHighlighter:
|
|
| 994 |
|
| 995 |
Returns:
|
| 996 |
--------
|
| 997 |
-
|
| 998 |
"""
|
| 999 |
if not highlighted_sentences:
|
| 1000 |
return {}
|
|
|
|
| 48 |
- Explainable tooltips
|
| 49 |
- Highlighting metrics calculation
|
| 50 |
"""
|
| 51 |
+
# Color thresholds with mixed content support
|
| 52 |
COLOR_THRESHOLDS = [(0.00, 0.10, "very-high-human", "#dcfce7", "Very likely human-written"),
|
| 53 |
(0.10, 0.25, "high-human", "#bbf7d0", "Likely human-written"),
|
| 54 |
(0.25, 0.40, "medium-human", "#86efac", "Possibly human-written"),
|
|
|
|
| 61 |
# Mixed content pattern
|
| 62 |
MIXED_THRESHOLD = 0.25
|
| 63 |
|
| 64 |
+
# Risk weights
|
| 65 |
RISK_WEIGHTS = {'very-high-ai' : 1.0,
|
| 66 |
'high-ai' : 0.8,
|
| 67 |
'medium-ai' : 0.6,
|
|
|
|
| 189 |
def _calculate_sentence_ensemble_probability(self, sentence: str, metric_results: Dict[str, MetricResult], weights: Dict[str, float],
|
| 190 |
ensemble_result: Optional[EnsembleResult] = None) -> Tuple[float, float, float, float, Dict[str, float]]:
|
| 191 |
"""
|
| 192 |
+
Calculate sentence probabilities using ensemble methods with domain calibration
|
| 193 |
"""
|
| 194 |
sentence_length = len(sentence.split())
|
| 195 |
|
|
|
|
| 289 |
|
| 290 |
def _calculate_weighted_probability(self, metric_results: Dict[str, MetricResult], weights: Dict[str, float], breakdown: Dict[str, float]) -> Tuple[float, float, float, float, Dict[str, float]]:
|
| 291 |
"""
|
| 292 |
+
Fallback weighted probability calculation
|
| 293 |
"""
|
| 294 |
weighted_ai_probs = list()
|
| 295 |
weighted_human_probs = list()
|
|
|
|
| 456 |
if self._has_repetition(sentence):
|
| 457 |
return min(1.0, base_prob * 1.2)
|
| 458 |
|
| 459 |
+
elif (metric_name == "multi_perturbation_stability"):
|
| 460 |
+
# MultiPerturbationStability adjustments for sentence level
|
| 461 |
if (sentence_length > 15):
|
| 462 |
return min(1.0, base_prob * 1.1)
|
| 463 |
|
|
|
|
| 466 |
|
| 467 |
def _get_color_for_probability(self, probability: float, is_mixed_content: bool = False, mixed_prob: float = 0.0) -> Tuple[str, str, str]:
|
| 468 |
"""
|
| 469 |
+
Get color class with mixed content support
|
| 470 |
"""
|
| 471 |
# Check mixed content first
|
| 472 |
if (is_mixed_content and (mixed_prob > self.MIXED_THRESHOLD)):
|
|
|
|
| 687 |
|
| 688 |
include_legend { bool } : Whether to include legend (set to False to avoid duplicates)
|
| 689 |
|
| 690 |
+
include_metrics { bool } : Whether to include metrics summary
|
| 691 |
|
| 692 |
Returns:
|
| 693 |
--------
|
|
|
|
| 732 |
|
| 733 |
def _generate_enhanced_css(self) -> str:
|
| 734 |
"""
|
| 735 |
+
Generate CSS for highlighting for Better readability
|
| 736 |
"""
|
| 737 |
return """
|
| 738 |
<style>
|
|
|
|
| 863 |
|
| 864 |
def _generate_metrics_summary(self, sentences: List[HighlightedSentence]) -> str:
|
| 865 |
"""
|
| 866 |
+
Generate summary statistics for highlighted sentences
|
| 867 |
"""
|
| 868 |
if not sentences:
|
| 869 |
return ""
|
|
|
|
| 893 |
avg_ai_prob = sum(s.ai_probability for s in sentences) / total_sentences
|
| 894 |
avg_human_prob = sum(s.human_probability for s in sentences) / total_sentences
|
| 895 |
|
| 896 |
+
# Sentence counts
|
| 897 |
ai_sentences = very_high_ai + high_ai + medium_ai
|
| 898 |
human_sentences = very_high_human + high_human + medium_human
|
| 899 |
|
|
|
|
| 994 |
|
| 995 |
Returns:
|
| 996 |
--------
|
| 997 |
+
{ Dict[str, float] } : Dictionary with metrics
|
| 998 |
"""
|
| 999 |
if not highlighted_sentences:
|
| 1000 |
return {}
|
detector/orchestrator.py
CHANGED
|
@@ -11,7 +11,6 @@ from metrics.entropy import EntropyMetric
|
|
| 11 |
from config.threshold_config import Domain
|
| 12 |
from metrics.base_metric import MetricResult
|
| 13 |
from detector.ensemble import EnsembleResult
|
| 14 |
-
from metrics.detect_gpt import DetectGPTMetric
|
| 15 |
from metrics.perplexity import PerplexityMetric
|
| 16 |
from metrics.linguistic import LinguisticMetric
|
| 17 |
from metrics.structural import StructuralMetric
|
|
@@ -23,7 +22,7 @@ from processors.domain_classifier import DomainPrediction
|
|
| 23 |
from processors.language_detector import LanguageDetector
|
| 24 |
from metrics.semantic_analysis import SemanticAnalysisMetric
|
| 25 |
from processors.language_detector import LanguageDetectionResult
|
| 26 |
-
|
| 27 |
|
| 28 |
|
| 29 |
@dataclass
|
|
@@ -179,13 +178,13 @@ class DetectionOrchestrator:
|
|
| 179 |
except Exception as e:
|
| 180 |
logger.error(f"Failed to initialize linguistic metric: {repr(e)}")
|
| 181 |
|
| 182 |
-
#
|
| 183 |
try:
|
| 184 |
-
metrics["
|
| 185 |
-
logger.debug("
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
-
logger.error(f"Failed to initialize
|
| 189 |
|
| 190 |
logger.info(f"Initialized {len(metrics)} metrics: {list(metrics.keys())}")
|
| 191 |
return metrics
|
|
@@ -328,7 +327,7 @@ class DetectionOrchestrator:
|
|
| 328 |
|
| 329 |
try:
|
| 330 |
# Check if we should skip expensive metrics
|
| 331 |
-
if (self.skip_expensive_metrics and (name == "
|
| 332 |
logger.info(f"Skipping expensive metric: {name}")
|
| 333 |
continue
|
| 334 |
|
|
|
|
| 11 |
from config.threshold_config import Domain
|
| 12 |
from metrics.base_metric import MetricResult
|
| 13 |
from detector.ensemble import EnsembleResult
|
|
|
|
| 14 |
from metrics.perplexity import PerplexityMetric
|
| 15 |
from metrics.linguistic import LinguisticMetric
|
| 16 |
from metrics.structural import StructuralMetric
|
|
|
|
| 22 |
from processors.language_detector import LanguageDetector
|
| 23 |
from metrics.semantic_analysis import SemanticAnalysisMetric
|
| 24 |
from processors.language_detector import LanguageDetectionResult
|
| 25 |
+
from metrics.multi_perturbation_stability import MultiPerturbationStabilityMetric
|
| 26 |
|
| 27 |
|
| 28 |
@dataclass
|
|
|
|
| 178 |
except Exception as e:
|
| 179 |
logger.error(f"Failed to initialize linguistic metric: {repr(e)}")
|
| 180 |
|
| 181 |
+
# MultiPerturbationStability metric (expensive)
|
| 182 |
try:
|
| 183 |
+
metrics["multi_perturbation_stability"] = MultiPerturbationStabilityMetric()
|
| 184 |
+
logger.debug("MultiPerturbationStability metric initialized")
|
| 185 |
|
| 186 |
except Exception as e:
|
| 187 |
+
logger.error(f"Failed to initialize MultiPerturbationStability metric: {repr(e)}")
|
| 188 |
|
| 189 |
logger.info(f"Initialized {len(metrics)} metrics: {list(metrics.keys())}")
|
| 190 |
return metrics
|
|
|
|
| 327 |
|
| 328 |
try:
|
| 329 |
# Check if we should skip expensive metrics
|
| 330 |
+
if (self.skip_expensive_metrics and (name == "multi_perturbation_stability")):
|
| 331 |
logger.info(f"Skipping expensive metric: {name}")
|
| 332 |
continue
|
| 333 |
|
metrics/{detect_gpt.py → multi_perturbation_stability.py}
RENAMED
|
@@ -15,18 +15,26 @@ from config.threshold_config import get_threshold_for_domain
|
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
-
class
|
| 19 |
"""
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
|
| 22 |
Measures:
|
| 23 |
- Text stability under random perturbations
|
| 24 |
- Likelihood curvature analysis
|
| 25 |
- Masked token prediction analysis
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"""
|
| 27 |
def __init__(self):
|
| 28 |
-
super().__init__(name = "
|
| 29 |
-
description = "Text stability analysis under perturbations
|
| 30 |
)
|
| 31 |
|
| 32 |
self.gpt_model = None
|
|
@@ -38,14 +46,14 @@ class DetectGPTMetric(BaseMetric):
|
|
| 38 |
|
| 39 |
def initialize(self) -> bool:
|
| 40 |
"""
|
| 41 |
-
Initialize the
|
| 42 |
"""
|
| 43 |
try:
|
| 44 |
-
logger.info("Initializing
|
| 45 |
|
| 46 |
# Load GPT-2 model for likelihood calculation
|
| 47 |
model_manager = get_model_manager()
|
| 48 |
-
gpt_result = model_manager.load_model("
|
| 49 |
|
| 50 |
if isinstance(gpt_result, tuple):
|
| 51 |
self.gpt_model, self.gpt_tokenizer = gpt_result
|
|
@@ -53,11 +61,11 @@ class DetectGPTMetric(BaseMetric):
|
|
| 53 |
self.gpt_model.to(self.device)
|
| 54 |
|
| 55 |
else:
|
| 56 |
-
logger.error("Failed to load GPT-2 model for
|
| 57 |
return False
|
| 58 |
|
| 59 |
# Load masked language model for perturbations
|
| 60 |
-
mask_result = model_manager.load_model("
|
| 61 |
|
| 62 |
if (isinstance(mask_result, tuple)):
|
| 63 |
self.mask_model, self.mask_tokenizer = mask_result
|
|
@@ -73,17 +81,17 @@ class DetectGPTMetric(BaseMetric):
|
|
| 73 |
|
| 74 |
self.is_initialized = True
|
| 75 |
|
| 76 |
-
logger.success("
|
| 77 |
return True
|
| 78 |
|
| 79 |
except Exception as e:
|
| 80 |
-
logger.error(f"Failed to initialize
|
| 81 |
return False
|
| 82 |
|
| 83 |
|
| 84 |
def compute(self, text: str, **kwargs) -> MetricResult:
|
| 85 |
"""
|
| 86 |
-
Compute
|
| 87 |
"""
|
| 88 |
try:
|
| 89 |
if ((not text) or (len(text.strip()) < 100)):
|
|
@@ -92,17 +100,17 @@ class DetectGPTMetric(BaseMetric):
|
|
| 92 |
human_probability = 0.5,
|
| 93 |
mixed_probability = 0.0,
|
| 94 |
confidence = 0.1,
|
| 95 |
-
error = "Text too short for
|
| 96 |
)
|
| 97 |
|
| 98 |
# Get domain-specific thresholds
|
| 99 |
-
domain
|
| 100 |
-
domain_thresholds
|
| 101 |
-
|
| 102 |
|
| 103 |
# Check if we should run this computationally expensive metric
|
| 104 |
if (kwargs.get('skip_expensive', False)):
|
| 105 |
-
logger.info("Skipping
|
| 106 |
|
| 107 |
return MetricResult(metric_name = self.name,
|
| 108 |
ai_probability = 0.5,
|
|
@@ -112,17 +120,17 @@ class DetectGPTMetric(BaseMetric):
|
|
| 112 |
error = "Skipped for performance",
|
| 113 |
)
|
| 114 |
|
| 115 |
-
# Calculate
|
| 116 |
-
features = self.
|
| 117 |
|
| 118 |
-
# Calculate raw
|
| 119 |
-
|
| 120 |
|
| 121 |
# Apply domain-specific thresholds to convert raw score to probabilities
|
| 122 |
-
ai_prob, human_prob, mixed_prob = self._apply_domain_thresholds(
|
| 123 |
|
| 124 |
# Apply confidence multiplier from domain thresholds
|
| 125 |
-
confidence *=
|
| 126 |
confidence = max(0.0, min(1.0, confidence))
|
| 127 |
|
| 128 |
return MetricResult(metric_name = self.name,
|
|
@@ -132,14 +140,14 @@ class DetectGPTMetric(BaseMetric):
|
|
| 132 |
confidence = confidence,
|
| 133 |
details = {**features,
|
| 134 |
'domain_used' : domain.value,
|
| 135 |
-
'ai_threshold' :
|
| 136 |
-
'human_threshold' :
|
| 137 |
-
'raw_score' :
|
| 138 |
},
|
| 139 |
)
|
| 140 |
|
| 141 |
except Exception as e:
|
| 142 |
-
logger.error(f"Error in
|
| 143 |
|
| 144 |
return MetricResult(metric_name = self.name,
|
| 145 |
ai_probability = 0.5,
|
|
@@ -201,9 +209,9 @@ class DetectGPTMetric(BaseMetric):
|
|
| 201 |
return ai_prob, human_prob, mixed_prob
|
| 202 |
|
| 203 |
|
| 204 |
-
def
|
| 205 |
"""
|
| 206 |
-
Calculate comprehensive
|
| 207 |
"""
|
| 208 |
if not self.gpt_model or not self.gpt_tokenizer:
|
| 209 |
return self._get_default_features()
|
|
@@ -267,7 +275,7 @@ class DetectGPTMetric(BaseMetric):
|
|
| 267 |
}
|
| 268 |
|
| 269 |
except Exception as e:
|
| 270 |
-
logger.warning(f"
|
| 271 |
return self._get_default_features()
|
| 272 |
|
| 273 |
|
|
@@ -652,9 +660,9 @@ class DetectGPTMetric(BaseMetric):
|
|
| 652 |
return stabilities
|
| 653 |
|
| 654 |
|
| 655 |
-
def
|
| 656 |
"""
|
| 657 |
-
Analyze
|
| 658 |
"""
|
| 659 |
# Check feature validity first
|
| 660 |
required_features = ['stability_score', 'curvature_score', 'normalized_likelihood_ratio', 'stability_variance', 'perturbation_variance']
|
|
@@ -781,7 +789,7 @@ class DetectGPTMetric(BaseMetric):
|
|
| 781 |
|
| 782 |
def _preprocess_text_for_analysis(self, text: str) -> str:
|
| 783 |
"""
|
| 784 |
-
Preprocess text for
|
| 785 |
"""
|
| 786 |
if not text:
|
| 787 |
return ""
|
|
@@ -882,4 +890,4 @@ class DetectGPTMetric(BaseMetric):
|
|
| 882 |
|
| 883 |
|
| 884 |
# Export
|
| 885 |
-
__all__ = ["
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
+
class MultiPerturbationStabilityMetric(BaseMetric):
|
| 19 |
"""
|
| 20 |
+
Multi-Perturbation Stability Metric (MPSM)
|
| 21 |
+
|
| 22 |
+
A hybrid approach combining multiple perturbation techniques for robust AI-generated text detection
|
| 23 |
|
| 24 |
Measures:
|
| 25 |
- Text stability under random perturbations
|
| 26 |
- Likelihood curvature analysis
|
| 27 |
- Masked token prediction analysis
|
| 28 |
+
|
| 29 |
+
Perturbation Methods:
|
| 30 |
+
- Word deletion & swapping
|
| 31 |
+
- RoBERTa mask filling
|
| 32 |
+
- Synonym replacement
|
| 33 |
+
- Chunk-based stability analysis
|
| 34 |
"""
|
| 35 |
def __init__(self):
|
| 36 |
+
super().__init__(name = "multi_perturbation_stability",
|
| 37 |
+
description = "Text stability analysis under multi-perturbations techniques",
|
| 38 |
)
|
| 39 |
|
| 40 |
self.gpt_model = None
|
|
|
|
| 46 |
|
| 47 |
def initialize(self) -> bool:
|
| 48 |
"""
|
| 49 |
+
Initialize the MultiPerturbationStability metric
|
| 50 |
"""
|
| 51 |
try:
|
| 52 |
+
logger.info("Initializing MultiPerturbationStability metric...")
|
| 53 |
|
| 54 |
# Load GPT-2 model for likelihood calculation
|
| 55 |
model_manager = get_model_manager()
|
| 56 |
+
gpt_result = model_manager.load_model(model_name = "multi_perturbation_base")
|
| 57 |
|
| 58 |
if isinstance(gpt_result, tuple):
|
| 59 |
self.gpt_model, self.gpt_tokenizer = gpt_result
|
|
|
|
| 61 |
self.gpt_model.to(self.device)
|
| 62 |
|
| 63 |
else:
|
| 64 |
+
logger.error("Failed to load GPT-2 model for MultiPerturbationStability")
|
| 65 |
return False
|
| 66 |
|
| 67 |
# Load masked language model for perturbations
|
| 68 |
+
mask_result = model_manager.load_model("multi_perturbation_mask")
|
| 69 |
|
| 70 |
if (isinstance(mask_result, tuple)):
|
| 71 |
self.mask_model, self.mask_tokenizer = mask_result
|
|
|
|
| 81 |
|
| 82 |
self.is_initialized = True
|
| 83 |
|
| 84 |
+
logger.success("MultiPerturbationStability metric initialized successfully")
|
| 85 |
return True
|
| 86 |
|
| 87 |
except Exception as e:
|
| 88 |
+
logger.error(f"Failed to initialize MultiPerturbationStability metric: {repr(e)}")
|
| 89 |
return False
|
| 90 |
|
| 91 |
|
| 92 |
def compute(self, text: str, **kwargs) -> MetricResult:
|
| 93 |
"""
|
| 94 |
+
Compute MultiPerturbationStability analysis with FULL DOMAIN THRESHOLD INTEGRATION
|
| 95 |
"""
|
| 96 |
try:
|
| 97 |
if ((not text) or (len(text.strip()) < 100)):
|
|
|
|
| 100 |
human_probability = 0.5,
|
| 101 |
mixed_probability = 0.0,
|
| 102 |
confidence = 0.1,
|
| 103 |
+
error = "Text too short for MultiPerturbationStability analysis",
|
| 104 |
)
|
| 105 |
|
| 106 |
# Get domain-specific thresholds
|
| 107 |
+
domain = kwargs.get('domain', Domain.GENERAL)
|
| 108 |
+
domain_thresholds = get_threshold_for_domain(domain)
|
| 109 |
+
multi_perturbation_stability_thresholds = domain_thresholds.multi_perturbation_stability
|
| 110 |
|
| 111 |
# Check if we should run this computationally expensive metric
|
| 112 |
if (kwargs.get('skip_expensive', False)):
|
| 113 |
+
logger.info("Skipping MultiPerturbationStability due to computational constraints")
|
| 114 |
|
| 115 |
return MetricResult(metric_name = self.name,
|
| 116 |
ai_probability = 0.5,
|
|
|
|
| 120 |
error = "Skipped for performance",
|
| 121 |
)
|
| 122 |
|
| 123 |
+
# Calculate MultiPerturbationStability features
|
| 124 |
+
features = self._calculate_stability_features(text)
|
| 125 |
|
| 126 |
+
# Calculate raw MultiPerturbationStability score (0-1 scale)
|
| 127 |
+
raw_stability_score, confidence = self._analyze_stability_patterns(features)
|
| 128 |
|
| 129 |
# Apply domain-specific thresholds to convert raw score to probabilities
|
| 130 |
+
ai_prob, human_prob, mixed_prob = self._apply_domain_thresholds(raw_stability_score, multi_perturbation_stability_thresholds, features)
|
| 131 |
|
| 132 |
# Apply confidence multiplier from domain thresholds
|
| 133 |
+
confidence *= multi_perturbation_stability_thresholds.confidence_multiplier
|
| 134 |
confidence = max(0.0, min(1.0, confidence))
|
| 135 |
|
| 136 |
return MetricResult(metric_name = self.name,
|
|
|
|
| 140 |
confidence = confidence,
|
| 141 |
details = {**features,
|
| 142 |
'domain_used' : domain.value,
|
| 143 |
+
'ai_threshold' : multi_perturbation_stability_thresholds.ai_threshold,
|
| 144 |
+
'human_threshold' : multi_perturbation_stability_thresholds.human_threshold,
|
| 145 |
+
'raw_score' : raw_stability_score,
|
| 146 |
},
|
| 147 |
)
|
| 148 |
|
| 149 |
except Exception as e:
|
| 150 |
+
logger.error(f"Error in MultiPerturbationStability computation: {repr(e)}")
|
| 151 |
|
| 152 |
return MetricResult(metric_name = self.name,
|
| 153 |
ai_probability = 0.5,
|
|
|
|
| 209 |
return ai_prob, human_prob, mixed_prob
|
| 210 |
|
| 211 |
|
| 212 |
+
def _calculate_stability_features(self, text: str) -> Dict[str, Any]:
|
| 213 |
"""
|
| 214 |
+
Calculate comprehensive MultiPerturbationStability features
|
| 215 |
"""
|
| 216 |
if not self.gpt_model or not self.gpt_tokenizer:
|
| 217 |
return self._get_default_features()
|
|
|
|
| 275 |
}
|
| 276 |
|
| 277 |
except Exception as e:
|
| 278 |
+
logger.warning(f"MultiPerturbationStability feature calculation failed: {repr(e)}")
|
| 279 |
return self._get_default_features()
|
| 280 |
|
| 281 |
|
|
|
|
| 660 |
return stabilities
|
| 661 |
|
| 662 |
|
| 663 |
+
def _analyze_stability_patterns(self, features: Dict[str, Any]) -> tuple:
|
| 664 |
"""
|
| 665 |
+
Analyze MultiPerturbationStability patterns to determine the raw stability score (0-1 scale); a higher score means more AI-like
|
| 666 |
"""
|
| 667 |
# Check feature validity first
|
| 668 |
required_features = ['stability_score', 'curvature_score', 'normalized_likelihood_ratio', 'stability_variance', 'perturbation_variance']
|
|
|
|
| 789 |
|
| 790 |
def _preprocess_text_for_analysis(self, text: str) -> str:
|
| 791 |
"""
|
| 792 |
+
Preprocess text for MultiPerturbationStability analysis
|
| 793 |
"""
|
| 794 |
if not text:
|
| 795 |
return ""
|
|
|
|
| 890 |
|
| 891 |
|
| 892 |
# Export
|
| 893 |
+
__all__ = ["MultiPerturbationStabilityMetric"]
|
processors/language_detector.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# DEPENDENCIES
|
| 2 |
import re
|
|
|
|
| 3 |
import string
|
| 4 |
from enum import Enum
|
| 5 |
from typing import Dict
|
|
@@ -17,6 +18,7 @@ try:
|
|
| 17 |
# Seed for reproducibility
|
| 18 |
DetectorFactory.seed = 0
|
| 19 |
LANGDETECT_AVAILABLE = True
|
|
|
|
| 20 |
except ImportError:
|
| 21 |
logger.warning("langdetect not available. Install: pip install langdetect")
|
| 22 |
LANGDETECT_AVAILABLE = False
|
|
@@ -24,6 +26,7 @@ except ImportError:
|
|
| 24 |
try:
|
| 25 |
from models.model_manager import get_model_manager
|
| 26 |
MODEL_MANAGER_AVAILABLE = True
|
|
|
|
| 27 |
except ImportError:
|
| 28 |
logger.warning("model_manager not available, using fallback methods")
|
| 29 |
MODEL_MANAGER_AVAILABLE = False
|
|
@@ -251,7 +254,7 @@ class LanguageDetector:
|
|
| 251 |
# Method 1 : ML Model
|
| 252 |
if self.use_model and self.is_initialized:
|
| 253 |
try:
|
| 254 |
-
result = self._detect_with_model(cleaned_text)
|
| 255 |
result.detection_method = "xlm-roberta-model"
|
| 256 |
|
| 257 |
except Exception as e:
|
|
@@ -261,7 +264,7 @@ class LanguageDetector:
|
|
| 261 |
# Method 2 : langdetect library
|
| 262 |
if result is None and LANGDETECT_AVAILABLE:
|
| 263 |
try:
|
| 264 |
-
result = self._detect_with_langdetect(cleaned_text)
|
| 265 |
result.detection_method = "langdetect-library"
|
| 266 |
|
| 267 |
except Exception as e:
|
|
@@ -292,35 +295,203 @@ class LanguageDetector:
|
|
| 292 |
|
| 293 |
def _detect_with_model(self, text: str) -> LanguageDetectionResult:
|
| 294 |
"""
|
| 295 |
-
Detect language using XLM-RoBERTa model
|
| 296 |
"""
|
| 297 |
if not self.is_initialized:
|
| 298 |
if not self.initialize():
|
| 299 |
raise RuntimeError("Model not initialized")
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
text = text
|
| 304 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
-
#
|
| 307 |
-
|
|
|
|
| 308 |
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
-
# Handle model output format
|
| 319 |
if ('_' in lang_code):
|
| 320 |
lang_code = lang_code.split('_')[0]
|
| 321 |
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
if (score > primary_conf):
|
| 325 |
primary_conf = score
|
| 326 |
primary_lang = lang_code
|
|
@@ -332,17 +503,64 @@ class LanguageDetector:
|
|
| 332 |
except ValueError:
|
| 333 |
primary_language = Language.UNKNOWN
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
return LanguageDetectionResult(primary_language = primary_language,
|
| 336 |
confidence = primary_conf,
|
| 337 |
-
all_languages =
|
| 338 |
script = Script.UNKNOWN,
|
| 339 |
-
is_multilingual = False,
|
| 340 |
-
detection_method = "model",
|
| 341 |
char_count = 0,
|
| 342 |
word_count = 0,
|
| 343 |
-
warnings =
|
| 344 |
)
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
def _detect_with_langdetect(self, text: str) -> LanguageDetectionResult:
|
| 348 |
"""
|
|
@@ -357,7 +575,7 @@ class LanguageDetector:
|
|
| 357 |
all_languages[prob.lang] = prob.prob
|
| 358 |
|
| 359 |
# Primary language
|
| 360 |
-
primary
|
| 361 |
|
| 362 |
try:
|
| 363 |
primary_language = Language(primary.lang)
|
|
@@ -542,14 +760,14 @@ class LanguageDetector:
|
|
| 542 |
{ bool } : True if text is in target language with sufficient confidence
|
| 543 |
"""
|
| 544 |
result = self.detect(text)
|
| 545 |
-
return (result.primary_language == target_language and (result.confidence >= threshold))
|
| 546 |
|
| 547 |
|
| 548 |
def get_supported_languages(self) -> List[str]:
|
| 549 |
"""
|
| 550 |
Get list of supported language codes
|
| 551 |
"""
|
| 552 |
-
return [lang.value for lang in Language if lang != Language.UNKNOWN]
|
| 553 |
|
| 554 |
|
| 555 |
def cleanup(self):
|
|
@@ -560,7 +778,7 @@ class LanguageDetector:
|
|
| 560 |
self.is_initialized = False
|
| 561 |
|
| 562 |
|
| 563 |
-
#
|
| 564 |
def quick_detect(text: str, **kwargs) -> LanguageDetectionResult:
|
| 565 |
"""
|
| 566 |
Quick language detection with default settings
|
|
@@ -602,42 +820,3 @@ __all__ = ['Script',
|
|
| 602 |
'LanguageDetector',
|
| 603 |
'LanguageDetectionResult',
|
| 604 |
]
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
# ==================== Testing ====================
|
| 608 |
-
if __name__ == "__main__":
|
| 609 |
-
# Test cases
|
| 610 |
-
test_texts = {"English" : "This is a sample text written in English. It contains multiple sentences to test the language detection system.",
|
| 611 |
-
"Spanish" : "Este es un texto de ejemplo escrito en español. Contiene múltiples oraciones para probar el sistema de detección de idiomas.",
|
| 612 |
-
"French" : "Ceci est un exemple de texte écrit en français. Il contient plusieurs phrases pour tester le système de détection de langue.",
|
| 613 |
-
"German" : "Dies ist ein Beispieltext in deutscher Sprache. Es enthält mehrere Sätze zum Testen des Spracherkennungssystems.",
|
| 614 |
-
"Chinese" : "这是用中文写的示例文本。它包含多个句子来测试语言检测系统。",
|
| 615 |
-
"Russian" : "Это пример текста, написанного на русском языке. Он содержит несколько предложений для проверки системы определения языка.",
|
| 616 |
-
"Mixed" : "This is English. Este es español. C'est français.",
|
| 617 |
-
"Short" : "Hello",
|
| 618 |
-
}
|
| 619 |
-
|
| 620 |
-
detector = LanguageDetector(use_model = True) # Use fast mode for testing
|
| 621 |
-
|
| 622 |
-
for name, text in test_texts.items():
|
| 623 |
-
print(f"\n{'='*70}")
|
| 624 |
-
print(f"Testing: {name}")
|
| 625 |
-
print(f"{'='*70}")
|
| 626 |
-
print(f"Text: {text[:80]}...")
|
| 627 |
-
|
| 628 |
-
result = detector.detect(text)
|
| 629 |
-
|
| 630 |
-
print(f"\nPrimary Language: {result.primary_language.value}")
|
| 631 |
-
print(f"Confidence: {result.confidence:.2f}")
|
| 632 |
-
print(f"Script: {result.script.value}")
|
| 633 |
-
print(f"Method: {result.detection_method}")
|
| 634 |
-
print(f"Multilingual: {result.is_multilingual}")
|
| 635 |
-
|
| 636 |
-
if result.warnings:
|
| 637 |
-
print(f"Warnings: {result.warnings}")
|
| 638 |
-
|
| 639 |
-
if (len(result.all_languages) > 1):
|
| 640 |
-
print("\nAll detected languages:")
|
| 641 |
-
for lang, conf in sorted(result.all_languages.items(), key = lambda x: x[1], reverse = True)[:3]:
|
| 642 |
-
print(f" {lang}: {conf:.2f}")
|
| 643 |
-
|
|
|
|
| 1 |
# DEPENDENCIES
|
| 2 |
import re
|
| 3 |
+
import torch
|
| 4 |
import string
|
| 5 |
from enum import Enum
|
| 6 |
from typing import Dict
|
|
|
|
| 18 |
# Seed for reproducibility
|
| 19 |
DetectorFactory.seed = 0
|
| 20 |
LANGDETECT_AVAILABLE = True
|
| 21 |
+
|
| 22 |
except ImportError:
|
| 23 |
logger.warning("langdetect not available. Install: pip install langdetect")
|
| 24 |
LANGDETECT_AVAILABLE = False
|
|
|
|
| 26 |
try:
|
| 27 |
from models.model_manager import get_model_manager
|
| 28 |
MODEL_MANAGER_AVAILABLE = True
|
| 29 |
+
|
| 30 |
except ImportError:
|
| 31 |
logger.warning("model_manager not available, using fallback methods")
|
| 32 |
MODEL_MANAGER_AVAILABLE = False
|
|
|
|
| 254 |
# Method 1 : ML Model
|
| 255 |
if self.use_model and self.is_initialized:
|
| 256 |
try:
|
| 257 |
+
result = self._detect_with_model(text = cleaned_text)
|
| 258 |
result.detection_method = "xlm-roberta-model"
|
| 259 |
|
| 260 |
except Exception as e:
|
|
|
|
| 264 |
# Method 2 : langdetect library
|
| 265 |
if result is None and LANGDETECT_AVAILABLE:
|
| 266 |
try:
|
| 267 |
+
result = self._detect_with_langdetect(text = cleaned_text)
|
| 268 |
result.detection_method = "langdetect-library"
|
| 269 |
|
| 270 |
except Exception as e:
|
|
|
|
| 295 |
|
| 296 |
def _detect_with_model(self, text: str) -> LanguageDetectionResult:
|
| 297 |
"""
|
| 298 |
+
Detect language using XLM-RoBERTa model with sentence-based chunking for more accurate detection on long texts
|
| 299 |
"""
|
| 300 |
if not self.is_initialized:
|
| 301 |
if not self.initialize():
|
| 302 |
raise RuntimeError("Model not initialized")
|
| 303 |
|
| 304 |
+
try:
|
| 305 |
+
# Strategy: Use multiple text chunks for better accuracy
|
| 306 |
+
chunks = self._split_text_into_chunks(text = text)
|
| 307 |
+
logger.info(f"Split text into {len(chunks)} chunks for language detection")
|
| 308 |
+
|
| 309 |
+
all_chunk_results = list()
|
| 310 |
+
|
| 311 |
+
for i, chunk in enumerate(chunks):
|
| 312 |
+
try:
|
| 313 |
+
chunk_result = self._process_single_chunk(chunk = chunk)
|
| 314 |
+
all_chunk_results.append(chunk_result)
|
| 315 |
+
|
| 316 |
+
except Exception as e:
|
| 317 |
+
logger.warning(f"Chunk {i+1} processing failed: {repr(e)}")
|
| 318 |
+
continue
|
| 319 |
+
|
| 320 |
+
if not all_chunk_results:
|
| 321 |
+
raise RuntimeError("All chunks failed processing")
|
| 322 |
+
|
| 323 |
+
# Aggregate results from all chunks
|
| 324 |
+
return self._aggregate_chunk_results(chunk_results = all_chunk_results)
|
| 325 |
+
|
| 326 |
+
except Exception as e:
|
| 327 |
+
logger.error(f"Chunk-based model detection failed: {repr(e)}")
|
| 328 |
+
raise
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _split_text_into_chunks(self, text: str, max_chunk_length: int = 500, min_chunk_length: int = 50) -> List[str]:
    """
    Split text into meaningful chunks for language detection.

    Arguments:
    ----------
    text { str } : Input text

    max_chunk_length { int } : Maximum characters per chunk

    min_chunk_length { int } : Minimum characters per chunk

    Returns:
    --------
    List of text chunks
    """
    # Short texts need no splitting at all
    if len(text) <= max_chunk_length:
        return [text]

    # Strategy 1: group whole sentences until the size budget is reached
    pieces = [p.strip() for p in re.split(r'[.!?]+', text) if p.strip()]

    chunks: List[str] = []
    buffer = ""

    for sentence in pieces:
        if len(buffer) + len(sentence) + 1 <= max_chunk_length:
            # Sentence still fits in the current buffer
            buffer = f"{buffer} {sentence}" if buffer else sentence
        else:
            # Buffer is full: flush it (if large enough) and restart
            if buffer and len(buffer) >= min_chunk_length:
                chunks.append(buffer)
            buffer = sentence

    # Flush the trailing buffer when it meets the minimum length
    if buffer and len(buffer) >= min_chunk_length:
        chunks.append(buffer)

    # Strategy 2: fall back to fixed-size windows when sentence splitting
    # produced nothing usable
    if not chunks or (len(chunks) == 1 and len(chunks[0]) > max_chunk_length):
        chunks = self._split_fixed_length(text, max_chunk_length)

    logger.debug(f"Split {len(text)} chars into {len(chunks)} chunks: {[len(c) for c in chunks]}")
    return chunks
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _split_fixed_length(self, text: str, chunk_size: int = 1000) -> List[str]:
|
| 388 |
+
"""
|
| 389 |
+
Fallback: Split text into fixed-length chunks
|
| 390 |
+
"""
|
| 391 |
+
chunks = list()
|
| 392 |
+
|
| 393 |
+
for i in range(0, len(text), chunk_size):
|
| 394 |
+
chunk = text[i:i + chunk_size]
|
| 395 |
+
# Try to break at word boundaries
|
| 396 |
+
if ((i + chunk_size) < len(text)):
|
| 397 |
+
last_space = chunk.rfind(' ')
|
| 398 |
+
# If we found a space in the last 30%
|
| 399 |
+
if (last_space > chunk_size * 0.7):
|
| 400 |
+
chunk = chunk[:last_space].strip()
|
| 401 |
+
|
| 402 |
+
chunks.append(chunk)
|
| 403 |
+
return chunks
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def _process_single_chunk(self, chunk: str) -> Dict:
    """
    Run one text chunk through the language-identification model and return
    a mapping of language code -> softmax probability for the top-3
    predictions.
    """
    # Tokenize with explicit length limits using the pipeline's tokenizer
    encoded = self.classifier.tokenizer(chunk,
                                        return_tensors = "pt",
                                        truncation = True,
                                        max_length = 512,
                                        padding = True,
                                        add_special_tokens = True,
                                        )

    model = self.classifier.model
    target_device = next(model.parameters()).device

    # Keep the inputs on the same device as the model weights
    encoded = {key: tensor.to(target_device) for key, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.nn.functional.softmax(logits, dim = -1)

    # Keep the three most likely languages for this chunk
    top = torch.topk(probs[0], k = 3)

    scores = dict()
    for rank in range(len(top.indices)):
        label = model.config.id2label[top.indices[rank].item()]

        # Normalize labels such as "en_Latn" down to the bare language code
        if '_' in label:
            label = label.split('_')[0]

        scores[label] = top.values[rank].item()

    return scores
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
def _aggregate_chunk_results(self, chunk_results: List[Dict]) -> LanguageDetectionResult:
|
| 455 |
+
"""
|
| 456 |
+
Aggregate results from multiple chunks using weighted averaging
|
| 457 |
+
"""
|
| 458 |
+
# Combine scores from all chunks
|
| 459 |
+
all_scores = dict()
|
| 460 |
+
chunk_weights = list()
|
| 461 |
+
|
| 462 |
+
for chunk_result in chunk_results:
|
| 463 |
+
# Calculate chunk weight based on confidence and diversity
|
| 464 |
+
top_score = max(chunk_result.values()) if chunk_result else 0
|
| 465 |
+
# Weight by confidence
|
| 466 |
+
chunk_weight = top_score
|
| 467 |
+
|
| 468 |
+
chunk_weights.append(chunk_weight)
|
| 469 |
|
| 470 |
+
for lang_code, score in chunk_result.items():
|
| 471 |
+
if lang_code not in all_scores:
|
| 472 |
+
all_scores[lang_code] = list()
|
| 473 |
+
|
| 474 |
+
all_scores[lang_code].append(score)
|
| 475 |
+
|
| 476 |
+
# Calculate weighted average for each language
|
| 477 |
+
weighted_scores = dict()
|
| 478 |
+
|
| 479 |
+
for lang_code, scores in all_scores.items():
|
| 480 |
+
if (len(scores) != len(chunk_weights)):
|
| 481 |
+
# Use simple average if weight mismatch
|
| 482 |
+
weighted_scores[lang_code] = sum(scores) / len(scores)
|
| 483 |
+
|
| 484 |
+
else:
|
| 485 |
+
# Weighted average
|
| 486 |
+
weighted_sum = sum(score * weight for score, weight in zip(scores, chunk_weights))
|
| 487 |
+
total_weight = sum(chunk_weights)
|
| 488 |
+
weighted_scores[lang_code] = weighted_sum / total_weight if total_weight > 0 else sum(scores) / len(scores)
|
| 489 |
+
|
| 490 |
+
# Find primary language
|
| 491 |
+
primary_lang = None
|
| 492 |
+
primary_conf = 0.0
|
| 493 |
+
|
| 494 |
+
for lang_code, score in weighted_scores.items():
|
| 495 |
if (score > primary_conf):
|
| 496 |
primary_conf = score
|
| 497 |
primary_lang = lang_code
|
|
|
|
| 503 |
except ValueError:
|
| 504 |
primary_language = Language.UNKNOWN
|
| 505 |
|
| 506 |
+
# Calculate detection quality metrics
|
| 507 |
+
detection_quality = self._assess_detection_quality(chunk_results, weighted_scores)
|
| 508 |
+
|
| 509 |
+
warnings = list()
|
| 510 |
+
|
| 511 |
+
if detection_quality.get('low_confidence', False):
|
| 512 |
+
warnings.append("Low confidence across multiple chunks")
|
| 513 |
+
|
| 514 |
+
if detection_quality.get('inconsistent', False):
|
| 515 |
+
warnings.append("Inconsistent language detection across chunks")
|
| 516 |
+
|
| 517 |
return LanguageDetectionResult(primary_language = primary_language,
|
| 518 |
confidence = primary_conf,
|
| 519 |
+
all_languages = weighted_scores,
|
| 520 |
script = Script.UNKNOWN,
|
| 521 |
+
is_multilingual = detection_quality.get('multilingual', False),
|
| 522 |
+
detection_method = "model-chunked",
|
| 523 |
char_count = 0,
|
| 524 |
word_count = 0,
|
| 525 |
+
warnings = warnings,
|
| 526 |
)
|
| 527 |
|
| 528 |
+
|
| 529 |
+
def _assess_detection_quality(self, chunk_results: List[Dict], final_scores: Dict[str, float]) -> Dict[str, bool]:
|
| 530 |
+
"""
|
| 531 |
+
Assess the quality and consistency of language detection across chunks
|
| 532 |
+
"""
|
| 533 |
+
quality_metrics = {'low_confidence' : False,
|
| 534 |
+
'inconsistent' : False,
|
| 535 |
+
'multilingual' : False,
|
| 536 |
+
}
|
| 537 |
+
|
| 538 |
+
if not chunk_results:
|
| 539 |
+
return quality_metrics
|
| 540 |
+
|
| 541 |
+
# Check for low confidence
|
| 542 |
+
avg_top_confidence = sum(max(chunk.values()) for chunk in chunk_results) / len(chunk_results)
|
| 543 |
+
if (avg_top_confidence < 0.6):
|
| 544 |
+
quality_metrics['low_confidence'] = True
|
| 545 |
+
|
| 546 |
+
# Check for inconsistency (different primary languages across chunks)
|
| 547 |
+
chunk_primaries = list()
|
| 548 |
+
|
| 549 |
+
for chunk in chunk_results:
|
| 550 |
+
if chunk:
|
| 551 |
+
primary = max(chunk.items(), key=lambda x: x[1])[0]
|
| 552 |
+
chunk_primaries.append(primary)
|
| 553 |
+
|
| 554 |
+
if (len(set(chunk_primaries)) > 1):
|
| 555 |
+
quality_metrics['inconsistent'] = True
|
| 556 |
+
|
| 557 |
+
# Check for multilingual content
|
| 558 |
+
strong_languages = [lang for lang, score in final_scores.items() if score > 0.2]
|
| 559 |
+
if (len(strong_languages) > 1):
|
| 560 |
+
quality_metrics['multilingual'] = True
|
| 561 |
+
|
| 562 |
+
return quality_metrics
|
| 563 |
+
|
| 564 |
|
| 565 |
def _detect_with_langdetect(self, text: str) -> LanguageDetectionResult:
|
| 566 |
"""
|
|
|
|
| 575 |
all_languages[prob.lang] = prob.prob
|
| 576 |
|
| 577 |
# Primary language
|
| 578 |
+
primary = lang_probs[0]
|
| 579 |
|
| 580 |
try:
|
| 581 |
primary_language = Language(primary.lang)
|
|
|
|
| 760 |
{ bool } : True if text is in target language with sufficient confidence
|
| 761 |
"""
|
| 762 |
result = self.detect(text)
|
| 763 |
+
return ((result.primary_language == target_language) and (result.confidence >= threshold))
|
| 764 |
|
| 765 |
|
| 766 |
def get_supported_languages(self) -> List[str]:
    """
    Get list of supported language codes (every Language enum member
    except the UNKNOWN sentinel).
    """
    supported = []
    for member in Language:
        if member != Language.UNKNOWN:
            supported.append(member.value)
    return supported
|
| 771 |
|
| 772 |
|
| 773 |
def cleanup(self):
|
|
|
|
| 778 |
self.is_initialized = False
|
| 779 |
|
| 780 |
|
| 781 |
+
# Convenience Functions
|
| 782 |
def quick_detect(text: str, **kwargs) -> LanguageDetectionResult:
|
| 783 |
"""
|
| 784 |
Quick language detection with default settings
|
|
|
|
| 820 |
'LanguageDetector',
|
| 821 |
'LanguageDetectionResult',
|
| 822 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
processors/text_processor.py
CHANGED
|
@@ -75,7 +75,7 @@ class TextProcessor:
|
|
| 75 |
MULTIPLE_NEWLINES = re.compile(r'\n{3,}')
|
| 76 |
|
| 77 |
|
| 78 |
-
def __init__(self, min_text_length: int = 50, max_text_length: int =
|
| 79 |
normalize_unicode: bool = True, fix_encoding: bool = True):
|
| 80 |
"""
|
| 81 |
Initialize text processor
|
|
|
|
| 75 |
MULTIPLE_NEWLINES = re.compile(r'\n{3,}')
|
| 76 |
|
| 77 |
|
| 78 |
+
def __init__(self, min_text_length: int = 50, max_text_length: int = 500000, preserve_formatting: bool = False, remove_urls: bool = True, remove_emails: bool = True,
|
| 79 |
normalize_unicode: bool = True, fix_encoding: bool = True):
|
| 80 |
"""
|
| 81 |
Initialize text processor
|
reporter/reasoning_generator.py
CHANGED
|
@@ -61,13 +61,13 @@ class ReasoningGenerator:
|
|
| 61 |
- Metric contribution analysis
|
| 62 |
- Actionable recommendations
|
| 63 |
"""
|
| 64 |
-
#
|
| 65 |
-
METRIC_DESCRIPTIONS = {"structural"
|
| 66 |
-
"perplexity"
|
| 67 |
-
"entropy"
|
| 68 |
-
"semantic_analysis"
|
| 69 |
-
"linguistic"
|
| 70 |
-
"
|
| 71 |
}
|
| 72 |
|
| 73 |
# Ensemble method descriptions
|
|
@@ -338,7 +338,7 @@ class ReasoningGenerator:
|
|
| 338 |
elif (not is_ai and (pos_diversity > 0.55)):
|
| 339 |
return f"Rich grammatical variety ({pos_diversity:.2f})"
|
| 340 |
|
| 341 |
-
elif (metric_name == "
|
| 342 |
stability = details.get("stability_score", 0.5)
|
| 343 |
curvature = details.get("curvature_score", 0.5)
|
| 344 |
|
|
|
|
| 61 |
- Metric contribution analysis
|
| 62 |
- Actionable recommendations
|
| 63 |
"""
|
| 64 |
+
# Metric descriptions
|
| 65 |
+
METRIC_DESCRIPTIONS = {"structural" : "analyzes sentence structure, length patterns, and statistical features",
|
| 66 |
+
"perplexity" : "measures text predictability using language model cross-entropy",
|
| 67 |
+
"entropy" : "evaluates token diversity and sequence unpredictability",
|
| 68 |
+
"semantic_analysis" : "examines semantic coherence, topic consistency, and logical flow",
|
| 69 |
+
"linguistic" : "assesses grammatical patterns, syntactic complexity, and style markers",
|
| 70 |
+
"multi_perturbation_stability" : "tests text stability under perturbation using curvature analysis",
|
| 71 |
}
|
| 72 |
|
| 73 |
# Ensemble method descriptions
|
|
|
|
| 338 |
elif (not is_ai and (pos_diversity > 0.55)):
|
| 339 |
return f"Rich grammatical variety ({pos_diversity:.2f})"
|
| 340 |
|
| 341 |
+
elif (metric_name == "multi_perturbation_stability"):
|
| 342 |
stability = details.get("stability_score", 0.5)
|
| 343 |
curvature = details.get("curvature_score", 0.5)
|
| 344 |
|
reporter/report_generator.py
CHANGED
|
@@ -207,12 +207,12 @@ class ReportGenerator:
|
|
| 207 |
"""
|
| 208 |
Get description for each metric type
|
| 209 |
"""
|
| 210 |
-
descriptions = {"structural"
|
| 211 |
-
"perplexity"
|
| 212 |
-
"entropy"
|
| 213 |
-
"semantic_analysis"
|
| 214 |
-
"linguistic"
|
| 215 |
-
"
|
| 216 |
}
|
| 217 |
|
| 218 |
return descriptions.get(metric_name, "Advanced text analysis metric.")
|
|
|
|
| 207 |
"""
|
| 208 |
Get description for each metric type
|
| 209 |
"""
|
| 210 |
+
descriptions = {"structural" : "Analyzes sentence structure, length patterns, and statistical features",
|
| 211 |
+
"perplexity" : "Measures text predictability using language model cross-entropy",
|
| 212 |
+
"entropy" : "Evaluates token diversity and sequence unpredictability",
|
| 213 |
+
"semantic_analysis" : "Examines semantic coherence, topic consistency, and logical flow",
|
| 214 |
+
"linguistic" : "Assesses grammatical patterns, syntactic complexity, and style markers",
|
| 215 |
+
"multi_perturbation_stability" : "Tests text stability under perturbation using curvature analysis",
|
| 216 |
}
|
| 217 |
|
| 218 |
return descriptions.get(metric_name, "Advanced text analysis metric.")
|
text_auth_app.py
CHANGED
|
@@ -767,7 +767,7 @@ async def analyze_text(request: TextAnalysisRequest):
|
|
| 767 |
use_sentence_level = request.use_sentence_level,
|
| 768 |
)
|
| 769 |
|
| 770 |
-
#
|
| 771 |
highlighted_html = highlighter.generate_html(highlighted_sentences = highlighted_sentences,
|
| 772 |
include_legend = False, # UI already has its own legend
|
| 773 |
include_metrics = request.include_metrics_summary,
|
|
@@ -794,8 +794,6 @@ async def analyze_text(request: TextAnalysisRequest):
|
|
| 794 |
|
| 795 |
processing_time = time.time() - start_time
|
| 796 |
|
| 797 |
-
#logger.success(f"[{analysis_id}] Analysis complete: {detection_result.ensemble_result.final_verdict} ({processing_time:.2f}s)")
|
| 798 |
-
|
| 799 |
# Log the detection event
|
| 800 |
log_detection_event(analysis_id = analysis_id,
|
| 801 |
text_length = len(request.text),
|
|
@@ -806,7 +804,7 @@ async def analyze_text(request: TextAnalysisRequest):
|
|
| 806 |
enable_attribution = request.enable_attribution,
|
| 807 |
enable_highlighting = request.enable_highlighting,
|
| 808 |
)
|
| 809 |
-
|
| 810 |
return TextAnalysisResponse(status = "success",
|
| 811 |
analysis_id = analysis_id,
|
| 812 |
detection_result = detection_dict,
|
|
@@ -905,7 +903,7 @@ async def analyze_file(file: UploadFile = File(...), domain: Optional[str] = For
|
|
| 905 |
use_sentence_level = use_sentence_level,
|
| 906 |
)
|
| 907 |
|
| 908 |
-
#
|
| 909 |
highlighted_html = highlighter.generate_html(highlighted_sentences = highlighted_sentences,
|
| 910 |
include_legend = False, # UI already has its own legend
|
| 911 |
include_metrics = include_metrics_summary,
|
|
@@ -1119,16 +1117,23 @@ async def generate_report(background_tasks: BackgroundTasks, analysis_id: str =
|
|
| 1119 |
logger.warning(f"Highlight generation for report failed: {e}")
|
| 1120 |
|
| 1121 |
# Generate reports
|
| 1122 |
-
report_files
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1128 |
|
| 1129 |
return ReportGenerationResponse(status = "success",
|
| 1130 |
analysis_id = analysis_id,
|
| 1131 |
-
reports =
|
| 1132 |
timestamp = datetime.now().isoformat(),
|
| 1133 |
)
|
| 1134 |
|
|
|
|
| 767 |
use_sentence_level = request.use_sentence_level,
|
| 768 |
)
|
| 769 |
|
| 770 |
+
# Set include_legend=False to prevent duplicate legends
|
| 771 |
highlighted_html = highlighter.generate_html(highlighted_sentences = highlighted_sentences,
|
| 772 |
include_legend = False, # UI already has its own legend
|
| 773 |
include_metrics = request.include_metrics_summary,
|
|
|
|
| 794 |
|
| 795 |
processing_time = time.time() - start_time
|
| 796 |
|
|
|
|
|
|
|
| 797 |
# Log the detection event
|
| 798 |
log_detection_event(analysis_id = analysis_id,
|
| 799 |
text_length = len(request.text),
|
|
|
|
| 804 |
enable_attribution = request.enable_attribution,
|
| 805 |
enable_highlighting = request.enable_highlighting,
|
| 806 |
)
|
| 807 |
+
|
| 808 |
return TextAnalysisResponse(status = "success",
|
| 809 |
analysis_id = analysis_id,
|
| 810 |
detection_result = detection_dict,
|
|
|
|
| 903 |
use_sentence_level = use_sentence_level,
|
| 904 |
)
|
| 905 |
|
| 906 |
+
# Set include_legend=False to prevent duplicate legends
|
| 907 |
highlighted_html = highlighter.generate_html(highlighted_sentences = highlighted_sentences,
|
| 908 |
include_legend = False, # UI already has its own legend
|
| 909 |
include_metrics = include_metrics_summary,
|
|
|
|
| 1117 |
logger.warning(f"Highlight generation for report failed: {e}")
|
| 1118 |
|
| 1119 |
# Generate reports
|
| 1120 |
+
report_files = reporter.generate_complete_report(detection_result = detection_result,
|
| 1121 |
+
attribution_result = attribution_result,
|
| 1122 |
+
highlighted_sentences = highlighted_sentences,
|
| 1123 |
+
formats = requested_formats,
|
| 1124 |
+
filename_prefix = analysis_id,
|
| 1125 |
+
)
|
| 1126 |
+
|
| 1127 |
+
# Extract only the filename from the full path for the response
|
| 1128 |
+
report_filenames = dict()
|
| 1129 |
+
|
| 1130 |
+
for fmt, full_path in report_files.items():
|
| 1131 |
+
# Get the filename part
|
| 1132 |
+
report_filenames[fmt] = Path(full_path).name
|
| 1133 |
|
| 1134 |
return ReportGenerationResponse(status = "success",
|
| 1135 |
analysis_id = analysis_id,
|
| 1136 |
+
reports = report_filenames,
|
| 1137 |
timestamp = datetime.now().isoformat(),
|
| 1138 |
)
|
| 1139 |
|
ui/static/index.html
CHANGED
|
@@ -273,6 +273,16 @@ body {
|
|
| 273 |
padding: 2rem;
|
| 274 |
border: 1px solid var(--border);
|
| 275 |
backdrop-filter: blur(10px);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
}
|
| 277 |
.panel-title {
|
| 278 |
font-size: 1.5rem;
|
|
@@ -611,7 +621,6 @@ input[type="checkbox"] {
|
|
| 611 |
color: var(--text-secondary);
|
| 612 |
line-height: 1.7;
|
| 613 |
}
|
| 614 |
-
|
| 615 |
/* Enhanced Reasoning Styles */
|
| 616 |
.reasoning-box.enhanced {
|
| 617 |
background: linear-gradient(135deg, rgba(30, 41, 59, 0.95) 0%, rgba(15, 23, 42, 0.95) 100%);
|
|
@@ -621,25 +630,21 @@ input[type="checkbox"] {
|
|
| 621 |
margin-top: 2rem;
|
| 622 |
backdrop-filter: blur(10px);
|
| 623 |
}
|
| 624 |
-
|
| 625 |
.reasoning-header {
|
| 626 |
display: flex;
|
| 627 |
align-items: center;
|
| 628 |
gap: 0.75rem;
|
| 629 |
margin-bottom: 1rem;
|
| 630 |
}
|
| 631 |
-
|
| 632 |
.reasoning-icon {
|
| 633 |
font-size: 1.5rem;
|
| 634 |
}
|
| 635 |
-
|
| 636 |
.reasoning-title {
|
| 637 |
font-size: 1.1rem;
|
| 638 |
font-weight: 700;
|
| 639 |
color: var(--primary);
|
| 640 |
flex: 1;
|
| 641 |
}
|
| 642 |
-
|
| 643 |
.confidence-tag {
|
| 644 |
padding: 0.25rem 0.75rem;
|
| 645 |
border-radius: 20px;
|
|
@@ -647,25 +652,21 @@ input[type="checkbox"] {
|
|
| 647 |
font-weight: 600;
|
| 648 |
text-transform: uppercase;
|
| 649 |
}
|
| 650 |
-
|
| 651 |
.high-confidence {
|
| 652 |
background: rgba(16, 185, 129, 0.2);
|
| 653 |
color: var(--success);
|
| 654 |
border: 1px solid rgba(16, 185, 129, 0.3);
|
| 655 |
}
|
| 656 |
-
|
| 657 |
.medium-confidence {
|
| 658 |
background: rgba(245, 158, 11, 0.2);
|
| 659 |
color: var(--warning);
|
| 660 |
border: 1px solid rgba(245, 158, 11, 0.3);
|
| 661 |
}
|
| 662 |
-
|
| 663 |
.low-confidence {
|
| 664 |
background: rgba(239, 68, 68, 0.2);
|
| 665 |
color: var(--danger);
|
| 666 |
border: 1px solid rgba(239, 68, 68, 0.3);
|
| 667 |
}
|
| 668 |
-
|
| 669 |
.verdict-summary {
|
| 670 |
display: flex;
|
| 671 |
justify-content: space-between;
|
|
@@ -675,27 +676,22 @@ input[type="checkbox"] {
|
|
| 675 |
background: rgba(51, 65, 85, 0.3);
|
| 676 |
border-radius: 8px;
|
| 677 |
}
|
| 678 |
-
|
| 679 |
.verdict-text {
|
| 680 |
font-size: 1.3rem;
|
| 681 |
font-weight: 800;
|
| 682 |
color: var(--warning);
|
| 683 |
}
|
| 684 |
-
|
| 685 |
.probability {
|
| 686 |
color: var(--text-secondary);
|
| 687 |
font-size: 0.95rem;
|
| 688 |
}
|
| 689 |
-
|
| 690 |
.probability-value {
|
| 691 |
color: var(--text-primary);
|
| 692 |
font-weight: 700;
|
| 693 |
}
|
| 694 |
-
|
| 695 |
.metrics-breakdown {
|
| 696 |
margin-bottom: 1.5rem;
|
| 697 |
}
|
| 698 |
-
|
| 699 |
.breakdown-header {
|
| 700 |
font-size: 0.9rem;
|
| 701 |
font-weight: 600;
|
|
@@ -704,7 +700,6 @@ input[type="checkbox"] {
|
|
| 704 |
text-transform: uppercase;
|
| 705 |
letter-spacing: 0.5px;
|
| 706 |
}
|
| 707 |
-
|
| 708 |
.metric-indicator {
|
| 709 |
display: flex;
|
| 710 |
justify-content: space-between;
|
|
@@ -714,24 +709,20 @@ input[type="checkbox"] {
|
|
| 714 |
border-radius: 8px;
|
| 715 |
transition: all 0.2s ease;
|
| 716 |
}
|
| 717 |
-
|
| 718 |
.metric-indicator:hover {
|
| 719 |
background: rgba(51, 65, 85, 0.4);
|
| 720 |
transform: translateX(4px);
|
| 721 |
}
|
| 722 |
-
|
| 723 |
.metric-name {
|
| 724 |
font-weight: 600;
|
| 725 |
color: var(--text-primary);
|
| 726 |
min-width: 140px;
|
| 727 |
}
|
| 728 |
-
|
| 729 |
.metric-details {
|
| 730 |
display: flex;
|
| 731 |
gap: 1rem;
|
| 732 |
align-items: center;
|
| 733 |
}
|
| 734 |
-
|
| 735 |
.verdict-badge {
|
| 736 |
padding: 0.2rem 0.6rem;
|
| 737 |
border-radius: 6px;
|
|
@@ -741,25 +732,21 @@ input[type="checkbox"] {
|
|
| 741 |
min-width: 60px;
|
| 742 |
text-align: center;
|
| 743 |
}
|
| 744 |
-
|
| 745 |
.ai-badge {
|
| 746 |
background: rgba(239, 68, 68, 0.2);
|
| 747 |
color: var(--danger);
|
| 748 |
border: 1px solid rgba(239, 68, 68, 0.3);
|
| 749 |
}
|
| 750 |
-
|
| 751 |
.human-badge {
|
| 752 |
background: rgba(16, 185, 129, 0.2);
|
| 753 |
color: var(--success);
|
| 754 |
border: 1px solid rgba(16, 185, 129, 0.3);
|
| 755 |
}
|
| 756 |
-
|
| 757 |
.confidence, .weight {
|
| 758 |
font-size: 0.8rem;
|
| 759 |
color: var(--text-muted);
|
| 760 |
min-width: 100px;
|
| 761 |
}
|
| 762 |
-
|
| 763 |
.agreement-indicator {
|
| 764 |
display: flex;
|
| 765 |
align-items: center;
|
|
@@ -770,16 +757,13 @@ input[type="checkbox"] {
|
|
| 770 |
border-radius: 8px;
|
| 771 |
color: var(--success);
|
| 772 |
}
|
| 773 |
-
|
| 774 |
.agreement-icon {
|
| 775 |
font-weight: 700;
|
| 776 |
}
|
| 777 |
-
|
| 778 |
.agreement-text {
|
| 779 |
font-size: 0.9rem;
|
| 780 |
font-weight: 600;
|
| 781 |
}
|
| 782 |
-
|
| 783 |
/* Attribution Section */
|
| 784 |
.attribution-section {
|
| 785 |
margin-top: 2rem;
|
|
@@ -967,6 +951,66 @@ input[type="checkbox"] {
|
|
| 967 |
text-align: center;
|
| 968 |
color: var(--text-muted);
|
| 969 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
/* Responsive */
|
| 971 |
@media (max-width: 1200px) {
|
| 972 |
.interface-grid {
|
|
@@ -1001,6 +1045,10 @@ input[type="checkbox"] {
|
|
| 1001 |
flex-direction: column;
|
| 1002 |
gap: 0.75rem;
|
| 1003 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
}
|
| 1005 |
/* Scroll Behavior */
|
| 1006 |
html {
|
|
@@ -1066,7 +1114,7 @@ html {
|
|
| 1066 |
<div class="feature-icon">🔬</div>
|
| 1067 |
<h3 class="feature-title">6-Metric Ensemble</h3>
|
| 1068 |
<p class="feature-description">
|
| 1069 |
-
Combines Perplexity, Entropy, Statistical, Linguistic, Semantic Analysis, and
|
| 1070 |
</p>
|
| 1071 |
</div>
|
| 1072 |
<div class="feature-card">
|
|
@@ -1109,7 +1157,7 @@ html {
|
|
| 1109 |
<div class="metric-icon-box">📊</div>
|
| 1110 |
<div class="metric-content">
|
| 1111 |
<h3>Perplexity <span class="metric-weight">Weight: 25%</span></h3>
|
| 1112 |
-
<p>Measures how predictable the text is using GPT-2
|
| 1113 |
</div>
|
| 1114 |
</div>
|
| 1115 |
<div class="metric-card">
|
|
@@ -1143,7 +1191,7 @@ html {
|
|
| 1143 |
<div class="metric-card">
|
| 1144 |
<div class="metric-icon-box">🔍</div>
|
| 1145 |
<div class="metric-content">
|
| 1146 |
-
<h3>
|
| 1147 |
<p>Tests text stability under random perturbations. AI-generated text tends to maintain higher likelihood scores even when slightly modified, while human text shows more variation.</p>
|
| 1148 |
</div>
|
| 1149 |
</div>
|
|
@@ -1160,132 +1208,138 @@ html {
|
|
| 1160 |
<!-- Left Panel: Input -->
|
| 1161 |
<div class="panel">
|
| 1162 |
<h2 class="panel-title">Submit Content for Analysis</h2>
|
| 1163 |
-
<div class="
|
| 1164 |
-
<
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
<div id="paste-tab" class="tab-content active">
|
| 1172 |
-
<textarea
|
| 1173 |
-
id="text-input"
|
| 1174 |
-
class="text-input"
|
| 1175 |
-
placeholder="Paste your text here for analysis...
|
| 1176 |
-
The more text you provide (minimum 50 characters), the more accurate the detection will be. Our system analyzes linguistic patterns, statistical features, and semantic structures to determine authenticity."
|
| 1177 |
-
></textarea>
|
| 1178 |
-
</div>
|
| 1179 |
-
<div id="upload-tab" class="tab-content">
|
| 1180 |
-
<div class="file-upload-area" id="file-upload-area">
|
| 1181 |
-
<input type="file" id="file-input" class="file-input" accept=".txt,.pdf,.docx,.doc,.md">
|
| 1182 |
-
<div class="file-upload-icon">📄</div>
|
| 1183 |
-
<div style="font-size: 1.1rem; font-weight: 600; margin-bottom: 0.5rem;">
|
| 1184 |
-
Click to upload or drag and drop
|
| 1185 |
-
</div>
|
| 1186 |
-
<div style="color: var(--text-muted); font-size: 0.9rem;">
|
| 1187 |
-
Supported formats: TXT, PDF, DOCX, DOC, MD
|
| 1188 |
-
</div>
|
| 1189 |
-
<div style="color: var(--text-muted); font-size: 0.85rem; margin-top: 0.5rem;">
|
| 1190 |
-
Maximum file size: 10MB
|
| 1191 |
-
</div>
|
| 1192 |
</div>
|
| 1193 |
-
<div id="
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
<option value="academic">Academic</option>
|
| 1201 |
-
<option value="technical_doc">Technical/Medical</option>
|
| 1202 |
-
<option value="creative">Creative Writing</option>
|
| 1203 |
-
<option value="social_media">Social Media</option>
|
| 1204 |
-
</select>
|
| 1205 |
</div>
|
| 1206 |
-
<div class="
|
| 1207 |
-
<
|
| 1208 |
-
|
| 1209 |
-
<
|
| 1210 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
</div>
|
|
|
|
| 1212 |
</div>
|
| 1213 |
-
<div class="
|
| 1214 |
-
<
|
| 1215 |
-
|
| 1216 |
-
<
|
| 1217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
</div>
|
| 1219 |
-
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
-
<span style="font-size: 0.85rem; color: var(--text-muted);">More accurate but slower analysis</span>
|
| 1226 |
</div>
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1233 |
</div>
|
| 1234 |
</div>
|
| 1235 |
</div>
|
| 1236 |
-
<
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
<div class="action-buttons">
|
| 1240 |
-
<button id="refresh-btn" class="action-btn refresh">
|
| 1241 |
-
🔄 Refresh
|
| 1242 |
-
</button>
|
| 1243 |
-
<button id="try-next-btn" class="action-btn">
|
| 1244 |
-
➕ Try Next
|
| 1245 |
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1246 |
</div>
|
| 1247 |
</div>
|
| 1248 |
<!-- Right Panel: Results -->
|
| 1249 |
<div class="panel">
|
| 1250 |
<h2 class="panel-title">Analysis Report</h2>
|
| 1251 |
-
<div class="
|
| 1252 |
-
<
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
<!-- Summary Report -->
|
| 1263 |
-
<div id="summary-report" class="report-content active">
|
| 1264 |
-
<div class="empty-state">
|
| 1265 |
-
<div class="empty-icon">✓</div>
|
| 1266 |
-
<h3 class="empty-title">Ready for Analysis</h3>
|
| 1267 |
-
<p class="empty-description">
|
| 1268 |
-
Paste text or upload a document to begin comprehensive AI detection analysis.
|
| 1269 |
-
Our 6-metric ensemble will provide detailed insights.
|
| 1270 |
-
</p>
|
| 1271 |
</div>
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
-
|
|
|
|
|
|
|
| 1280 |
</div>
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1289 |
</div>
|
| 1290 |
</div>
|
| 1291 |
</div>
|
|
@@ -1295,50 +1349,44 @@ The more text you provide (minimum 50 characters), the more accurate the detecti
|
|
| 1295 |
// Configuration
|
| 1296 |
const API_BASE = '';
|
| 1297 |
let currentAnalysisData = null;
|
| 1298 |
-
|
|
|
|
| 1299 |
// Navigation
|
| 1300 |
function showLanding() {
|
| 1301 |
document.getElementById('landing-page').style.display = 'block';
|
| 1302 |
document.getElementById('analysis-interface').style.display = 'none';
|
| 1303 |
window.scrollTo(0, 0);
|
| 1304 |
}
|
| 1305 |
-
|
| 1306 |
function showAnalysis() {
|
| 1307 |
document.getElementById('landing-page').style.display = 'none';
|
| 1308 |
document.getElementById('analysis-interface').style.display = 'block';
|
| 1309 |
window.scrollTo(0, 0);
|
| 1310 |
resetAnalysisInterface();
|
| 1311 |
}
|
| 1312 |
-
|
| 1313 |
// Reset analysis interface
|
| 1314 |
function resetAnalysisInterface() {
|
| 1315 |
// Clear text input
|
| 1316 |
document.getElementById('text-input').value = '';
|
| 1317 |
-
|
| 1318 |
// Clear file input and display
|
| 1319 |
document.getElementById('file-input').value = '';
|
| 1320 |
document.getElementById('file-name-display').style.display = 'none';
|
| 1321 |
document.getElementById('file-name-display').innerHTML = '';
|
| 1322 |
-
|
| 1323 |
// Reset tabs to paste
|
| 1324 |
document.querySelectorAll('.input-tab').forEach(t => t.classList.remove('active'));
|
| 1325 |
document.querySelector('.input-tab[data-tab="paste"]').classList.add('active');
|
| 1326 |
document.querySelectorAll('.tab-content').forEach(content => content.classList.remove('active'));
|
| 1327 |
document.getElementById('paste-tab').classList.add('active');
|
| 1328 |
-
|
| 1329 |
// Reset options to defaults
|
| 1330 |
document.getElementById('domain-select').value = '';
|
| 1331 |
document.getElementById('enable-attribution').checked = true;
|
| 1332 |
document.getElementById('enable-highlighting').checked = true;
|
| 1333 |
document.getElementById('use-sentence-level').checked = true;
|
| 1334 |
document.getElementById('include-metrics-summary').checked = true;
|
| 1335 |
-
|
| 1336 |
// Reset report tabs to summary
|
| 1337 |
document.querySelectorAll('.report-tab').forEach(t => t.classList.remove('active'));
|
| 1338 |
document.querySelector('.report-tab[data-report="summary"]').classList.add('active');
|
| 1339 |
document.querySelectorAll('.report-content').forEach(content => content.classList.remove('active'));
|
| 1340 |
document.getElementById('summary-report').classList.add('active');
|
| 1341 |
-
|
| 1342 |
// Show empty state
|
| 1343 |
document.getElementById('summary-report').innerHTML = `
|
| 1344 |
<div class="empty-state">
|
|
@@ -1366,11 +1414,11 @@ function resetAnalysisInterface() {
|
|
| 1366 |
</p>
|
| 1367 |
</div>
|
| 1368 |
`;
|
| 1369 |
-
|
| 1370 |
// Clear current analysis data
|
| 1371 |
currentAnalysisData = null;
|
|
|
|
|
|
|
| 1372 |
}
|
| 1373 |
-
|
| 1374 |
// Input Tab Switching
|
| 1375 |
document.querySelectorAll('.input-tab').forEach(tab => {
|
| 1376 |
tab.addEventListener('click', () => {
|
|
@@ -1383,7 +1431,6 @@ document.querySelectorAll('.input-tab').forEach(tab => {
|
|
| 1383 |
document.getElementById(`${tabName}-tab`).classList.add('active');
|
| 1384 |
});
|
| 1385 |
});
|
| 1386 |
-
|
| 1387 |
// Report Tab Switching
|
| 1388 |
document.querySelectorAll('.report-tab').forEach(tab => {
|
| 1389 |
tab.addEventListener('click', () => {
|
|
@@ -1396,30 +1443,24 @@ document.querySelectorAll('.report-tab').forEach(tab => {
|
|
| 1396 |
document.getElementById(`${reportName}-report`).classList.add('active');
|
| 1397 |
});
|
| 1398 |
});
|
| 1399 |
-
|
| 1400 |
// File Upload Handling: cached DOM nodes shared by the upload handlers below.
// (These const names are referenced elsewhere in the script — do not rename.)
const fileInput = document.getElementById('file-input');
const fileUploadArea = document.getElementById('file-upload-area');
const fileNameDisplay = document.getElementById('file-name-display');

// Clicking the drop area forwards to the hidden <input type="file">.
fileUploadArea.addEventListener('click', function () {
    fileInput.click();
});

// A file chosen via the native picker goes through the same validation path
// as a dropped file.
fileInput.addEventListener('change', function (event) {
    handleFileSelect(event.target.files[0]);
});

// Drag and Drop: toggle the visual highlight while a file hovers the area.
fileUploadArea.addEventListener('dragover', function (event) {
    event.preventDefault();
    fileUploadArea.classList.add('drag-over');
});

fileUploadArea.addEventListener('dragleave', function () {
    fileUploadArea.classList.remove('drag-over');
});
|
| 1422 |
-
|
| 1423 |
fileUploadArea.addEventListener('drop', (e) => {
|
| 1424 |
e.preventDefault();
|
| 1425 |
fileUploadArea.classList.remove('drag-over');
|
|
@@ -1429,77 +1470,61 @@ fileUploadArea.addEventListener('drop', (e) => {
|
|
| 1429 |
handleFileSelect(file);
|
| 1430 |
}
|
| 1431 |
});
|
| 1432 |
-
|
| 1433 |
// Validate a user-selected file and show its name/size in the upload panel.
// Rejects unsupported extensions and files over the 10 MB limit via alert().
function handleFileSelect(file) {
    if (!file) return;

    const allowedTypes = ['.txt', '.pdf', '.docx', '.doc', '.md'];
    // A name without a dot yields the whole name here; such names simply
    // fail the allow-list check below.
    const fileExt = '.' + file.name.split('.').pop().toLowerCase();

    if (!allowedTypes.includes(fileExt)) {
        alert('Unsupported file type. Please upload: TXT, PDF, DOCX, DOC, or MD files.');
        return;
    }

    if (file.size > 10 * 1024 * 1024) {
        alert('File size exceeds 10MB limit.');
        return;
    }

    // Build the display with DOM APIs instead of interpolating the raw file
    // name into innerHTML, so a name containing markup cannot inject HTML.
    fileNameDisplay.style.display = 'block';
    fileNameDisplay.innerHTML = '';
    const label = document.createElement('strong');
    label.textContent = 'Selected file:';
    const sizeSpan = document.createElement('span');
    sizeSpan.style.color = 'var(--text-muted)';
    sizeSpan.textContent = `(${formatFileSize(file.size)})`;
    fileNameDisplay.appendChild(label);
    fileNameDisplay.append(` ${file.name} `, sizeSpan);
}
|
| 1455 |
-
|
| 1456 |
// Render a byte count as a human-readable size (B, KB, or MB).
function formatFileSize(bytes) {
    const KB = 1024;
    const MB = KB * 1024;
    if (bytes >= MB) {
        return `${(bytes / MB).toFixed(1)} MB`;
    }
    if (bytes >= KB) {
        return `${(bytes / KB).toFixed(1)} KB`;
    }
    return `${bytes} B`;
}
|
| 1461 |
-
|
| 1462 |
// Analyze Button: validate the currently active input mode, then run the
// analysis. Paste mode requires at least 50 characters; upload mode requires
// a selected file.
document.getElementById('analyze-btn').addEventListener('click', async () => {
    const mode = document.querySelector('.input-tab.active').dataset.tab;
    const pastedText = document.getElementById('text-input').value.trim();
    const chosenFile = document.getElementById('file-input').files[0];

    if (mode === 'paste') {
        if (!pastedText) {
            alert('Please paste some text to analyze (minimum 50 characters).');
            return;
        }
        if (pastedText.length < 50) {
            alert('Text must be at least 50 characters long for accurate analysis.');
            return;
        }
    } else if (mode === 'upload' && !chosenFile) {
        alert('Please select a file to upload.');
        return;
    }

    await performAnalysis(mode, pastedText, chosenFile);
});
|
| 1485 |
-
|
| 1486 |
// Refresh and Try-Next buttons both clear the interface back to its initial
// empty state, ready for the next analysis.
['refresh-btn', 'try-next-btn'].forEach((buttonId) => {
    document.getElementById(buttonId).addEventListener('click', () => {
        resetAnalysisInterface();
    });
});
|
| 1495 |
-
|
| 1496 |
async function performAnalysis(mode, text, file) {
|
| 1497 |
const analyzeBtn = document.getElementById('analyze-btn');
|
| 1498 |
analyzeBtn.disabled = true;
|
| 1499 |
analyzeBtn.innerHTML = '⏳ Analyzing...';
|
| 1500 |
-
|
| 1501 |
showLoading();
|
| 1502 |
-
|
| 1503 |
try {
|
| 1504 |
let response;
|
| 1505 |
if (mode === 'paste') {
|
|
@@ -1507,7 +1532,6 @@ async function performAnalysis(mode, text, file) {
|
|
| 1507 |
} else {
|
| 1508 |
response = await analyzeFile(file);
|
| 1509 |
}
|
| 1510 |
-
|
| 1511 |
currentAnalysisData = response;
|
| 1512 |
displayResults(response);
|
| 1513 |
} catch (error) {
|
|
@@ -1518,14 +1542,12 @@ async function performAnalysis(mode, text, file) {
|
|
| 1518 |
analyzeBtn.innerHTML = '🔍 Analyze Text';
|
| 1519 |
}
|
| 1520 |
}
|
| 1521 |
-
|
| 1522 |
async function analyzeText(text) {
|
| 1523 |
const domain = document.getElementById('domain-select').value || null;
|
| 1524 |
const enableAttribution = document.getElementById('enable-attribution').checked;
|
| 1525 |
const enableHighlighting = document.getElementById('enable-highlighting').checked;
|
| 1526 |
const useSentenceLevel = document.getElementById('use-sentence-level').checked;
|
| 1527 |
const includeMetricsSummary = document.getElementById('include-metrics-summary').checked;
|
| 1528 |
-
|
| 1529 |
const response = await fetch(`${API_BASE}/api/analyze`, {
|
| 1530 |
method: 'POST',
|
| 1531 |
headers: { 'Content-Type': 'application/json' },
|
|
@@ -1539,21 +1561,17 @@ async function analyzeText(text) {
|
|
| 1539 |
skip_expensive_metrics: false
|
| 1540 |
})
|
| 1541 |
});
|
| 1542 |
-
|
| 1543 |
if (!response.ok) {
|
| 1544 |
const error = await response.json();
|
| 1545 |
throw new Error(error.error || 'Analysis failed');
|
| 1546 |
}
|
| 1547 |
-
|
| 1548 |
return await response.json();
|
| 1549 |
}
|
| 1550 |
-
|
| 1551 |
async function analyzeFile(file) {
|
| 1552 |
const domain = document.getElementById('domain-select').value || null;
|
| 1553 |
const enableAttribution = document.getElementById('enable-attribution').checked;
|
| 1554 |
const useSentenceLevel = document.getElementById('use-sentence-level').checked;
|
| 1555 |
const includeMetricsSummary = document.getElementById('include-metrics-summary').checked;
|
| 1556 |
-
|
| 1557 |
const formData = new FormData();
|
| 1558 |
formData.append('file', file);
|
| 1559 |
if (domain) formData.append('domain', domain);
|
|
@@ -1561,20 +1579,16 @@ async function analyzeFile(file) {
|
|
| 1561 |
formData.append('use_sentence_level', useSentenceLevel.toString());
|
| 1562 |
formData.append('include_metrics_summary', includeMetricsSummary.toString());
|
| 1563 |
formData.append('skip_expensive_metrics', 'false');
|
| 1564 |
-
|
| 1565 |
const response = await fetch(`${API_BASE}/api/analyze/file`, {
|
| 1566 |
method: 'POST',
|
| 1567 |
body: formData
|
| 1568 |
});
|
| 1569 |
-
|
| 1570 |
if (!response.ok) {
|
| 1571 |
const error = await response.json();
|
| 1572 |
throw new Error(error.error || 'File analysis failed');
|
| 1573 |
}
|
| 1574 |
-
|
| 1575 |
return await response.json();
|
| 1576 |
}
|
| 1577 |
-
|
| 1578 |
function showLoading() {
|
| 1579 |
document.getElementById('summary-report').innerHTML = `
|
| 1580 |
<div class="loading">
|
|
@@ -1586,7 +1600,6 @@ function showLoading() {
|
|
| 1586 |
</div>
|
| 1587 |
`;
|
| 1588 |
}
|
| 1589 |
-
|
| 1590 |
function showError(message) {
|
| 1591 |
document.getElementById('summary-report').innerHTML = `
|
| 1592 |
<div class="empty-state">
|
|
@@ -1596,10 +1609,8 @@ function showError(message) {
|
|
| 1596 |
</div>
|
| 1597 |
`;
|
| 1598 |
}
|
| 1599 |
-
|
| 1600 |
function displayResults(data) {
|
| 1601 |
console.log('Response data:', data);
|
| 1602 |
-
|
| 1603 |
// Handle different response structures
|
| 1604 |
const detection = data.detection_result;
|
| 1605 |
if (!detection) {
|
|
@@ -1607,16 +1618,13 @@ function displayResults(data) {
|
|
| 1607 |
console.error('Full response:', data);
|
| 1608 |
return;
|
| 1609 |
}
|
| 1610 |
-
|
| 1611 |
// Extract data based on your actual API structure
|
| 1612 |
const ensemble = detection.ensemble_result || detection.ensemble;
|
| 1613 |
const prediction = detection.prediction || {};
|
| 1614 |
const metrics = detection.metric_results || detection.metrics;
|
| 1615 |
const analysis = detection.analysis || {};
|
| 1616 |
-
|
| 1617 |
// Display Summary with enhanced reasoning
|
| 1618 |
displaySummary(ensemble, prediction, analysis, data.attribution, data.reasoning);
|
| 1619 |
-
|
| 1620 |
// Display Highlighted Text with enhanced features
|
| 1621 |
if (data.highlighted_html) {
|
| 1622 |
displayHighlightedText(data.highlighted_html);
|
|
@@ -1627,10 +1635,9 @@ function displayResults(data) {
|
|
| 1627 |
</div>
|
| 1628 |
`;
|
| 1629 |
}
|
| 1630 |
-
|
| 1631 |
-
// Display Metrics with full details
|
| 1632 |
if (metrics && Object.keys(metrics).length > 0) {
|
| 1633 |
-
|
| 1634 |
} else {
|
| 1635 |
document.getElementById('metrics-report').innerHTML = `
|
| 1636 |
<div class="empty-state">
|
|
@@ -1639,7 +1646,6 @@ function displayResults(data) {
|
|
| 1639 |
`;
|
| 1640 |
}
|
| 1641 |
}
|
| 1642 |
-
|
| 1643 |
function displaySummary(ensemble, prediction, analysis, attribution, reasoning) {
|
| 1644 |
// Use ensemble values from your actual API response
|
| 1645 |
const aiProbability = ensemble.ai_probability !== undefined ?
|
|
@@ -1651,12 +1657,10 @@ function displaySummary(ensemble, prediction, analysis, attribution, reasoning)
|
|
| 1651 |
const isAI = verdict.toLowerCase().includes('ai');
|
| 1652 |
const gaugeColor = isAI ? 'var(--danger)' : 'var(--success)';
|
| 1653 |
const gaugeDegree = aiProbability * 3.6;
|
| 1654 |
-
|
| 1655 |
const confidenceLevel = parseFloat(confidence) >= 70 ? 'HIGH' :
|
| 1656 |
parseFloat(confidence) >= 40 ? 'MEDIUM' : 'LOW';
|
| 1657 |
const confidenceClass = confidenceLevel === 'HIGH' ? 'confidence-high' :
|
| 1658 |
confidenceLevel === 'MEDIUM' ? 'confidence-medium' : 'confidence-low';
|
| 1659 |
-
|
| 1660 |
let attributionHTML = '';
|
| 1661 |
if (attribution && attribution.predicted_model) {
|
| 1662 |
const modelName = attribution.predicted_model.replace(/_/g, ' ').replace(/-/g, ' ').toUpperCase();
|
|
@@ -1677,17 +1681,12 @@ function displaySummary(ensemble, prediction, analysis, attribution, reasoning)
|
|
| 1677 |
attributionHTML = `
|
| 1678 |
<div class="attribution-section">
|
| 1679 |
<div class="attribution-title">🤖 AI Model Attribution</div>
|
| 1680 |
-
<div class="model-match">
|
| 1681 |
-
<span class="model-name">Most Likely: ${modelName}</span>
|
| 1682 |
-
<span class="model-confidence">${modelConf}%</span>
|
| 1683 |
-
</div>
|
| 1684 |
${topModels}
|
| 1685 |
${attribution.reasoning && attribution.reasoning.length > 0 ?
|
| 1686 |
`<p style="color: var(--text-secondary); margin-top: 1rem; font-size: 0.9rem;">${attribution.reasoning[0]}</p>` : ''}
|
| 1687 |
</div>
|
| 1688 |
`;
|
| 1689 |
}
|
| 1690 |
-
|
| 1691 |
document.getElementById('summary-report').innerHTML = `
|
| 1692 |
<div class="result-summary">
|
| 1693 |
<div class="gauge-container">
|
|
@@ -1727,10 +1726,12 @@ function displaySummary(ensemble, prediction, analysis, attribution, reasoning)
|
|
| 1727 |
</div>
|
| 1728 |
`;
|
| 1729 |
}
|
| 1730 |
-
|
| 1731 |
function createEnhancedReasoningHTML(ensemble, analysis, reasoning) {
|
| 1732 |
// Use actual reasoning data if available
|
| 1733 |
if (reasoning && reasoning.summary) {
|
|
|
|
|
|
|
|
|
|
| 1734 |
return `
|
| 1735 |
<div class="reasoning-box enhanced">
|
| 1736 |
<div class="reasoning-header">
|
|
@@ -1740,30 +1741,30 @@ function createEnhancedReasoningHTML(ensemble, analysis, reasoning) {
|
|
| 1740 |
${ensemble.overall_confidence >= 0.7 ? 'High Confidence' : ensemble.overall_confidence >= 0.4 ? 'Medium Confidence' : 'Low Confidence'}
|
| 1741 |
</div>
|
| 1742 |
</div>
|
| 1743 |
-
|
| 1744 |
<div class="verdict-summary">
|
| 1745 |
<div class="verdict-text">${ensemble.final_verdict}</div>
|
| 1746 |
<div class="probability">AI Probability: <span class="probability-value">${(ensemble.ai_probability * 100).toFixed(2)}%</span></div>
|
| 1747 |
</div>
|
| 1748 |
-
|
| 1749 |
-
|
| 1750 |
-
${reasoning.summary}
|
| 1751 |
</div>
|
| 1752 |
-
|
| 1753 |
${reasoning.key_indicators && reasoning.key_indicators.length > 0 ? `
|
| 1754 |
<div class="metrics-breakdown">
|
| 1755 |
<div class="breakdown-header">Key Indicators</div>
|
| 1756 |
-
${reasoning.key_indicators.map(indicator =>
|
| 1757 |
-
|
| 1758 |
-
|
| 1759 |
-
|
| 1760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1761 |
</div>
|
| 1762 |
-
|
| 1763 |
-
|
| 1764 |
</div>
|
| 1765 |
` : ''}
|
| 1766 |
-
|
| 1767 |
${ensemble.consensus_level > 0.7 ? `
|
| 1768 |
<div class="agreement-indicator">
|
| 1769 |
<div class="agreement-icon">✓</div>
|
|
@@ -1773,7 +1774,6 @@ function createEnhancedReasoningHTML(ensemble, analysis, reasoning) {
|
|
| 1773 |
</div>
|
| 1774 |
`;
|
| 1775 |
}
|
| 1776 |
-
|
| 1777 |
// Fallback to basic reasoning if no reasoning data
|
| 1778 |
return `
|
| 1779 |
<div class="reasoning-box">
|
|
@@ -1786,7 +1786,6 @@ function createEnhancedReasoningHTML(ensemble, analysis, reasoning) {
|
|
| 1786 |
</div>
|
| 1787 |
`;
|
| 1788 |
}
|
| 1789 |
-
|
| 1790 |
function displayHighlightedText(html) {
|
| 1791 |
document.getElementById('highlighted-report').innerHTML = `
|
| 1792 |
${createDefaultLegend()}
|
|
@@ -1796,7 +1795,6 @@ function displayHighlightedText(html) {
|
|
| 1796 |
${getHighlightStyles()}
|
| 1797 |
`;
|
| 1798 |
}
|
| 1799 |
-
|
| 1800 |
function createDefaultLegend() {
|
| 1801 |
return `
|
| 1802 |
<div class="highlight-legend">
|
|
@@ -1835,7 +1833,6 @@ function createDefaultLegend() {
|
|
| 1835 |
</div>
|
| 1836 |
`;
|
| 1837 |
}
|
| 1838 |
-
|
| 1839 |
function getHighlightStyles() {
|
| 1840 |
return `
|
| 1841 |
<style>
|
|
@@ -1891,87 +1888,118 @@ function getHighlightStyles() {
|
|
| 1891 |
</style>
|
| 1892 |
`;
|
| 1893 |
}
|
| 1894 |
-
|
| 1895 |
-
|
| 1896 |
-
const
|
| 1897 |
-
|
| 1898 |
-
|
| 1899 |
-
|
| 1900 |
-
<
|
| 1901 |
-
|
| 1902 |
-
<div>
|
| 1903 |
-
<div style="font-size: 0.85rem; color: var(--text-secondary);">Method</div>
|
| 1904 |
-
<div style="font-size: 1.1rem; font-weight: 700; color: #fff;">Confidence Calibrated</div>
|
| 1905 |
-
</div>
|
| 1906 |
-
<div>
|
| 1907 |
-
<div style="font-size: 0.85rem; color: var(--text-secondary);">Consensus</div>
|
| 1908 |
-
<div style="font-size: 1.1rem; font-weight: 700; color: #fff;">${(ensemble.consensus_level * 100).toFixed(1)}%</div>
|
| 1909 |
-
</div>
|
| 1910 |
-
<div>
|
| 1911 |
-
<div style="font-size: 0.85rem; color: var(--text-secondary);">Uncertainty</div>
|
| 1912 |
-
<div style="font-size: 1.1rem; font-weight: 700; color: #fff;">${(ensemble.uncertainty_score * 100).toFixed(1)}%</div>
|
| 1913 |
-
</div>
|
| 1914 |
</div>
|
| 1915 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1916 |
`;
|
| 1917 |
-
|
| 1918 |
-
metricOrder.forEach(metricKey => {
|
| 1919 |
const metric = metrics[metricKey];
|
| 1920 |
if (!metric) return;
|
| 1921 |
-
|
| 1922 |
const aiProb = (metric.ai_probability * 100).toFixed(1);
|
| 1923 |
const humanProb = (metric.human_probability * 100).toFixed(1);
|
| 1924 |
const confidence = (metric.confidence * 100).toFixed(1);
|
| 1925 |
-
const weight = ensemble.
|
| 1926 |
-
|
| 1927 |
-
|
| 1928 |
const color = metric.ai_probability >= 0.6 ? 'var(--danger)' :
|
| 1929 |
metric.ai_probability >= 0.4 ? 'var(--warning)' : 'var(--success)';
|
| 1930 |
const verdictText = metric.ai_probability >= 0.6 ? 'AI' :
|
| 1931 |
metric.ai_probability >= 0.4 ? 'UNCERTAIN' : 'HUMAN';
|
| 1932 |
const verdictClass = verdictText === 'AI' ? 'verdict-ai' :
|
| 1933 |
verdictText === 'UNCERTAIN' ? 'verdict-uncertain' : 'verdict-human';
|
| 1934 |
-
|
| 1935 |
-
|
| 1936 |
-
|
| 1937 |
-
|
| 1938 |
-
|
| 1939 |
-
<div class="metric-score" style="color: ${color};">${aiProb}%</div>
|
| 1940 |
-
</div>
|
| 1941 |
-
<div style="display: flex; gap: 1rem; margin: 1rem 0;">
|
| 1942 |
-
<div style="flex: 1;">
|
| 1943 |
-
<div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 0.25rem;">AI</div>
|
| 1944 |
-
<div style="background: rgba(51, 65, 85, 0.5); height: 8px; border-radius: 4px; overflow: hidden;">
|
| 1945 |
-
<div style="background: var(--danger); height: 100%; width: ${aiProb}%; transition: width 0.5s;"></div>
|
| 1946 |
-
</div>
|
| 1947 |
-
<div style="font-size: 0.85rem; font-weight: 600; margin-top: 0.25rem;">${aiProb}%</div>
|
| 1948 |
</div>
|
| 1949 |
-
<div
|
| 1950 |
-
|
| 1951 |
-
|
| 1952 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1953 |
</div>
|
| 1954 |
-
<div style="font-size: 0.85rem; font-weight: 600; margin-top: 0.25rem;">${humanProb}%</div>
|
| 1955 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1956 |
</div>
|
| 1957 |
-
<div style="display: flex; justify-content: space-between; align-items: center; margin: 0.75rem 0;">
|
| 1958 |
-
<span class="metric-verdict ${verdictClass}">${verdictText}</span>
|
| 1959 |
-
<span style="font-size: 0.85rem; color: var(--text-secondary);">Confidence: ${confidence}% | Weight: ${weight}%</span>
|
| 1960 |
-
</div>
|
| 1961 |
-
<div class="metric-description">
|
| 1962 |
-
${getMetricDescription(metricKey)}
|
| 1963 |
-
</div>
|
| 1964 |
-
${metric.details ? renderMetricDetails(metricKey, metric.details) : ''}
|
| 1965 |
</div>
|
| 1966 |
`;
|
| 1967 |
});
|
| 1968 |
-
|
| 1969 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1970 |
}
|
| 1971 |
-
|
| 1972 |
function renderMetricDetails(metricName, details) {
|
| 1973 |
if (!details || Object.keys(details).length === 0) return '';
|
| 1974 |
-
|
| 1975 |
// Key metrics to show for each type
|
| 1976 |
const importantKeys = {
|
| 1977 |
'structural': ['burstiness_score', 'length_uniformity', 'avg_sentence_length', 'std_sentence_length'],
|
|
@@ -1979,14 +2007,12 @@ function renderMetricDetails(metricName, details) {
|
|
| 1979 |
'entropy': ['token_diversity', 'sequence_unpredictability', 'char_entropy'],
|
| 1980 |
'semantic_analysis': ['coherence_score', 'consistency_score', 'repetition_score'],
|
| 1981 |
'linguistic': ['pos_diversity', 'syntactic_complexity', 'grammatical_consistency'],
|
| 1982 |
-
'
|
| 1983 |
};
|
| 1984 |
-
|
| 1985 |
const keysToShow = importantKeys[metricName] || Object.keys(details).slice(0, 6);
|
| 1986 |
let detailsHTML = '<div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--border);">';
|
| 1987 |
detailsHTML += '<div style="font-size: 0.9rem; font-weight: 600; color: var(--text-secondary); margin-bottom: 0.75rem;">📈 Detailed Metrics:</div>';
|
| 1988 |
detailsHTML += '<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 0.75rem; font-size: 0.85rem;">';
|
| 1989 |
-
|
| 1990 |
keysToShow.forEach(key => {
|
| 1991 |
if (details[key] !== undefined && details[key] !== null) {
|
| 1992 |
const value = typeof details[key] === 'number' ?
|
|
@@ -2001,11 +2027,9 @@ function renderMetricDetails(metricName, details) {
|
|
| 2001 |
`;
|
| 2002 |
}
|
| 2003 |
});
|
| 2004 |
-
|
| 2005 |
detailsHTML += '</div></div>';
|
| 2006 |
return detailsHTML;
|
| 2007 |
}
|
| 2008 |
-
|
| 2009 |
function getMetricDescription(metricName) {
|
| 2010 |
const descriptions = {
|
| 2011 |
structural: 'Analyzes sentence structure, length patterns, and statistical features.',
|
|
@@ -2013,11 +2037,10 @@ function getMetricDescription(metricName) {
|
|
| 2013 |
entropy: 'Evaluates token diversity and sequence unpredictability.',
|
| 2014 |
semantic_analysis: 'Examines semantic coherence, topic consistency, and logical flow.',
|
| 2015 |
linguistic: 'Assesses grammatical patterns, syntactic complexity, and style markers.',
|
| 2016 |
-
|
| 2017 |
};
|
| 2018 |
return descriptions[metricName] || 'Metric analysis complete.';
|
| 2019 |
}
|
| 2020 |
-
|
| 2021 |
function formatMetricName(name) {
|
| 2022 |
const names = {
|
| 2023 |
structural: 'Structural Analysis',
|
|
@@ -2025,25 +2048,21 @@ function formatMetricName(name) {
|
|
| 2025 |
entropy: 'Entropy',
|
| 2026 |
semantic_analysis: 'Semantic Analysis',
|
| 2027 |
linguistic: 'Linguistic Analysis',
|
| 2028 |
-
|
| 2029 |
};
|
| 2030 |
return names[name] || name.split('_').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
| 2031 |
}
|
| 2032 |
-
|
| 2033 |
// Convert a snake_case domain id (e.g. "academic_paper") to Title Case.
function formatDomainName(domain) {
    const titled = [];
    for (const word of domain.split('_')) {
        titled.push(word.charAt(0).toUpperCase() + word.slice(1));
    }
    return titled.join(' ');
}
|
| 2036 |
-
|
| 2037 |
async function downloadReport(format) {
|
| 2038 |
if (!currentAnalysisData) {
|
| 2039 |
alert('No analysis data available');
|
| 2040 |
return;
|
| 2041 |
}
|
| 2042 |
-
|
| 2043 |
try {
|
| 2044 |
const analysisId = currentAnalysisData.analysis_id;
|
| 2045 |
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
| 2046 |
-
|
| 2047 |
// For JSON, download directly from current data
|
| 2048 |
if (format === 'json') {
|
| 2049 |
const data = {
|
|
@@ -2058,7 +2077,6 @@ async function downloadReport(format) {
|
|
| 2058 |
await downloadBlob(blob, filename);
|
| 2059 |
return;
|
| 2060 |
}
|
| 2061 |
-
|
| 2062 |
// Get the original text for report generation
|
| 2063 |
const activeTab = document.querySelector('.input-tab.active').dataset.tab;
|
| 2064 |
let textToSend = '';
|
|
@@ -2068,23 +2086,19 @@ async function downloadReport(format) {
|
|
| 2068 |
textToSend = currentAnalysisData.detection_result?.processed_text?.text ||
|
| 2069 |
'Uploaded file content - see analysis for details';
|
| 2070 |
}
|
| 2071 |
-
|
| 2072 |
// For PDF, request from server
|
| 2073 |
const formData = new FormData();
|
| 2074 |
formData.append('analysis_id', analysisId);
|
| 2075 |
formData.append('text', textToSend);
|
| 2076 |
formData.append('formats', format);
|
| 2077 |
formData.append('include_highlights', document.getElementById('enable-highlighting').checked.toString());
|
| 2078 |
-
|
| 2079 |
const response = await fetch(`${API_BASE}/api/report/generate`, {
|
| 2080 |
method: 'POST',
|
| 2081 |
body: formData
|
| 2082 |
});
|
| 2083 |
-
|
| 2084 |
if (!response.ok) {
|
| 2085 |
throw new Error('Report generation failed');
|
| 2086 |
}
|
| 2087 |
-
|
| 2088 |
const result = await response.json();
|
| 2089 |
if (result.reports && result.reports[format]) {
|
| 2090 |
const filename = result.reports[format];
|
|
@@ -2103,7 +2117,6 @@ async function downloadReport(format) {
|
|
| 2103 |
alert('Failed to download report. Please try again.');
|
| 2104 |
}
|
| 2105 |
}
|
| 2106 |
-
|
| 2107 |
async function downloadBlob(blob, filename) {
|
| 2108 |
try {
|
| 2109 |
const url = URL.createObjectURL(blob);
|
|
@@ -2113,7 +2126,6 @@ async function downloadBlob(blob, filename) {
|
|
| 2113 |
a.style.display = 'none';
|
| 2114 |
document.body.appendChild(a);
|
| 2115 |
a.click();
|
| 2116 |
-
|
| 2117 |
setTimeout(() => {
|
| 2118 |
document.body.removeChild(a);
|
| 2119 |
URL.revokeObjectURL(url);
|
|
@@ -2124,7 +2136,6 @@ async function downloadBlob(blob, filename) {
|
|
| 2124 |
alert('Download failed. Please try again.');
|
| 2125 |
}
|
| 2126 |
}
|
| 2127 |
-
|
| 2128 |
function showDownloadSuccess(filename) {
|
| 2129 |
const notification = document.createElement('div');
|
| 2130 |
notification.style.cssText = `
|
|
@@ -2147,7 +2158,6 @@ function showDownloadSuccess(filename) {
|
|
| 2147 |
</div>
|
| 2148 |
`;
|
| 2149 |
document.body.appendChild(notification);
|
| 2150 |
-
|
| 2151 |
if (!document.querySelector('#download-animation')) {
|
| 2152 |
const style = document.createElement('style');
|
| 2153 |
style.id = 'download-animation';
|
|
@@ -2159,14 +2169,12 @@ function showDownloadSuccess(filename) {
|
|
| 2159 |
`;
|
| 2160 |
document.head.appendChild(style);
|
| 2161 |
}
|
| 2162 |
-
|
| 2163 |
setTimeout(() => {
|
| 2164 |
if (notification.parentNode) {
|
| 2165 |
notification.parentNode.removeChild(notification);
|
| 2166 |
}
|
| 2167 |
}, 3000);
|
| 2168 |
}
|
| 2169 |
-
|
| 2170 |
// Smooth scrolling for anchor links
|
| 2171 |
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
|
| 2172 |
anchor.addEventListener('click', function (e) {
|
|
@@ -2180,7 +2188,6 @@ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
|
|
| 2180 |
}
|
| 2181 |
});
|
| 2182 |
});
|
| 2183 |
-
|
| 2184 |
// Initialize - show landing page by default
|
| 2185 |
showLanding();
|
| 2186 |
</script>
|
|
|
|
| 273 |
padding: 2rem;
|
| 274 |
border: 1px solid var(--border);
|
| 275 |
backdrop-filter: blur(10px);
|
| 276 |
+
/* Panel keeps a fixed overall height; the inner .panel-content scrolls */
|
| 277 |
+
height: 850px;
|
| 278 |
+
overflow: hidden;
|
| 279 |
+
display: flex;
|
| 280 |
+
flex-direction: column;
|
| 281 |
+
}
|
| 282 |
+
.panel-content {
|
| 283 |
+
flex: 1;
|
| 284 |
+
overflow-y: auto;
|
| 285 |
+
padding: 1rem 0;
|
| 286 |
}
|
| 287 |
.panel-title {
|
| 288 |
font-size: 1.5rem;
|
|
|
|
| 621 |
color: var(--text-secondary);
|
| 622 |
line-height: 1.7;
|
| 623 |
}
|
|
|
|
| 624 |
/* Enhanced Reasoning Styles */
|
| 625 |
.reasoning-box.enhanced {
|
| 626 |
background: linear-gradient(135deg, rgba(30, 41, 59, 0.95) 0%, rgba(15, 23, 42, 0.95) 100%);
|
|
|
|
| 630 |
margin-top: 2rem;
|
| 631 |
backdrop-filter: blur(10px);
|
| 632 |
}
|
|
|
|
| 633 |
.reasoning-header {
|
| 634 |
display: flex;
|
| 635 |
align-items: center;
|
| 636 |
gap: 0.75rem;
|
| 637 |
margin-bottom: 1rem;
|
| 638 |
}
|
|
|
|
| 639 |
.reasoning-icon {
|
| 640 |
font-size: 1.5rem;
|
| 641 |
}
|
|
|
|
| 642 |
.reasoning-title {
|
| 643 |
font-size: 1.1rem;
|
| 644 |
font-weight: 700;
|
| 645 |
color: var(--primary);
|
| 646 |
flex: 1;
|
| 647 |
}
|
|
|
|
| 648 |
.confidence-tag {
|
| 649 |
padding: 0.25rem 0.75rem;
|
| 650 |
border-radius: 20px;
|
|
|
|
| 652 |
font-weight: 600;
|
| 653 |
text-transform: uppercase;
|
| 654 |
}
|
|
|
|
| 655 |
.high-confidence {
|
| 656 |
background: rgba(16, 185, 129, 0.2);
|
| 657 |
color: var(--success);
|
| 658 |
border: 1px solid rgba(16, 185, 129, 0.3);
|
| 659 |
}
|
|
|
|
| 660 |
.medium-confidence {
|
| 661 |
background: rgba(245, 158, 11, 0.2);
|
| 662 |
color: var(--warning);
|
| 663 |
border: 1px solid rgba(245, 158, 11, 0.3);
|
| 664 |
}
|
|
|
|
| 665 |
.low-confidence {
|
| 666 |
background: rgba(239, 68, 68, 0.2);
|
| 667 |
color: var(--danger);
|
| 668 |
border: 1px solid rgba(239, 68, 68, 0.3);
|
| 669 |
}
|
|
|
|
| 670 |
.verdict-summary {
|
| 671 |
display: flex;
|
| 672 |
justify-content: space-between;
|
|
|
|
| 676 |
background: rgba(51, 65, 85, 0.3);
|
| 677 |
border-radius: 8px;
|
| 678 |
}
|
|
|
|
| 679 |
.verdict-text {
|
| 680 |
font-size: 1.3rem;
|
| 681 |
font-weight: 800;
|
| 682 |
color: var(--warning);
|
| 683 |
}
|
|
|
|
| 684 |
.probability {
|
| 685 |
color: var(--text-secondary);
|
| 686 |
font-size: 0.95rem;
|
| 687 |
}
|
|
|
|
| 688 |
.probability-value {
|
| 689 |
color: var(--text-primary);
|
| 690 |
font-weight: 700;
|
| 691 |
}
|
|
|
|
| 692 |
.metrics-breakdown {
|
| 693 |
margin-bottom: 1.5rem;
|
| 694 |
}
|
|
|
|
| 695 |
.breakdown-header {
|
| 696 |
font-size: 0.9rem;
|
| 697 |
font-weight: 600;
|
|
|
|
| 700 |
text-transform: uppercase;
|
| 701 |
letter-spacing: 0.5px;
|
| 702 |
}
|
|
|
|
| 703 |
.metric-indicator {
|
| 704 |
display: flex;
|
| 705 |
justify-content: space-between;
|
|
|
|
| 709 |
border-radius: 8px;
|
| 710 |
transition: all 0.2s ease;
|
| 711 |
}
|
|
|
|
| 712 |
.metric-indicator:hover {
|
| 713 |
background: rgba(51, 65, 85, 0.4);
|
| 714 |
transform: translateX(4px);
|
| 715 |
}
|
|
|
|
| 716 |
.metric-name {
|
| 717 |
font-weight: 600;
|
| 718 |
color: var(--text-primary);
|
| 719 |
min-width: 140px;
|
| 720 |
}
|
|
|
|
| 721 |
.metric-details {
|
| 722 |
display: flex;
|
| 723 |
gap: 1rem;
|
| 724 |
align-items: center;
|
| 725 |
}
|
|
|
|
| 726 |
.verdict-badge {
|
| 727 |
padding: 0.2rem 0.6rem;
|
| 728 |
border-radius: 6px;
|
|
|
|
| 732 |
min-width: 60px;
|
| 733 |
text-align: center;
|
| 734 |
}
|
|
|
|
| 735 |
.ai-badge {
|
| 736 |
background: rgba(239, 68, 68, 0.2);
|
| 737 |
color: var(--danger);
|
| 738 |
border: 1px solid rgba(239, 68, 68, 0.3);
|
| 739 |
}
|
|
|
|
| 740 |
.human-badge {
|
| 741 |
background: rgba(16, 185, 129, 0.2);
|
| 742 |
color: var(--success);
|
| 743 |
border: 1px solid rgba(16, 185, 129, 0.3);
|
| 744 |
}
|
|
|
|
| 745 |
.confidence, .weight {
|
| 746 |
font-size: 0.8rem;
|
| 747 |
color: var(--text-muted);
|
| 748 |
min-width: 100px;
|
| 749 |
}
|
|
|
|
| 750 |
.agreement-indicator {
|
| 751 |
display: flex;
|
| 752 |
align-items: center;
|
|
|
|
| 757 |
border-radius: 8px;
|
| 758 |
color: var(--success);
|
| 759 |
}
|
|
|
|
| 760 |
.agreement-icon {
|
| 761 |
font-weight: 700;
|
| 762 |
}
|
|
|
|
| 763 |
.agreement-text {
|
| 764 |
font-size: 0.9rem;
|
| 765 |
font-weight: 600;
|
| 766 |
}
|
|
|
|
| 767 |
/* Attribution Section */
|
| 768 |
.attribution-section {
|
| 769 |
margin-top: 2rem;
|
|
|
|
| 951 |
text-align: center;
|
| 952 |
color: var(--text-muted);
|
| 953 |
}
|
| 954 |
+
/* Metrics Carousel */
|
| 955 |
+
.metrics-carousel-container {
|
| 956 |
+
display: flex;
|
| 957 |
+
flex-direction: column;
|
| 958 |
+
height: 100%;
|
| 959 |
+
}
|
| 960 |
+
.metrics-carousel-content {
|
| 961 |
+
flex: 1;
|
| 962 |
+
/* Removed padding and centering to allow content to fill space */
|
| 963 |
+
padding: 0;
|
| 964 |
+
/* Removed align-items: center; justify-content: center; to let content take natural space */
|
| 965 |
+
display: flex;
|
| 966 |
+
align-items: flex-start;
|
| 967 |
+
justify-content: flex-start;
|
| 968 |
+
overflow-y: auto;
|
| 969 |
+
/* Added some internal spacing for readability */
|
| 970 |
+
padding: 1rem;
|
| 971 |
+
/* min-height: 600px; */
|
| 972 |
+
}
|
| 973 |
+
.metric-slide {
|
| 974 |
+
display: none;
|
| 975 |
+
width: 100%;
|
| 976 |
+
/* Reduced padding to make card tighter */
|
| 977 |
+
padding: 1rem;
|
| 978 |
+
}
|
| 979 |
+
.metric-slide.active {
|
| 980 |
+
display: block;
|
| 981 |
+
}
|
| 982 |
+
.metrics-carousel-nav {
|
| 983 |
+
display: flex;
|
| 984 |
+
justify-content: space-between;
|
| 985 |
+
align-items: center;
|
| 986 |
+
padding: 1rem;
|
| 987 |
+
border-top: 1px solid var(--border);
|
| 988 |
+
background: rgba(15, 23, 42, 0.8);
|
| 989 |
+
}
|
| 990 |
+
.carousel-btn {
|
| 991 |
+
padding: 0.75rem 1.5rem;
|
| 992 |
+
background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
|
| 993 |
+
color: #fff;
|
| 994 |
+
border: none;
|
| 995 |
+
border-radius: 8px;
|
| 996 |
+
font-weight: 600;
|
| 997 |
+
cursor: pointer;
|
| 998 |
+
transition: transform 0.3s, box-shadow 0.3s;
|
| 999 |
+
}
|
| 1000 |
+
.carousel-btn:hover {
|
| 1001 |
+
transform: translateY(-2px);
|
| 1002 |
+
box-shadow: 0 8px 20px rgba(6, 182, 212, 0.4);
|
| 1003 |
+
}
|
| 1004 |
+
.carousel-btn:disabled {
|
| 1005 |
+
opacity: 0.5;
|
| 1006 |
+
cursor: not-allowed;
|
| 1007 |
+
transform: none;
|
| 1008 |
+
}
|
| 1009 |
+
.carousel-position {
|
| 1010 |
+
font-size: 0.9rem;
|
| 1011 |
+
color: var(--text-secondary);
|
| 1012 |
+
font-weight: 600;
|
| 1013 |
+
}
|
| 1014 |
/* Responsive */
|
| 1015 |
@media (max-width: 1200px) {
|
| 1016 |
.interface-grid {
|
|
|
|
| 1045 |
flex-direction: column;
|
| 1046 |
gap: 0.75rem;
|
| 1047 |
}
|
| 1048 |
+
.panel {
|
| 1049 |
+
height: auto;
|
| 1050 |
+
min-height: 600px;
|
| 1051 |
+
}
|
| 1052 |
}
|
| 1053 |
/* Scroll Behavior */
|
| 1054 |
html {
|
|
|
|
| 1114 |
<div class="feature-icon">🔬</div>
|
| 1115 |
<h3 class="feature-title">6-Metric Ensemble</h3>
|
| 1116 |
<p class="feature-description">
|
| 1117 |
+
Combines Perplexity, Entropy, Statistical, Linguistic, Semantic Analysis, and Multi-Perturbation Stability for comprehensive detection with orthogonal signal capture.
|
| 1118 |
</p>
|
| 1119 |
</div>
|
| 1120 |
<div class="feature-card">
|
|
|
|
| 1157 |
<div class="metric-icon-box">📊</div>
|
| 1158 |
<div class="metric-content">
|
| 1159 |
<h3>Perplexity <span class="metric-weight">Weight: 25%</span></h3>
|
| 1160 |
+
<p>Measures how predictable the text is using GPT-2 language model. AI-generated text typically has lower perplexity (more predictable) than human writing, which tends to be more varied and surprising.</p>
|
| 1161 |
</div>
|
| 1162 |
</div>
|
| 1163 |
<div class="metric-card">
|
|
|
|
| 1191 |
<div class="metric-card">
|
| 1192 |
<div class="metric-icon-box">🔍</div>
|
| 1193 |
<div class="metric-content">
|
| 1194 |
+
<h3>Multi-Perturbation Stability <span class="metric-weight">Weight: 10%</span></h3>
|
| 1195 |
<p>Tests text stability under random perturbations. AI-generated text tends to maintain higher likelihood scores even when slightly modified, while human text shows more variation.</p>
|
| 1196 |
</div>
|
| 1197 |
</div>
|
|
|
|
| 1208 |
<!-- Left Panel: Input -->
|
| 1209 |
<div class="panel">
|
| 1210 |
<h2 class="panel-title">Submit Content for Analysis</h2>
|
| 1211 |
+
<div class="panel-content">
|
| 1212 |
+
<div class="input-tabs">
|
| 1213 |
+
<button class="input-tab active" data-tab="paste">
|
| 1214 |
+
📋 Paste Text
|
| 1215 |
+
</button>
|
| 1216 |
+
<button class="input-tab" data-tab="upload">
|
| 1217 |
+
📁 Upload File
|
| 1218 |
+
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1219 |
</div>
|
| 1220 |
+
<div id="paste-tab" class="tab-content active">
|
| 1221 |
+
<textarea
|
| 1222 |
+
id="text-input"
|
| 1223 |
+
class="text-input"
|
| 1224 |
+
placeholder="Paste your text here for analysis...
|
| 1225 |
+
The more text you provide (minimum 50 characters), the more accurate the detection will be. Our system analyzes linguistic patterns, statistical features, and semantic structures to determine authenticity."
|
| 1226 |
+
></textarea>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1227 |
</div>
|
| 1228 |
+
<div id="upload-tab" class="tab-content">
|
| 1229 |
+
<div class="file-upload-area" id="file-upload-area">
|
| 1230 |
+
<input type="file" id="file-input" class="file-input" accept=".txt,.pdf,.docx,.doc,.md">
|
| 1231 |
+
<div class="file-upload-icon">📄</div>
|
| 1232 |
+
<div style="font-size: 1.1rem; font-weight: 600; margin-bottom: 0.5rem;">
|
| 1233 |
+
Click to upload or drag and drop
|
| 1234 |
+
</div>
|
| 1235 |
+
<div style="color: var(--text-muted); font-size: 0.9rem;">
|
| 1236 |
+
Supported formats: TXT, PDF, DOCX, DOC, MD
|
| 1237 |
+
</div>
|
| 1238 |
+
<div style="color: var(--text-muted); font-size: 0.85rem; margin-top: 0.5rem;">
|
| 1239 |
+
Maximum file size: 10MB
|
| 1240 |
+
</div>
|
| 1241 |
</div>
|
| 1242 |
+
<div id="file-name-display" class="file-name-display"></div>
|
| 1243 |
</div>
|
| 1244 |
+
<div class="options-section">
|
| 1245 |
+
<div class="option-row">
|
| 1246 |
+
<label class="option-label">Content Domain:</label>
|
| 1247 |
+
<select id="domain-select">
|
| 1248 |
+
<option value="">Auto-detect</option>
|
| 1249 |
+
<option value="academic">Academic</option>
|
| 1250 |
+
<option value="technical_doc">Technical/Medical</option>
|
| 1251 |
+
<option value="creative">Creative Writing</option>
|
| 1252 |
+
<option value="social_media">Social Media</option>
|
| 1253 |
+
</select>
|
| 1254 |
</div>
|
| 1255 |
+
<div class="option-row">
|
| 1256 |
+
<label class="option-label">Enable AI Model Attribution:</label>
|
| 1257 |
+
<div class="checkbox-wrapper">
|
| 1258 |
+
<input type="checkbox" id="enable-attribution" checked>
|
| 1259 |
+
<span style="font-size: 0.85rem; color: var(--text-muted);">Identify which AI model generated the text</span>
|
| 1260 |
+
</div>
|
|
|
|
| 1261 |
</div>
|
| 1262 |
+
<div class="option-row">
|
| 1263 |
+
<label class="option-label">Enable Sentence Highlighting:</label>
|
| 1264 |
+
<div class="checkbox-wrapper">
|
| 1265 |
+
<input type="checkbox" id="enable-highlighting" checked>
|
| 1266 |
+
<span style="font-size: 0.85rem; color: var(--text-muted);">Show suspicious sentences</span>
|
| 1267 |
+
</div>
|
| 1268 |
+
</div>
|
| 1269 |
+
<!-- NEW OPTIONS -->
|
| 1270 |
+
<div class="option-row">
|
| 1271 |
+
<label class="option-label">Sentence-Level Analysis:</label>
|
| 1272 |
+
<div class="checkbox-wrapper">
|
| 1273 |
+
<input type="checkbox" id="use-sentence-level" checked>
|
| 1274 |
+
<span style="font-size: 0.85rem; color: var(--text-muted);">More accurate but slower analysis</span>
|
| 1275 |
+
</div>
|
| 1276 |
+
</div>
|
| 1277 |
+
<div class="option-row">
|
| 1278 |
+
<label class="option-label">Include Metrics Summary:</label>
|
| 1279 |
+
<div class="checkbox-wrapper">
|
| 1280 |
+
<input type="checkbox" id="include-metrics-summary" checked>
|
| 1281 |
+
<span style="font-size: 0.85rem; color: var(--text-muted);">Show text analysis statistics</span>
|
| 1282 |
+
</div>
|
| 1283 |
</div>
|
| 1284 |
</div>
|
| 1285 |
</div>
|
| 1286 |
+
<div style="display: flex; flex-direction: column; gap: 1rem;">
|
| 1287 |
+
<button id="analyze-btn" class="analyze-btn">
|
| 1288 |
+
🔍 Analyze Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1289 |
</button>
|
| 1290 |
+
<div class="action-buttons">
|
| 1291 |
+
<button id="refresh-btn" class="action-btn refresh">
|
| 1292 |
+
🔄 Refresh
|
| 1293 |
+
</button>
|
| 1294 |
+
<button id="try-next-btn" class="action-btn">
|
| 1295 |
+
➕ Try Next
|
| 1296 |
+
</button>
|
| 1297 |
+
</div>
|
| 1298 |
</div>
|
| 1299 |
</div>
|
| 1300 |
<!-- Right Panel: Results -->
|
| 1301 |
<div class="panel">
|
| 1302 |
<h2 class="panel-title">Analysis Report</h2>
|
| 1303 |
+
<div class="panel-content">
|
| 1304 |
+
<div class="report-tabs">
|
| 1305 |
+
<button class="report-tab active" data-report="summary">
|
| 1306 |
+
📊 Summary
|
| 1307 |
+
</button>
|
| 1308 |
+
<button class="report-tab" data-report="highlighted">
|
| 1309 |
+
📝 Highlighted Text
|
| 1310 |
+
</button>
|
| 1311 |
+
<button class="report-tab" data-report="metrics">
|
| 1312 |
+
ℹ️ Detailed Metrics
|
| 1313 |
+
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1314 |
</div>
|
| 1315 |
+
<!-- Summary Report -->
|
| 1316 |
+
<div id="summary-report" class="report-content active">
|
| 1317 |
+
<div class="empty-state">
|
| 1318 |
+
<div class="empty-icon">✓</div>
|
| 1319 |
+
<h3 class="empty-title">Ready for Analysis</h3>
|
| 1320 |
+
<p class="empty-description">
|
| 1321 |
+
Paste text or upload a document to begin comprehensive AI detection analysis.
|
| 1322 |
+
Our 6-metric ensemble will provide detailed insights.
|
| 1323 |
+
</p>
|
| 1324 |
+
</div>
|
| 1325 |
</div>
|
| 1326 |
+
<!-- Highlighted Text Report -->
|
| 1327 |
+
<div id="highlighted-report" class="report-content">
|
| 1328 |
+
<div class="empty-state">
|
| 1329 |
+
<div class="empty-icon">📝</div>
|
| 1330 |
+
<p class="empty-description">
|
| 1331 |
+
Run an analysis to see sentence-level highlighting
|
| 1332 |
+
</p>
|
| 1333 |
+
</div>
|
| 1334 |
+
</div>
|
| 1335 |
+
<!-- Metrics Report -->
|
| 1336 |
+
<div id="metrics-report" class="report-content">
|
| 1337 |
+
<div class="empty-state">
|
| 1338 |
+
<div class="empty-icon">📊</div>
|
| 1339 |
+
<p class="empty-description">
|
| 1340 |
+
Run an analysis to see detailed metric breakdowns
|
| 1341 |
+
</p>
|
| 1342 |
+
</div>
|
| 1343 |
</div>
|
| 1344 |
</div>
|
| 1345 |
</div>
|
|
|
|
| 1349 |
// Configuration
|
| 1350 |
const API_BASE = '';
|
| 1351 |
let currentAnalysisData = null;
|
| 1352 |
+
let currentMetricIndex = 0;
|
| 1353 |
+
let totalMetrics = 0;
|
| 1354 |
// Navigation
|
| 1355 |
function showLanding() {
|
| 1356 |
document.getElementById('landing-page').style.display = 'block';
|
| 1357 |
document.getElementById('analysis-interface').style.display = 'none';
|
| 1358 |
window.scrollTo(0, 0);
|
| 1359 |
}
|
|
|
|
| 1360 |
function showAnalysis() {
|
| 1361 |
document.getElementById('landing-page').style.display = 'none';
|
| 1362 |
document.getElementById('analysis-interface').style.display = 'block';
|
| 1363 |
window.scrollTo(0, 0);
|
| 1364 |
resetAnalysisInterface();
|
| 1365 |
}
|
|
|
|
| 1366 |
// Reset analysis interface
|
| 1367 |
function resetAnalysisInterface() {
|
| 1368 |
// Clear text input
|
| 1369 |
document.getElementById('text-input').value = '';
|
|
|
|
| 1370 |
// Clear file input and display
|
| 1371 |
document.getElementById('file-input').value = '';
|
| 1372 |
document.getElementById('file-name-display').style.display = 'none';
|
| 1373 |
document.getElementById('file-name-display').innerHTML = '';
|
|
|
|
| 1374 |
// Reset tabs to paste
|
| 1375 |
document.querySelectorAll('.input-tab').forEach(t => t.classList.remove('active'));
|
| 1376 |
document.querySelector('.input-tab[data-tab="paste"]').classList.add('active');
|
| 1377 |
document.querySelectorAll('.tab-content').forEach(content => content.classList.remove('active'));
|
| 1378 |
document.getElementById('paste-tab').classList.add('active');
|
|
|
|
| 1379 |
// Reset options to defaults
|
| 1380 |
document.getElementById('domain-select').value = '';
|
| 1381 |
document.getElementById('enable-attribution').checked = true;
|
| 1382 |
document.getElementById('enable-highlighting').checked = true;
|
| 1383 |
document.getElementById('use-sentence-level').checked = true;
|
| 1384 |
document.getElementById('include-metrics-summary').checked = true;
|
|
|
|
| 1385 |
// Reset report tabs to summary
|
| 1386 |
document.querySelectorAll('.report-tab').forEach(t => t.classList.remove('active'));
|
| 1387 |
document.querySelector('.report-tab[data-report="summary"]').classList.add('active');
|
| 1388 |
document.querySelectorAll('.report-content').forEach(content => content.classList.remove('active'));
|
| 1389 |
document.getElementById('summary-report').classList.add('active');
|
|
|
|
| 1390 |
// Show empty state
|
| 1391 |
document.getElementById('summary-report').innerHTML = `
|
| 1392 |
<div class="empty-state">
|
|
|
|
| 1414 |
</p>
|
| 1415 |
</div>
|
| 1416 |
`;
|
|
|
|
| 1417 |
// Clear current analysis data
|
| 1418 |
currentAnalysisData = null;
|
| 1419 |
+
currentMetricIndex = 0;
|
| 1420 |
+
totalMetrics = 0;
|
| 1421 |
}
|
|
|
|
| 1422 |
// Input Tab Switching
|
| 1423 |
document.querySelectorAll('.input-tab').forEach(tab => {
|
| 1424 |
tab.addEventListener('click', () => {
|
|
|
|
| 1431 |
document.getElementById(`${tabName}-tab`).classList.add('active');
|
| 1432 |
});
|
| 1433 |
});
|
|
|
|
| 1434 |
// Report Tab Switching
|
| 1435 |
document.querySelectorAll('.report-tab').forEach(tab => {
|
| 1436 |
tab.addEventListener('click', () => {
|
|
|
|
| 1443 |
document.getElementById(`${reportName}-report`).classList.add('active');
|
| 1444 |
});
|
| 1445 |
});
|
|
|
|
| 1446 |
// File Upload Handling
|
| 1447 |
const fileInput = document.getElementById('file-input');
|
| 1448 |
const fileUploadArea = document.getElementById('file-upload-area');
|
| 1449 |
const fileNameDisplay = document.getElementById('file-name-display');
|
|
|
|
| 1450 |
fileUploadArea.addEventListener('click', () => {
|
| 1451 |
fileInput.click();
|
| 1452 |
});
|
|
|
|
| 1453 |
fileInput.addEventListener('change', (e) => {
|
| 1454 |
handleFileSelect(e.target.files[0]);
|
| 1455 |
});
|
|
|
|
| 1456 |
// Drag and Drop
|
| 1457 |
fileUploadArea.addEventListener('dragover', (e) => {
|
| 1458 |
e.preventDefault();
|
| 1459 |
fileUploadArea.classList.add('drag-over');
|
| 1460 |
});
|
|
|
|
| 1461 |
fileUploadArea.addEventListener('dragleave', () => {
|
| 1462 |
fileUploadArea.classList.remove('drag-over');
|
| 1463 |
});
|
|
|
|
| 1464 |
fileUploadArea.addEventListener('drop', (e) => {
|
| 1465 |
e.preventDefault();
|
| 1466 |
fileUploadArea.classList.remove('drag-over');
|
|
|
|
| 1470 |
handleFileSelect(file);
|
| 1471 |
}
|
| 1472 |
});
|
|
|
|
| 1473 |
function handleFileSelect(file) {
|
| 1474 |
if (!file) return;
|
|
|
|
| 1475 |
const allowedTypes = ['.txt', '.pdf', '.docx', '.doc', '.md'];
|
| 1476 |
const fileExt = '.' + file.name.split('.').pop().toLowerCase();
|
|
|
|
| 1477 |
if (!allowedTypes.includes(fileExt)) {
|
| 1478 |
alert('Unsupported file type. Please upload: TXT, PDF, DOCX, DOC, or MD files.');
|
| 1479 |
return;
|
| 1480 |
}
|
|
|
|
| 1481 |
if (file.size > 10 * 1024 * 1024) {
|
| 1482 |
alert('File size exceeds 10MB limit.');
|
| 1483 |
return;
|
| 1484 |
}
|
|
|
|
| 1485 |
fileNameDisplay.style.display = 'block';
|
| 1486 |
fileNameDisplay.innerHTML = `
|
| 1487 |
<strong>Selected file:</strong> ${file.name}
|
| 1488 |
<span style="color: var(--text-muted);">(${formatFileSize(file.size)})</span>
|
| 1489 |
`;
|
| 1490 |
}
|
|
|
|
| 1491 |
function formatFileSize(bytes) {
|
| 1492 |
if (bytes < 1024) return bytes + ' B';
|
| 1493 |
if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB';
|
| 1494 |
return (bytes / (1024 * 1024)).toFixed(1) + ' MB';
|
| 1495 |
}
|
|
|
|
| 1496 |
// Analyze Button
|
| 1497 |
document.getElementById('analyze-btn').addEventListener('click', async () => {
|
| 1498 |
const activeTab = document.querySelector('.input-tab.active').dataset.tab;
|
| 1499 |
const textInput = document.getElementById('text-input').value.trim();
|
| 1500 |
const fileInput = document.getElementById('file-input').files[0];
|
|
|
|
| 1501 |
if (activeTab === 'paste' && !textInput) {
|
| 1502 |
alert('Please paste some text to analyze (minimum 50 characters).');
|
| 1503 |
return;
|
| 1504 |
}
|
|
|
|
| 1505 |
if (activeTab === 'paste' && textInput.length < 50) {
|
| 1506 |
alert('Text must be at least 50 characters long for accurate analysis.');
|
| 1507 |
return;
|
| 1508 |
}
|
|
|
|
| 1509 |
if (activeTab === 'upload' && !fileInput) {
|
| 1510 |
alert('Please select a file to upload.');
|
| 1511 |
return;
|
| 1512 |
}
|
|
|
|
| 1513 |
await performAnalysis(activeTab, textInput, fileInput);
|
| 1514 |
});
|
|
|
|
| 1515 |
// Refresh Button - clears everything and shows empty state
|
| 1516 |
document.getElementById('refresh-btn').addEventListener('click', () => {
|
| 1517 |
resetAnalysisInterface();
|
| 1518 |
});
|
|
|
|
| 1519 |
// Try Next Button - same as refresh but keeps the interface ready
|
| 1520 |
document.getElementById('try-next-btn').addEventListener('click', () => {
|
| 1521 |
resetAnalysisInterface();
|
| 1522 |
});
|
|
|
|
| 1523 |
async function performAnalysis(mode, text, file) {
|
| 1524 |
const analyzeBtn = document.getElementById('analyze-btn');
|
| 1525 |
analyzeBtn.disabled = true;
|
| 1526 |
analyzeBtn.innerHTML = '⏳ Analyzing...';
|
|
|
|
| 1527 |
showLoading();
|
|
|
|
| 1528 |
try {
|
| 1529 |
let response;
|
| 1530 |
if (mode === 'paste') {
|
|
|
|
| 1532 |
} else {
|
| 1533 |
response = await analyzeFile(file);
|
| 1534 |
}
|
|
|
|
| 1535 |
currentAnalysisData = response;
|
| 1536 |
displayResults(response);
|
| 1537 |
} catch (error) {
|
|
|
|
| 1542 |
analyzeBtn.innerHTML = '🔍 Analyze Text';
|
| 1543 |
}
|
| 1544 |
}
|
|
|
|
| 1545 |
async function analyzeText(text) {
|
| 1546 |
const domain = document.getElementById('domain-select').value || null;
|
| 1547 |
const enableAttribution = document.getElementById('enable-attribution').checked;
|
| 1548 |
const enableHighlighting = document.getElementById('enable-highlighting').checked;
|
| 1549 |
const useSentenceLevel = document.getElementById('use-sentence-level').checked;
|
| 1550 |
const includeMetricsSummary = document.getElementById('include-metrics-summary').checked;
|
|
|
|
| 1551 |
const response = await fetch(`${API_BASE}/api/analyze`, {
|
| 1552 |
method: 'POST',
|
| 1553 |
headers: { 'Content-Type': 'application/json' },
|
|
|
|
| 1561 |
skip_expensive_metrics: false
|
| 1562 |
})
|
| 1563 |
});
|
|
|
|
| 1564 |
if (!response.ok) {
|
| 1565 |
const error = await response.json();
|
| 1566 |
throw new Error(error.error || 'Analysis failed');
|
| 1567 |
}
|
|
|
|
| 1568 |
return await response.json();
|
| 1569 |
}
|
|
|
|
| 1570 |
async function analyzeFile(file) {
|
| 1571 |
const domain = document.getElementById('domain-select').value || null;
|
| 1572 |
const enableAttribution = document.getElementById('enable-attribution').checked;
|
| 1573 |
const useSentenceLevel = document.getElementById('use-sentence-level').checked;
|
| 1574 |
const includeMetricsSummary = document.getElementById('include-metrics-summary').checked;
|
|
|
|
| 1575 |
const formData = new FormData();
|
| 1576 |
formData.append('file', file);
|
| 1577 |
if (domain) formData.append('domain', domain);
|
|
|
|
| 1579 |
formData.append('use_sentence_level', useSentenceLevel.toString());
|
| 1580 |
formData.append('include_metrics_summary', includeMetricsSummary.toString());
|
| 1581 |
formData.append('skip_expensive_metrics', 'false');
|
|
|
|
| 1582 |
const response = await fetch(`${API_BASE}/api/analyze/file`, {
|
| 1583 |
method: 'POST',
|
| 1584 |
body: formData
|
| 1585 |
});
|
|
|
|
| 1586 |
if (!response.ok) {
|
| 1587 |
const error = await response.json();
|
| 1588 |
throw new Error(error.error || 'File analysis failed');
|
| 1589 |
}
|
|
|
|
| 1590 |
return await response.json();
|
| 1591 |
}
|
|
|
|
| 1592 |
function showLoading() {
|
| 1593 |
document.getElementById('summary-report').innerHTML = `
|
| 1594 |
<div class="loading">
|
|
|
|
| 1600 |
</div>
|
| 1601 |
`;
|
| 1602 |
}
|
|
|
|
| 1603 |
function showError(message) {
|
| 1604 |
document.getElementById('summary-report').innerHTML = `
|
| 1605 |
<div class="empty-state">
|
|
|
|
| 1609 |
</div>
|
| 1610 |
`;
|
| 1611 |
}
|
|
|
|
| 1612 |
function displayResults(data) {
|
| 1613 |
console.log('Response data:', data);
|
|
|
|
| 1614 |
// Handle different response structures
|
| 1615 |
const detection = data.detection_result;
|
| 1616 |
if (!detection) {
|
|
|
|
| 1618 |
console.error('Full response:', data);
|
| 1619 |
return;
|
| 1620 |
}
|
|
|
|
| 1621 |
// Extract data based on your actual API structure
|
| 1622 |
const ensemble = detection.ensemble_result || detection.ensemble;
|
| 1623 |
const prediction = detection.prediction || {};
|
| 1624 |
const metrics = detection.metric_results || detection.metrics;
|
| 1625 |
const analysis = detection.analysis || {};
|
|
|
|
| 1626 |
// Display Summary with enhanced reasoning
|
| 1627 |
displaySummary(ensemble, prediction, analysis, data.attribution, data.reasoning);
|
|
|
|
| 1628 |
// Display Highlighted Text with enhanced features
|
| 1629 |
if (data.highlighted_html) {
|
| 1630 |
displayHighlightedText(data.highlighted_html);
|
|
|
|
| 1635 |
</div>
|
| 1636 |
`;
|
| 1637 |
}
|
| 1638 |
+
// Display Metrics with carousel
|
|
|
|
| 1639 |
if (metrics && Object.keys(metrics).length > 0) {
|
| 1640 |
+
displayMetricsCarousel(metrics, analysis, ensemble);
|
| 1641 |
} else {
|
| 1642 |
document.getElementById('metrics-report').innerHTML = `
|
| 1643 |
<div class="empty-state">
|
|
|
|
| 1646 |
`;
|
| 1647 |
}
|
| 1648 |
}
|
|
|
|
| 1649 |
function displaySummary(ensemble, prediction, analysis, attribution, reasoning) {
|
| 1650 |
// Use ensemble values from your actual API response
|
| 1651 |
const aiProbability = ensemble.ai_probability !== undefined ?
|
|
|
|
| 1657 |
const isAI = verdict.toLowerCase().includes('ai');
|
| 1658 |
const gaugeColor = isAI ? 'var(--danger)' : 'var(--success)';
|
| 1659 |
const gaugeDegree = aiProbability * 3.6;
|
|
|
|
| 1660 |
const confidenceLevel = parseFloat(confidence) >= 70 ? 'HIGH' :
|
| 1661 |
parseFloat(confidence) >= 40 ? 'MEDIUM' : 'LOW';
|
| 1662 |
const confidenceClass = confidenceLevel === 'HIGH' ? 'confidence-high' :
|
| 1663 |
confidenceLevel === 'MEDIUM' ? 'confidence-medium' : 'confidence-low';
|
|
|
|
| 1664 |
let attributionHTML = '';
|
| 1665 |
if (attribution && attribution.predicted_model) {
|
| 1666 |
const modelName = attribution.predicted_model.replace(/_/g, ' ').replace(/-/g, ' ').toUpperCase();
|
|
|
|
| 1681 |
attributionHTML = `
|
| 1682 |
<div class="attribution-section">
|
| 1683 |
<div class="attribution-title">🤖 AI Model Attribution</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1684 |
${topModels}
|
| 1685 |
${attribution.reasoning && attribution.reasoning.length > 0 ?
|
| 1686 |
`<p style="color: var(--text-secondary); margin-top: 1rem; font-size: 0.9rem;">${attribution.reasoning[0]}</p>` : ''}
|
| 1687 |
</div>
|
| 1688 |
`;
|
| 1689 |
}
|
|
|
|
| 1690 |
document.getElementById('summary-report').innerHTML = `
|
| 1691 |
<div class="result-summary">
|
| 1692 |
<div class="gauge-container">
|
|
|
|
| 1726 |
</div>
|
| 1727 |
`;
|
| 1728 |
}
|
|
|
|
| 1729 |
function createEnhancedReasoningHTML(ensemble, analysis, reasoning) {
|
| 1730 |
// Use actual reasoning data if available
|
| 1731 |
if (reasoning && reasoning.summary) {
|
| 1732 |
+
// Process markdown-style *text* to <strong> tags
|
| 1733 |
+
let processedSummary = reasoning.summary;
|
| 1734 |
+
processedSummary = processedSummary.replace(/\*([^*]+)\*/g, '<strong>$1</strong>');
|
| 1735 |
return `
|
| 1736 |
<div class="reasoning-box enhanced">
|
| 1737 |
<div class="reasoning-header">
|
|
|
|
| 1741 |
${ensemble.overall_confidence >= 0.7 ? 'High Confidence' : ensemble.overall_confidence >= 0.4 ? 'Medium Confidence' : 'Low Confidence'}
|
| 1742 |
</div>
|
| 1743 |
</div>
|
|
|
|
| 1744 |
<div class="verdict-summary">
|
| 1745 |
<div class="verdict-text">${ensemble.final_verdict}</div>
|
| 1746 |
<div class="probability">AI Probability: <span class="probability-value">${(ensemble.ai_probability * 100).toFixed(2)}%</span></div>
|
| 1747 |
</div>
|
| 1748 |
+
<div class="reasoning-text-content">
|
| 1749 |
+
${processedSummary}
|
|
|
|
| 1750 |
</div>
|
|
|
|
| 1751 |
${reasoning.key_indicators && reasoning.key_indicators.length > 0 ? `
|
| 1752 |
<div class="metrics-breakdown">
|
| 1753 |
<div class="breakdown-header">Key Indicators</div>
|
| 1754 |
+
${reasoning.key_indicators.map(indicator => {
|
| 1755 |
+
let processedIndicator = indicator;
|
| 1756 |
+
processedIndicator = processedIndicator.replace(/\*([^*]+)\*/g, '<strong>$1</strong>');
|
| 1757 |
+
return `
|
| 1758 |
+
<div class="metric-indicator">
|
| 1759 |
+
<div class="metric-name">${processedIndicator.split(':')[0]}</div>
|
| 1760 |
+
<div class="metric-details">
|
| 1761 |
+
<span class="reasoning-text-content">${processedIndicator.split(':')[1]}</span>
|
| 1762 |
+
</div>
|
| 1763 |
</div>
|
| 1764 |
+
`;
|
| 1765 |
+
}).join('')}
|
| 1766 |
</div>
|
| 1767 |
` : ''}
|
|
|
|
| 1768 |
${ensemble.consensus_level > 0.7 ? `
|
| 1769 |
<div class="agreement-indicator">
|
| 1770 |
<div class="agreement-icon">✓</div>
|
|
|
|
| 1774 |
</div>
|
| 1775 |
`;
|
| 1776 |
}
|
|
|
|
| 1777 |
// Fallback to basic reasoning if no reasoning data
|
| 1778 |
return `
|
| 1779 |
<div class="reasoning-box">
|
|
|
|
| 1786 |
</div>
|
| 1787 |
`;
|
| 1788 |
}
|
|
|
|
| 1789 |
function displayHighlightedText(html) {
|
| 1790 |
document.getElementById('highlighted-report').innerHTML = `
|
| 1791 |
${createDefaultLegend()}
|
|
|
|
| 1795 |
${getHighlightStyles()}
|
| 1796 |
`;
|
| 1797 |
}
|
|
|
|
| 1798 |
function createDefaultLegend() {
|
| 1799 |
return `
|
| 1800 |
<div class="highlight-legend">
|
|
|
|
| 1833 |
</div>
|
| 1834 |
`;
|
| 1835 |
}
|
|
|
|
| 1836 |
function getHighlightStyles() {
|
| 1837 |
return `
|
| 1838 |
<style>
|
|
|
|
| 1888 |
</style>
|
| 1889 |
`;
|
| 1890 |
}
|
| 1891 |
+
function displayMetricsCarousel(metrics, analysis, ensemble) {
|
| 1892 |
+
const metricOrder = ['structural', 'perplexity', 'entropy', 'semantic_analysis', 'linguistic', 'multi_perturbation_stability'];
|
| 1893 |
+
const availableMetrics = metricOrder.filter(key => metrics[key]);
|
| 1894 |
+
totalMetrics = availableMetrics.length;
|
| 1895 |
+
if (totalMetrics === 0) {
|
| 1896 |
+
document.getElementById('metrics-report').innerHTML = `
|
| 1897 |
+
<div class="empty-state">
|
| 1898 |
+
<p class="empty-description">No metric details available</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1899 |
</div>
|
| 1900 |
+
`;
|
| 1901 |
+
return;
|
| 1902 |
+
}
|
| 1903 |
+
let carouselHTML = `
|
| 1904 |
+
<div class="metrics-carousel-container">
|
| 1905 |
+
<div class="metrics-carousel-content">
|
| 1906 |
`;
|
| 1907 |
+
availableMetrics.forEach((metricKey, index) => {
|
|
|
|
| 1908 |
const metric = metrics[metricKey];
|
| 1909 |
if (!metric) return;
|
|
|
|
| 1910 |
const aiProb = (metric.ai_probability * 100).toFixed(1);
|
| 1911 |
const humanProb = (metric.human_probability * 100).toFixed(1);
|
| 1912 |
const confidence = (metric.confidence * 100).toFixed(1);
|
| 1913 |
+
const weight = ensemble.metric_contributions && ensemble.metric_contributions[metricKey] ?
|
| 1914 |
+
(ensemble.metric_contributions[metricKey].weight * 100).toFixed(1) : '0.0';
|
|
|
|
| 1915 |
const color = metric.ai_probability >= 0.6 ? 'var(--danger)' :
|
| 1916 |
metric.ai_probability >= 0.4 ? 'var(--warning)' : 'var(--success)';
|
| 1917 |
const verdictText = metric.ai_probability >= 0.6 ? 'AI' :
|
| 1918 |
metric.ai_probability >= 0.4 ? 'UNCERTAIN' : 'HUMAN';
|
| 1919 |
const verdictClass = verdictText === 'AI' ? 'verdict-ai' :
|
| 1920 |
verdictText === 'UNCERTAIN' ? 'verdict-uncertain' : 'verdict-human';
|
| 1921 |
+
carouselHTML += `
|
| 1922 |
+
<div class="metric-slide ${index === 0 ? 'active' : ''}" data-metric-index="${index}">
|
| 1923 |
+
<div class="metric-result-card">
|
| 1924 |
+
<div class="metric-header">
|
| 1925 |
+
<div class="metric-name">${formatMetricName(metricKey)}</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1926 |
</div>
|
| 1927 |
+
<div class="metric-description">
|
| 1928 |
+
${getMetricDescription(metricKey)}
|
| 1929 |
+
</div>
|
| 1930 |
+
<div style="display: flex; gap: 1rem; margin: 1rem 0;">
|
| 1931 |
+
<div style="flex: 1;">
|
| 1932 |
+
<div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 0.25rem;">AI</div>
|
| 1933 |
+
<div style="background: rgba(51, 65, 85, 0.5); height: 8px; border-radius: 4px; overflow: hidden;">
|
| 1934 |
+
<div style="background: var(--danger); height: 100%; width: ${aiProb}%; transition: width 0.5s;"></div>
|
| 1935 |
+
</div>
|
| 1936 |
+
<div style="font-size: 0.85rem; font-weight: 600; margin-top: 0.25rem;">${aiProb}%</div>
|
| 1937 |
+
</div>
|
| 1938 |
+
<div style="flex: 1;">
|
| 1939 |
+
<div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 0.25rem;">Human</div>
|
| 1940 |
+
<div style="background: rgba(51, 65, 85, 0.5); height: 8px; border-radius: 4px; overflow: hidden;">
|
| 1941 |
+
<div style="background: var(--success); height: 100%; width: ${humanProb}%; transition: width 0.5s;"></div>
|
| 1942 |
+
</div>
|
| 1943 |
+
<div style="font-size: 0.85rem; font-weight: 600; margin-top: 0.25rem;">${humanProb}%</div>
|
| 1944 |
</div>
|
|
|
|
| 1945 |
</div>
|
| 1946 |
+
<div style="display: flex; justify-content: space-between; align-items: center; margin: 0.75rem 0;">
|
| 1947 |
+
<span class="metric-verdict ${verdictClass}">${verdictText}</span>
|
| 1948 |
+
<span style="font-size: 0.85rem; color: var(--text-secondary);">Confidence: ${confidence}% | Weight: ${weight}%</span>
|
| 1949 |
+
</div>
|
| 1950 |
+
|
| 1951 |
+
${metric.details ? renderMetricDetails(metricKey, metric.details) : ''}
|
| 1952 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1953 |
</div>
|
| 1954 |
`;
|
| 1955 |
});
|
| 1956 |
+
carouselHTML += `
|
| 1957 |
+
</div>
|
| 1958 |
+
<div class="metrics-carousel-nav">
|
| 1959 |
+
<button class="carousel-btn prev-btn" onclick="navigateMetrics(-1)" ${currentMetricIndex === 0 ? 'disabled' : ''}>← Previous</button>
|
| 1960 |
+
<div class="carousel-position">${currentMetricIndex + 1} / ${totalMetrics}</div>
|
| 1961 |
+
<button class="carousel-btn next-btn" onclick="navigateMetrics(1)" ${currentMetricIndex === totalMetrics - 1 ? 'disabled' : ''}>Next →</button>
|
| 1962 |
+
</div>
|
| 1963 |
+
</div>
|
| 1964 |
+
`;
|
| 1965 |
+
document.getElementById('metrics-report').innerHTML = carouselHTML;
|
| 1966 |
+
updateCarouselButtons();
|
| 1967 |
+
}
|
| 1968 |
+
function navigateMetrics(direction) {
|
| 1969 |
+
const newMetricIndex = currentMetricIndex + direction;
|
| 1970 |
+
if (newMetricIndex >= 0 && newMetricIndex < totalMetrics) {
|
| 1971 |
+
currentMetricIndex = newMetricIndex;
|
| 1972 |
+
updateMetricCarousel();
|
| 1973 |
+
}
|
| 1974 |
+
}
|
| 1975 |
+
function updateMetricCarousel() {
|
| 1976 |
+
const slides = document.querySelectorAll('.metric-slide');
|
| 1977 |
+
slides.forEach((slide, index) => {
|
| 1978 |
+
if (index === currentMetricIndex) {
|
| 1979 |
+
slide.classList.add('active');
|
| 1980 |
+
} else {
|
| 1981 |
+
slide.classList.remove('active');
|
| 1982 |
+
}
|
| 1983 |
+
});
|
| 1984 |
+
updateCarouselButtons();
|
| 1985 |
+
// Update position indicator
|
| 1986 |
+
const positionElement = document.querySelector('.carousel-position');
|
| 1987 |
+
if (positionElement) {
|
| 1988 |
+
positionElement.textContent = `${currentMetricIndex + 1} / ${totalMetrics}`;
|
| 1989 |
+
}
|
| 1990 |
+
}
|
| 1991 |
+
function updateCarouselButtons() {
|
| 1992 |
+
const prevBtn = document.querySelector('.prev-btn');
|
| 1993 |
+
const nextBtn = document.querySelector('.next-btn');
|
| 1994 |
+
if (prevBtn) {
|
| 1995 |
+
prevBtn.disabled = currentMetricIndex === 0;
|
| 1996 |
+
}
|
| 1997 |
+
if (nextBtn) {
|
| 1998 |
+
nextBtn.disabled = currentMetricIndex === totalMetrics - 1;
|
| 1999 |
+
}
|
| 2000 |
}
|
|
|
|
| 2001 |
function renderMetricDetails(metricName, details) {
|
| 2002 |
if (!details || Object.keys(details).length === 0) return '';
|
|
|
|
| 2003 |
// Key metrics to show for each type
|
| 2004 |
const importantKeys = {
|
| 2005 |
'structural': ['burstiness_score', 'length_uniformity', 'avg_sentence_length', 'std_sentence_length'],
|
|
|
|
| 2007 |
'entropy': ['token_diversity', 'sequence_unpredictability', 'char_entropy'],
|
| 2008 |
'semantic_analysis': ['coherence_score', 'consistency_score', 'repetition_score'],
|
| 2009 |
'linguistic': ['pos_diversity', 'syntactic_complexity', 'grammatical_consistency'],
|
| 2010 |
+
'multi_perturbation_stability': ['stability_score', 'curvature_score', 'likelihood_ratio']
|
| 2011 |
};
|
|
|
|
| 2012 |
const keysToShow = importantKeys[metricName] || Object.keys(details).slice(0, 6);
|
| 2013 |
let detailsHTML = '<div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--border);">';
|
| 2014 |
detailsHTML += '<div style="font-size: 0.9rem; font-weight: 600; color: var(--text-secondary); margin-bottom: 0.75rem;">📈 Detailed Metrics:</div>';
|
| 2015 |
detailsHTML += '<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 0.75rem; font-size: 0.85rem;">';
|
|
|
|
| 2016 |
keysToShow.forEach(key => {
|
| 2017 |
if (details[key] !== undefined && details[key] !== null) {
|
| 2018 |
const value = typeof details[key] === 'number' ?
|
|
|
|
| 2027 |
`;
|
| 2028 |
}
|
| 2029 |
});
|
|
|
|
| 2030 |
detailsHTML += '</div></div>';
|
| 2031 |
return detailsHTML;
|
| 2032 |
}
|
|
|
|
| 2033 |
function getMetricDescription(metricName) {
|
| 2034 |
const descriptions = {
|
| 2035 |
structural: 'Analyzes sentence structure, length patterns, and statistical features.',
|
|
|
|
| 2037 |
entropy: 'Evaluates token diversity and sequence unpredictability.',
|
| 2038 |
semantic_analysis: 'Examines semantic coherence, topic consistency, and logical flow.',
|
| 2039 |
linguistic: 'Assesses grammatical patterns, syntactic complexity, and style markers.',
|
| 2040 |
+
multi_perturbation_stability: 'Tests text stability under perturbation using curvature analysis.'
|
| 2041 |
};
|
| 2042 |
return descriptions[metricName] || 'Metric analysis complete.';
|
| 2043 |
}
|
|
|
|
| 2044 |
function formatMetricName(name) {
|
| 2045 |
const names = {
|
| 2046 |
structural: 'Structural Analysis',
|
|
|
|
| 2048 |
entropy: 'Entropy',
|
| 2049 |
semantic_analysis: 'Semantic Analysis',
|
| 2050 |
linguistic: 'Linguistic Analysis',
|
| 2051 |
+
multi_perturbation_stability: 'Multi-Perturbation Stability'
|
| 2052 |
};
|
| 2053 |
return names[name] || name.split('_').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
| 2054 |
}
|
|
|
|
| 2055 |
// Turn a snake_case domain identifier (e.g. "academic_paper") into a
// human-readable title ("Academic Paper"): capitalize the first letter of
// each underscore-separated word and join the words with single spaces.
function formatDomainName(domain) {
    const words = domain.split('_');
    const titled = [];
    for (const word of words) {
        titled.push(word.charAt(0).toUpperCase() + word.slice(1));
    }
    return titled.join(' ');
}
|
|
|
|
| 2058 |
async function downloadReport(format) {
|
| 2059 |
if (!currentAnalysisData) {
|
| 2060 |
alert('No analysis data available');
|
| 2061 |
return;
|
| 2062 |
}
|
|
|
|
| 2063 |
try {
|
| 2064 |
const analysisId = currentAnalysisData.analysis_id;
|
| 2065 |
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
|
|
|
| 2066 |
// For JSON, download directly from current data
|
| 2067 |
if (format === 'json') {
|
| 2068 |
const data = {
|
|
|
|
| 2077 |
await downloadBlob(blob, filename);
|
| 2078 |
return;
|
| 2079 |
}
|
|
|
|
| 2080 |
// Get the original text for report generation
|
| 2081 |
const activeTab = document.querySelector('.input-tab.active').dataset.tab;
|
| 2082 |
let textToSend = '';
|
|
|
|
| 2086 |
textToSend = currentAnalysisData.detection_result?.processed_text?.text ||
|
| 2087 |
'Uploaded file content - see analysis for details';
|
| 2088 |
}
|
|
|
|
| 2089 |
// For PDF, request from server
|
| 2090 |
const formData = new FormData();
|
| 2091 |
formData.append('analysis_id', analysisId);
|
| 2092 |
formData.append('text', textToSend);
|
| 2093 |
formData.append('formats', format);
|
| 2094 |
formData.append('include_highlights', document.getElementById('enable-highlighting').checked.toString());
|
|
|
|
| 2095 |
const response = await fetch(`${API_BASE}/api/report/generate`, {
|
| 2096 |
method: 'POST',
|
| 2097 |
body: formData
|
| 2098 |
});
|
|
|
|
| 2099 |
if (!response.ok) {
|
| 2100 |
throw new Error('Report generation failed');
|
| 2101 |
}
|
|
|
|
| 2102 |
const result = await response.json();
|
| 2103 |
if (result.reports && result.reports[format]) {
|
| 2104 |
const filename = result.reports[format];
|
|
|
|
| 2117 |
alert('Failed to download report. Please try again.');
|
| 2118 |
}
|
| 2119 |
}
|
|
|
|
| 2120 |
async function downloadBlob(blob, filename) {
|
| 2121 |
try {
|
| 2122 |
const url = URL.createObjectURL(blob);
|
|
|
|
| 2126 |
a.style.display = 'none';
|
| 2127 |
document.body.appendChild(a);
|
| 2128 |
a.click();
|
|
|
|
| 2129 |
setTimeout(() => {
|
| 2130 |
document.body.removeChild(a);
|
| 2131 |
URL.revokeObjectURL(url);
|
|
|
|
| 2136 |
alert('Download failed. Please try again.');
|
| 2137 |
}
|
| 2138 |
}
|
|
|
|
| 2139 |
function showDownloadSuccess(filename) {
|
| 2140 |
const notification = document.createElement('div');
|
| 2141 |
notification.style.cssText = `
|
|
|
|
| 2158 |
</div>
|
| 2159 |
`;
|
| 2160 |
document.body.appendChild(notification);
|
|
|
|
| 2161 |
if (!document.querySelector('#download-animation')) {
|
| 2162 |
const style = document.createElement('style');
|
| 2163 |
style.id = 'download-animation';
|
|
|
|
| 2169 |
`;
|
| 2170 |
document.head.appendChild(style);
|
| 2171 |
}
|
|
|
|
| 2172 |
setTimeout(() => {
|
| 2173 |
if (notification.parentNode) {
|
| 2174 |
notification.parentNode.removeChild(notification);
|
| 2175 |
}
|
| 2176 |
}, 3000);
|
| 2177 |
}
|
|
|
|
| 2178 |
// Smooth scrolling for anchor links
|
| 2179 |
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
|
| 2180 |
anchor.addEventListener('click', function (e) {
|
|
|
|
| 2188 |
}
|
| 2189 |
});
|
| 2190 |
});
|
|
|
|
| 2191 |
// Initialize - show the landing page view by default when the script runs.
showLanding();
|
| 2193 |
</script>
|