Update models/hate_speech_classifier.py
models/hate_speech_classifier.py  (+100 -100)  CHANGED
@@ -5,39 +5,15 @@ import os
 import re
 import torch
 from deep_translator import GoogleTranslator
-from pathlib import Path
+# from pathlib import Path


 class HateSpeechClassifier:
-    # def __init__(self):
-    #     base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    #     models_dir = os.path.join(base_dir, "models", "model_weights", "custom_models")
-
-    #     # Initialize translator
-    #     self.translator = GoogleTranslator(source='bn', target='en')
-
-    #     # Use multiple pretrained models for better accuracy
-    #     self.pretrained_models = {
-    #         "primary": {
-    #             "name": "facebook/roberta-hate-speech-dynabench-r4-target",
-    #             "pipeline": None,
-    #             "weight": 0.6
-    #         },
-    #         "secondary": {
-    #             "name": "cardiffnlp/twitter-roberta-base-hate-latest",
-    #             "pipeline": None,
-    #             "weight": 0.4
-    #         }
-    #     }
     def __init__(self):
-        # Get absolute path to model weights
-        base_dir = Path(__file__).parent
-        self.model_dir = base_dir / 'model_weights' / 'custom_models'
-
-        # For Hugging Face Spaces, also check environment
-        if not self.model_dir.exists():
-            # Try alternative paths for deployed environment
-            self.model_dir = Path('/app/models/model_weights/custom_models')
+        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        models_dir = os.path.join(base_dir, "models", "model_weights", "custom_models")
+
+        # Initialize translator
         self.translator = GoogleTranslator(source='bn', target='en')

         # Use multiple pretrained models for better accuracy
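The first hunk swaps which __init__ is active: the pathlib-based weight lookup (with a fallback to /app/models/model_weights/custom_models for the deployed Space) is commented out, and the os.path-based lookup rooted two directories above this file is restored. A minimal sketch of what the two resolutions evaluate to, assuming the module sits at <repo>/models/hate_speech_classifier.py; that layout and the example path are assumptions for illustration, not something the diff states:

import os
from pathlib import Path

# Hypothetical module location, used only to make the path arithmetic concrete.
this_file = "/repo/models/hate_speech_classifier.py"

# Restored resolution (os.path): two dirname() calls climb from the module to the repo root.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(this_file)))          # /repo
models_dir = os.path.join(base_dir, "models", "model_weights", "custom_models")  # /repo/models/model_weights/custom_models

# Commented-out resolution (pathlib): relative to the module's own directory,
# with an explicit fallback for the Hugging Face Spaces container.
model_dir = Path(this_file).parent / "model_weights" / "custom_models"           # /repo/models/model_weights/custom_models
if not model_dir.exists():
    model_dir = Path("/app/models/model_weights/custom_models")

Under this assumed layout both variants point at the same folder; the visible difference is that the restored version drops the existence check and the /app fallback.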
@@ -53,80 +29,104 @@
                 "weight": 0.4
             }
         }
+    # def __init__(self):
+    #     # Get absolute path to model weights
+    #     base_dir = Path(__file__).parent
+    #     self.model_dir = base_dir / 'model_weights' / 'custom_models'
+
+    #     # For Hugging Face Spaces, also check environment
+    #     if not self.model_dir.exists():
+    #         # Try alternative paths for deployed environment
+    #         self.model_dir = Path('/app/models/model_weights/custom_models')
+    #     self.translator = GoogleTranslator(source='bn', target='en')

-        print(f"Model directory: {self.model_dir}")
-        print(f"Model directory exists: {self.model_dir.exists()}")
-
-        if self.model_dir.exists():
-            print(f"Files in model directory: {list(self.model_dir.iterdir())}")
-
-        # English custom model paths
-        self.english_model_path = os.path.join(models_dir, "english_model.pkl")
-        self.english_vectorizer_path = os.path.join(models_dir, "english_vectorizer.pkl")
-        self.english_model = None
-        self.english_vectorizer = None
-        self.english_model_loaded = False
-
-        # Bengali custom model paths
-        self.bengali_model_path = os.path.join(models_dir, "bengali_model.pkl")
-        self.bengali_vectorizer_path = os.path.join(models_dir, "bengali_vectorizer.pkl")
-        self.bengali_model = None
-        self.bengali_vectorizer = None
-        self.bengali_model_loaded = False
-
-        # Load models
-        self._load_custom_models()
-
-        # Enhanced hate keywords
-        self.hate_keywords = {
-            "english": [
-                "hate", "kill", "death", "violence", "murder", "attack", "destroy", "eliminate",
-                "die", "dead", "shoot", "stab", "burn", "hang", "lynch",
-                "terrorist", "racist", "sexist", "discrimination", "discriminate",
-                "scheduled caste", "scheduled tribe", "dalit", "lower caste", "untouchable",
-                "chamar", "bhangi", "sc/st", "reservation quota",
-                "no right to live", "don't deserve", "shouldn't exist", "subhuman",
-                "inferior", "worthless", "scum", "vermin", "parasite",
-                "should be killed", "must die", "deserve to die", "need to be eliminated",
-                "jihadi", "kafir", "infidel", "terrorist religion", "religious extremist",
-                "nigger", "chink", "paki", "kike", "faggot", "tranny"
-            ],
-            "bengali": [
-                "শালা", "হালা", "মাগি", "কুত্তা", "হারামি", "চোদ", "বাল",
-                "ঘৃণা", "মারো", "মৃত্যু", "সন্ত্রাসী", "বোকা", "মূর্খ",
-                "বিদ্বেষ", "ভয়ঙ্কর", "জঘন্য", "হত্যা", "আক্রমণ",
-                "দলিত", "নিম্নবর্ণ", "অস্পৃশ্য"
-            ]
-        }
+    #     # Use multiple pretrained models for better accuracy
+    #     self.pretrained_models = {
+    #         "primary": {
+    #             "name": "facebook/roberta-hate-speech-dynabench-r4-target",
+    #             "pipeline": None,
+    #             "weight": 0.6
+    #         },
+    #         "secondary": {
+    #             "name": "cardiffnlp/twitter-roberta-base-hate-latest",
+    #             "pipeline": None,
+    #             "weight": 0.4
+    #         }
+    #     }

-        self.hate_patterns = {
-            "english": [
-                r"no right to (live|exist|be here|survive)",
-                r"(should|must|need to|ought to) (die|be killed|be eliminated|perish)",
-                r"don'?t deserve (to live|life|existence|to exist)",
-                r"(get rid of|eliminate|exterminate|wipe out) (them|these|those|the)",
-                r"(scheduled caste|dalit|lower caste|sc/st).{0,50}(no right|shouldn't|don't deserve)",
-                r"(religious|ethnic|caste|racial) (cleansing|purification|genocide)",
-                r"(send|throw|kick|drive) (them|back) (out|away|home)",
-                r"(all|these) .{0,30} (should die|must be killed|need to go)",
-                r"(death to|kill all|eliminate all) .{0,30}",
-                r"(inferior|subhuman|less than human|not human)",
-            ],
-            "bengali": [
-                r"বাঁচার অধিকার নেই",
-                r"মরে যাওয়া উচিত",
-                r"নিশ্চিহ্ন করা উচিত"
-            ]
-        }
+        # print(f"Model directory: {self.model_dir}")
+        # print(f"Model directory exists: {self.model_dir.exists()}")
+
+        # if self.model_dir.exists():
+        #     print(f"Files in model directory: {list(self.model_dir.iterdir())}")
+
+        # # English custom model paths
+        # self.english_model_path = os.path.join(models_dir, "english_model.pkl")
+        # self.english_vectorizer_path = os.path.join(models_dir, "english_vectorizer.pkl")
+        # self.english_model = None
+        # self.english_vectorizer = None
+        # self.english_model_loaded = False
+
+        # # Bengali custom model paths
+        # self.bengali_model_path = os.path.join(models_dir, "bengali_model.pkl")
+        # self.bengali_vectorizer_path = os.path.join(models_dir, "bengali_vectorizer.pkl")
+        # self.bengali_model = None
+        # self.bengali_vectorizer = None
+        # self.bengali_model_loaded = False
+
+        # # Load models
+        # self._load_custom_models()
+
+        # # Enhanced hate keywords
+        # self.hate_keywords = {
+        #     "english": [
+        #         "hate", "kill", "death", "violence", "murder", "attack", "destroy", "eliminate",
+        #         "die", "dead", "shoot", "stab", "burn", "hang", "lynch",
+        #         "terrorist", "racist", "sexist", "discrimination", "discriminate",
+        #         "scheduled caste", "scheduled tribe", "dalit", "lower caste", "untouchable",
+        #         "chamar", "bhangi", "sc/st", "reservation quota",
+        #         "no right to live", "don't deserve", "shouldn't exist", "subhuman",
+        #         "inferior", "worthless", "scum", "vermin", "parasite",
+        #         "should be killed", "must die", "deserve to die", "need to be eliminated",
+        #         "jihadi", "kafir", "infidel", "terrorist religion", "religious extremist",
+        #         "nigger", "chink", "paki", "kike", "faggot", "tranny"
+        #     ],
+        #     "bengali": [
+        #         "শালা", "হালা", "মাগি", "কুত্তা", "হারামি", "চোদ", "বাল",
+        #         "ঘৃণা", "মারো", "মৃত্যু", "সন্ত্রাসী", "বোকা", "মূর্খ",
+        #         "বিদ্বেষ", "ভয়ঙ্কর", "জঘন্য", "হত্যা", "আক্রমণ",
+        #         "দলিত", "নিম্নবর্ণ", "অস্পৃশ্য"
+        #     ]
+        # }

-        self.offensive_keywords = {
-            "english": [
-                "damn", "hell", "crap", "suck", "dumb", "loser", "trash",
-                "stupid", "idiot", "moron", "pathetic", "bad", "ugly",
-                "disgusting", "nasty", "filthy", "asshole", "bitch", "bastard"
-            ],
-            "bengali": ["বাজে", "খারাপ", "নোংরা", "বেকুব"]
-        }
+        # self.hate_patterns = {
+        #     "english": [
+        #         r"no right to (live|exist|be here|survive)",
+        #         r"(should|must|need to|ought to) (die|be killed|be eliminated|perish)",
+        #         r"don'?t deserve (to live|life|existence|to exist)",
+        #         r"(get rid of|eliminate|exterminate|wipe out) (them|these|those|the)",
+        #         r"(scheduled caste|dalit|lower caste|sc/st).{0,50}(no right|shouldn't|don't deserve)",
+        #         r"(religious|ethnic|caste|racial) (cleansing|purification|genocide)",
+        #         r"(send|throw|kick|drive) (them|back) (out|away|home)",
+        #         r"(all|these) .{0,30} (should die|must be killed|need to go)",
+        #         r"(death to|kill all|eliminate all) .{0,30}",
+        #         r"(inferior|subhuman|less than human|not human)",
+        #     ],
+        #     "bengali": [
+        #         r"বাঁচার অধিকার নেই",
+        #         r"মরে যাওয়া উচিত",
+        #         r"নিশ্চিহ্ন করা উচিত"
+        #     ]
+        # }
+
+        # self.offensive_keywords = {
+        #     "english": [
+        #         "damn", "hell", "crap", "suck", "dumb", "loser", "trash",
+        #         "stupid", "idiot", "moron", "pathetic", "bad", "ugly",
+        #         "disgusting", "nasty", "filthy", "asshole", "bitch", "bastard"
+        #     ],
+        #     "bengali": ["বাজে", "খারাপ", "নোংরা", "বেকুব"]
+        # }

     def _translate_to_english(self, text: str) -> Optional[str]:
         """Translate Bengali to English using deep-translator"""
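Both sides of the diff keep the pretrained_models table, which pairs two Hub models with blending weights of 0.6 and 0.4; the code that actually combines their scores is outside this hunk. The sketch below shows one plausible way such a weighted vote could be computed with transformers pipelines. The helper name ensemble_hate_score and the label normalization are assumptions, not code from this repository:

from transformers import pipeline

# Model names and weights taken from the diff; everything else is illustrative.
PRETRAINED = {
    "primary":   {"name": "facebook/roberta-hate-speech-dynabench-r4-target", "weight": 0.6},
    "secondary": {"name": "cardiffnlp/twitter-roberta-base-hate-latest",      "weight": 0.4},
}

# Build each pipeline once; reloading per call would be wasteful.
PIPELINES = {key: pipeline("text-classification", model=cfg["name"]) for key, cfg in PRETRAINED.items()}

def ensemble_hate_score(text: str) -> float:
    """Weighted probability-of-hate in [0, 1] (hypothetical helper, not in the diff)."""
    total = 0.0
    for key, cfg in PRETRAINED.items():
        result = PIPELINES[key](text)[0]  # e.g. {'label': 'hate', 'score': 0.93}
        # Label spellings differ between the two models ('hate', 'HATE', 'NOT-HATE', ...),
        # so normalize before deciding which side of the score to take.
        label = result["label"].lower().replace("-", "").replace("_", "")
        p_hate = result["score"] if label in {"hate", "hateful"} else 1.0 - result["score"]
        total += cfg["weight"] * p_hate
    return total

A text would then be treated as hate speech when ensemble_hate_score(text) crosses whatever threshold the surrounding code chooses.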
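The hate_keywords, hate_patterns and offensive_keywords tables (active on the removed side, commented out on the added side) read as a rule-based layer alongside the model scores. The sketch below shows how such tables are typically applied, using shortened copies of the lists; rule_based_flag is a hypothetical helper and the commit does not show how the real class consumes these tables:

import re

# Abbreviated copies of the tables from the diff; the full lists appear in __init__.
HATE_KEYWORDS = {"english": ["kill", "subhuman", "no right to live"]}
HATE_PATTERNS = {
    "english": [
        r"no right to (live|exist|be here|survive)",
        r"(should|must|need to|ought to) (die|be killed|be eliminated|perish)",
    ]
}

def rule_based_flag(text: str, language: str = "english") -> bool:
    """True if any keyword or regex from the tables matches (hypothetical helper)."""
    lowered = text.lower()
    if any(keyword in lowered for keyword in HATE_KEYWORDS.get(language, [])):
        return True
    return any(re.search(pattern, lowered) for pattern in HATE_PATTERNS.get(language, []))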
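The hunk closes at the signature of _translate_to_english, whose body is not part of this commit. Given the GoogleTranslator(source='bn', target='en') instance created in __init__ and the Optional[str] return type, a likely shape for that method is sketched below as a standalone function; the try/except behaviour is an assumption:

from typing import Optional
from deep_translator import GoogleTranslator

# In the class this object is self.translator, built in __init__.
_translator = GoogleTranslator(source='bn', target='en')

def translate_to_english(text: str) -> Optional[str]:
    """Translate Bengali to English using deep-translator (sketch, not the repo's body)."""
    try:
        # translate() is the standard deep-translator call for a single string.
        return _translator.translate(text)
    except Exception:
        # Returning None matches the Optional[str] annotation when translation fails.
        return None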