File size: 5,821 Bytes
edf1149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1e6c7
edf1149
 
3b1e6c7
 
 
 
edf1149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1e6c7
 
 
 
 
 
edf1149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# DEPENDENCIES
import os
import torch
from pathlib import Path
from pydantic import Field
from typing import Optional
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """
    Main application settings

    Each field can be overridden by an environment variable, or by an entry
    in the `.env` file, whose name is identical to the field name. Matching
    is case-sensitive and unrelated variables are ignored (see `model_config`).

    Instantiating the class also creates the model-cache, log, upload and
    report directories on disk if they are missing.
    """
    # Settings-source configuration. This is the pydantic v2 replacement for the
    # deprecated inner `class Config`; the options themselves are unchanged.
    model_config = {"env_file"       : ".env",
                    "case_sensitive" : True,
                    "extra"          : "ignore",
                   }

    # NOTE: the pydantic v1 `Field(env = ...)` keyword is deprecated and has no
    # effect under pydantic-settings v2, so fields are declared with plain
    # defaults; the environment variable name is simply the field name.

    # Application Info
    APP_NAME                : str           = "TEXT-AUTH"
    APP_VERSION             : str           = "1.0.0"
    APP_DESCRIPTION         : str           = "AI Text Detection Platform"
    
    # Environment
    ENVIRONMENT             : str           = "development"
    DEBUG                   : bool          = True
    
    # Server Configuration
    HOST                    : str           = "0.0.0.0"
    PORT                    : int           = 8000
    WORKERS                 : int           = 4
    
    # Paths (all defaults are relative to the directory two levels above this file)
    BASE_DIR                : Path          = Path(__file__).parent.parent.resolve()
    MODEL_CACHE_DIR         : Path          = Path(__file__).parent.parent / "models" / "cache"
    LOG_DIR                 : Path          = Path(__file__).parent.parent / "logs"
    UPLOAD_DIR              : Path          = Path(__file__).parent.parent / "data" / "uploads"
    REPORT_DIR              : Path          = Path(__file__).parent.parent / "data" / "reports"
    
    # File Upload Settings
    MAX_UPLOAD_SIZE         : int           = 10 * 1024 * 1024  # 10MB
    ALLOWED_EXTENSIONS      : list          = [".txt", ".pdf", ".docx", ".doc", ".md"]
    
    # Processing Settings
    MAX_TEXT_LENGTH         : int           = 500000  # Maximum characters to process
    MIN_TEXT_LENGTH         : int           = 50      # Minimum characters for analysis
    CHUNK_SIZE              : int           = 512     # Tokens per chunk
    CHUNK_OVERLAP           : int           = 50      # Overlap between chunks
    
    # Model Settings
    DEVICE                  : str           = "cpu"  # "cuda" or "cpu"
    USE_QUANTIZATION        : bool          = False
    USE_ONNX                : bool          = False
    MODEL_LOAD_STRATEGY     : str           = "lazy"  # "lazy" or "eager"
    MAX_CACHED_MODELS       : int           = 5
    
    # Detection Settings
    CONFIDENCE_THRESHOLD    : float         = 0.7  # Minimum confidence for classification
    ENSEMBLE_METHOD         : str           = "weighted_average"  # "weighted_average", "voting", "stacking"
    USE_DOMAIN_ADAPTATION   : bool          = True
    
    # Rate Limiting
    RATE_LIMIT_ENABLED      : bool          = True
    RATE_LIMIT_REQUESTS     : int           = 100
    RATE_LIMIT_WINDOW       : int           = 3600  # seconds (1 hour)
    
    # Logging
    LOG_LEVEL               : str           = "INFO"
    LOG_FORMAT              : str           = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    LOG_ROTATION            : str           = "1 day"
    LOG_RETENTION           : str           = "30 days"
    
    # API Settings
    API_PREFIX              : str           = "/api/v1"
    CORS_ORIGINS            : list          = ["*"]  # For production, specify exact origins
    
    # Database (Optional - for future)
    DATABASE_URL            : Optional[str] = None
    
    # Security
    # NOTE(review): the default below is a placeholder; set SECRET_KEY in the
    # environment for any real deployment.
    SECRET_KEY              : str           = "your-secret-key-change-in-production"
    API_KEY_ENABLED         : bool          = False
    
    # Feature Flags
    ENABLE_ATTRIBUTION      : bool          = True
    ENABLE_HIGHLIGHTING     : bool          = True
    ENABLE_PDF_REPORTS      : bool          = True
    ENABLE_BATCH_PROCESSING : bool          = True
    
    # Performance
    MAX_CONCURRENT_REQUESTS : int           = 10
    REQUEST_TIMEOUT         : int           = 300  # seconds (5 minutes)
    
    # Metrics Configuration (metric name -> enabled flag)
    METRICS_ENABLED         : dict          = {"semantic_analysis"            : True,
                                               "multi_perturbation_stability" : True,
                                               "perplexity"                   : True,
                                               "statistical"                  : True,
                                               "entropy"                      : True,
                                               "linguistic"                   : True,
                                              }

    
    def __init__(self, **kwargs):
        """
        Load the settings, then make sure the working directories exist

        Arguments:
        ----------
            **kwargs : forwarded unchanged to `BaseSettings.__init__`
        """
        super().__init__(**kwargs)
        self._create_directories()
    

    def _create_directories(self):
        """
        Create necessary directories if they don't exist

        Covers MODEL_CACHE_DIR, LOG_DIR, UPLOAD_DIR and REPORT_DIR; missing
        parent directories are created as well and existing ones are left alone.
        """
        for directory in [self.MODEL_CACHE_DIR, self.LOG_DIR, self.UPLOAD_DIR, self.REPORT_DIR]:
            directory.mkdir(parents = True, exist_ok = True)
    

    @property
    def is_production(self) -> bool:
        """
        Check if running in production

        Returns:
        --------
            { bool } : True when ENVIRONMENT equals "production", compared case-insensitively
        """
        return self.ENVIRONMENT.lower() == "production"
    

    @property
    def use_gpu(self) -> bool:
        """
        Check if GPU is available and should be used

        Returns:
        --------
            { bool } : True only when DEVICE is exactly "cuda" and torch reports CUDA as available
        """
        return self.DEVICE == "cuda" and torch.cuda.is_available()



# Shared module-level instance: constructed once when this module is first
# imported, so all importers work with the same configuration object.
settings = Settings()


# Public names of this module
__all__ = ["settings", "Settings"]