"""Project-wide configuration constants.

Defines filesystem locations derived from this file's directory, the
feature-column groups (lexical, WHOIS, SSL), their split into categorical
and numerical columns, and training hyperparameters.
"""

import os

# ---------------------------------------------------------------------------
# Filesystem layout (all paths are anchored at the directory of this file)
# ---------------------------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
MODELS_DIR = os.path.join(BASE_DIR, "models")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")

# NOTE: the engineered-feature CSVs sit directly in BASE_DIR, not in DATA_DIR.
ENGINEERED_TRAIN_FILE = os.path.join(BASE_DIR, "engineered_features.csv")
ENGINEERED_TEST_FILE = os.path.join(BASE_DIR, "engineered_features_test.csv")

# ---------------------------------------------------------------------------
# Sampling
# ---------------------------------------------------------------------------
# NOTE(review): presumably the share of training rows to sample and the number
# of rows used when building reports -- confirm against the consumers.
TRAIN_SAMPLE_FRACTION = 0.5
REPORT_SAMPLE_SIZE = 1500

# ---------------------------------------------------------------------------
# Feature columns
# ---------------------------------------------------------------------------
LEXICAL_FEATURES = [
    'url_length',
    'hostname_length',
    'path_length',
    'query_length',
    'fragment_length',
    'count_dot',
    'count_hyphen',
    'count_underscore',
    'count_slash',
    'count_at',
    'count_equals',
    'count_percent',
    'count_digits',
    'count_letters',
    'count_special_chars',
    'has_ip_address',
    'has_http',
    'has_https',
]

WHOIS_FEATURES = [
    'domain_age_days',
    'domain_lifespan_days',
    'days_since_domain_update',
    'registrar_name',
]

SSL_FEATURES = [
    'cert_age_days',
    'cert_validity_days',
    'cert_issuer_cn',
    'cert_subject_cn',
    'ssl_protocol_version',
    'cert_has_valid_hostname',
]

# Concatenation order is lexical -> WHOIS -> SSL; keep it stable, since
# NUMERICAL_FEATURES below inherits this ordering.
ALL_FEATURE_COLUMNS = (
    LEXICAL_FEATURES +
    WHOIS_FEATURES +
    SSL_FEATURES
)

CATEGORICAL_FEATURES = [
    'registrar_name',
    'cert_issuer_cn',
    'cert_subject_cn',
    'ssl_protocol_version',
]

# Every feature column that is not listed as categorical, in
# ALL_FEATURE_COLUMNS order.
NUMERICAL_FEATURES = [
    col for col in ALL_FEATURE_COLUMNS if col not in CATEGORICAL_FEATURES
]

# ---------------------------------------------------------------------------
# Model hyperparameters
# ---------------------------------------------------------------------------
# NOTE(review): the ML_* values look like the seed and hold-out fraction for
# the classical models, and DL_* the deep-learning training settings --
# confirm against the training code.
ML_MODEL_RANDOM_STATE = 42
ML_TEST_SIZE = 0.2

DL_EPOCHS = 50
DL_BATCH_SIZE = 64
DL_LEARNING_RATE = 0.001