coptic-translation-interface / coptic_prolog_rules.py
Rogaton
Restore full Walter Till grammar integration with Prolog validation
a1ad529
#!/usr/bin/env python3
"""
Coptic Prolog Rules - Neural-Symbolic Integration
==================================================
Integrates Prolog logic programming with neural dependency parsing
to enhance parsing accuracy through explicit grammatical rules.
Uses pyswip (Python interface to SWI-Prolog) for bidirectional integration.
Author: Coptic NLP Project
License: CC BY-NC-SA 4.0
"""
from pyswip import Prolog
import warnings
import sys
import os
from contextlib import contextmanager
warnings.filterwarnings('ignore')
@contextmanager
def suppress_stderr():
    """
    Context manager that points ``sys.stderr`` at the null device.

    While the ``with`` body runs, anything written through Python's
    ``sys.stderr`` is discarded. The previous stream object is put back
    when the body finishes, whether it returns normally or raises.

    NOTE(review): only the Python-level ``sys.stderr`` object is swapped;
    output written directly to file descriptor 2 by native code is
    presumably unaffected - confirm against the Prolog engine's behavior.
    """
    with open(os.devnull, 'w') as sink:
        saved_stream = sys.stderr
        sys.stderr = sink
        try:
            yield
        finally:
            # Restore first so the sink is never the active stream once closed.
            sys.stderr = saved_stream
class CopticPrologRules:
    """
    Prolog-based grammatical rule engine for Coptic parsing validation
    and enhancement.

    On construction the engine creates a pyswip ``Prolog`` instance, tries to
    consult ``coptic_grammar.pl`` from this module's directory, and asserts the
    clauses listed in ``_GRAMMAR_CLAUSES``. If any of that fails,
    ``prolog_initialized`` stays False and the public methods return a neutral
    "not available" result instead of raising.

    Attributes:
        prolog_initialized: True once the grammar clauses were asserted.
        prolog: The pyswip engine object, or None.
        dcg_loaded: True when coptic_grammar.pl was consulted successfully.
    """

    # Clauses asserted into the Prolog database, in this exact order.
    # Clause order within a predicate matters: callers take the FIRST
    # solution (e.g. for suggest_correction/3 and definite_article/1), so
    # specific clauses must stay ahead of their catch-all counterparts.
    _GRAMMAR_CLAUSES = (
        # ---------------------------------------------------------------
        # COPTIC MORPHOLOGICAL RULES
        # ---------------------------------------------------------------
        # Article system: definite articles
        "definite_article('ⲡ')",    # masculine singular
        "definite_article('ⲧ')",    # feminine singular
        "definite_article('ⲛ')",    # plural
        "definite_article('ⲡⲉ')",   # masculine singular (variant)
        "definite_article('ⲧⲉ')",   # feminine singular (variant)
        "definite_article('ⲛⲉ')",   # plural (variant)
        # Pronominal system - independent pronouns
        "independent_pronoun('ⲁⲛⲟⲕ')",   # I
        "independent_pronoun('ⲛⲧⲟⲕ')",   # you (m.sg)
        "independent_pronoun('ⲛⲧⲟ')",    # you (f.sg)
        "independent_pronoun('ⲛⲧⲟϥ')",   # he
        "independent_pronoun('ⲛⲧⲟⲥ')",   # she
        "independent_pronoun('ⲁⲛⲟⲛ')",   # we
        "independent_pronoun('ⲛⲧⲱⲧⲛ')",  # you (pl)
        "independent_pronoun('ⲛⲧⲟⲟⲩ')",  # they
        # Suffix pronouns (enclitic)
        "suffix_pronoun('ⲓ')",    # my/me
        "suffix_pronoun('ⲕ')",    # your (m.sg)
        "suffix_pronoun('ϥ')",    # his/him
        "suffix_pronoun('ⲥ')",    # her
        "suffix_pronoun('ⲛ')",    # our/us
        "suffix_pronoun('ⲧⲛ')",   # your (pl)
        "suffix_pronoun('ⲟⲩ')",   # their/them
        # Verbal system - conjugation bases (tense/aspect markers)
        "conjugation_base('ⲁ')",       # Perfect (aorist)
        "conjugation_base('ⲛⲉ')",      # Imperfect/past
        "conjugation_base('ϣⲁ')",      # Future/conditional
        "conjugation_base('ⲙⲡⲉ')",     # Negative perfect
        "conjugation_base('ⲙⲛ')",      # Negative existential
        "conjugation_base('ⲉⲣϣⲁⲛ')",   # Conditional
        # Auxiliary verbs (copulas)
        "copula('ⲡⲉ')",   # is (m.sg)
        "copula('ⲧⲉ')",   # is (f.sg)
        "copula('ⲛⲉ')",   # are (pl)
        # ---------------------------------------------------------------
        # COPTIC SYNTACTIC RULES
        # ---------------------------------------------------------------
        # Valid NP structure: Article + Noun
        "valid_np(Article, Noun) :- definite_article(Article), noun_compatible(Noun)",
        # Helper: any word can be a noun (simplified)
        "noun_compatible(_)",
        # Definiteness is marked by articles
        "requires_definiteness(Noun, Article) :- definite_article(Article)",
        # Tripartite nominal sentence: Subject - Copula - Predicate
        # Example: ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ (I am God)
        "tripartite_sentence(Subject, Copula, Predicate) :- independent_pronoun(Subject), copula(Copula), noun_compatible(Predicate)",
        # Verbal sentence: Conjugation + Subject + Verb
        "verbal_sentence(Conj, Subject, Verb) :- conjugation_base(Conj), (independent_pronoun(Subject) ; definite_article(Subject)), verb_compatible(Verb)",
        # Helper: any word can be a verb (simplified)
        "verb_compatible(_)",
        # ---------------------------------------------------------------
        # DEPENDENCY VALIDATION RULES
        # ---------------------------------------------------------------
        "valid_subject_verb(Subject, Verb, SubjPOS, VerbPOS) :- member(SubjPOS, ['PRON', 'NOUN', 'PROPN']), member(VerbPOS, ['VERB', 'AUX'])",
        "valid_det_noun(Det, Noun, DetPOS, NounPOS) :- DetPOS = 'DET', member(NounPOS, ['NOUN', 'PROPN'])",
        "valid_modifier(Head, Modifier, ModPOS) :- member(ModPOS, ['ADJ', 'ADV', 'DET'])",
        # Content words must not carry the 'punct' relation
        "invalid_punct(Word, POS, Relation) :- Relation = 'punct', member(POS, ['VERB', 'NOUN', 'PRON', 'PROPN', 'DET', 'ADJ', 'ADV', 'AUX', 'NUM'])",
        # ---------------------------------------------------------------
        # ERROR CORRECTION RULES: suggest_correction(DepPOS, HeadPOS, Relation)
        # ---------------------------------------------------------------
        "suggest_correction('DET', _, 'det')",
        "suggest_correction('PRON', 'VERB', 'nsubj')",   # pronoun with verb head = subject
        "suggest_correction('PRON', 'AUX', 'nsubj')",    # pronoun with aux head = subject
        "suggest_correction('PRON', _, 'nsubj')",        # default for pronoun
        "suggest_correction('NOUN', 'VERB', 'obj')",     # noun with verb head = object
        "suggest_correction('NOUN', 'AUX', 'nsubj')",    # noun with copula head
        "suggest_correction('NOUN', _, 'obl')",          # default for noun
        "suggest_correction('VERB', 'SCONJ', 'ccomp')",  # verb under subordinator = complement
        "suggest_correction('VERB', 'VERB', 'ccomp')",   # verb under verb = complement
        "suggest_correction('VERB', _, 'root')",         # default for verb
        "suggest_correction('AUX', _, 'cop')",           # copula relation
        "suggest_correction('ADJ', 'NOUN', 'amod')",     # adjective modifying noun
        "suggest_correction('ADV', _, 'advmod')",        # adverbial modifier
        "suggest_correction('NUM', 'NOUN', 'nummod')",   # number modifying noun
        "suggest_correction('NUM', _, 'obl')",           # default for number
        # ---------------------------------------------------------------
        # MORPHOLOGICAL ANALYSIS RULES
        # ---------------------------------------------------------------
        # Clitic attachment: Word = Base + Suffix with a non-empty Base
        "has_suffix_pronoun(Word, Base, Suffix) :- atom_concat(Base, Suffix, Word), suffix_pronoun(Suffix), atom_length(Base, BaseLen), BaseLen > 0",
        # Article stripping: Word = Article + Lemma with a non-empty Lemma
        "strip_article(Word, Lemma) :- definite_article(Article), atom_concat(Article, Lemma, Word), atom_length(Lemma, LemmaLen), LemmaLen > 0",
        # If no article prefix matches, the word is its own lemma
        "strip_article(Word, Word) :- \\+ (definite_article(Article), atom_concat(Article, _, Word))",
    )

    def __init__(self):
        """Initialize Prolog engine and load Coptic grammar rules"""
        self.prolog_initialized = False
        self.prolog = None
        # Set up-front so the attribute exists even when engine creation fails
        # before the DCG loader gets a chance to run.
        self.dcg_loaded = False
        self._initialize_prolog()

    def _initialize_prolog(self):
        """
        Create the pyswip engine and load the grammar into it.

        Any failure is reported on stdout and leaves ``prolog_initialized``
        False; nothing is raised to the caller.
        """
        try:
            self.prolog = Prolog()
            self._load_coptic_grammar()
            self.prolog_initialized = True
            print("✓ Prolog engine initialized successfully")
        except Exception as e:
            print(f"⚠️ Warning: Prolog initialization failed: {e}")
            print(" Parser will continue without Prolog validation")
            self.prolog_initialized = False

    @staticmethod
    def _quote_atom(value):
        """
        Render *value* as a single-quoted Prolog atom literal.

        Backslashes and single quotes are escaped so arbitrary text (tokens
        from a corpus, file paths) cannot break out of the quoted atom.

        Args:
            value: Any object; it is converted with ``str()`` first.
        Returns:
            str: The quoted literal, e.g. ``'it\\'s'`` for ``it's``.
        """
        # Backslash first, otherwise the escapes added for quotes get doubled.
        text = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{text}'"

    def _has_solution(self, query):
        """Run *query* to exhaustion and report whether it had any solution."""
        return bool(list(self.prolog.query(query)))

    def _load_dcg_grammar(self):
        """
        Load DCG-based grammar rules from coptic_grammar.pl
        and Coptic lexicon from coptic_lexicon.pl
        This adds more sophisticated pattern matching using Definite Clause Grammars,
        adapted from the French DETECT5.PRO error detector.

        Sets ``self.dcg_loaded`` to True on success and False when the file is
        missing or consulting it raises.
        """
        try:
            from pathlib import Path
            # The grammar file is expected to pull in the lexicon itself
            # (via ensure_loaded), so only one file is consulted here.
            grammar_file = Path(__file__).parent / "coptic_grammar.pl"
            if not grammar_file.exists():
                print(f"ℹ DCG grammar file not found at {grammar_file}")
                self.dcg_loaded = False
                return
            # Prolog accepts forward slashes on every platform.
            grammar_path = str(grammar_file.absolute()).replace('\\', '/')
            list(self.prolog.query(f"consult({self._quote_atom(grammar_path)})"))
            print(f"✓ DCG grammar rules and lexicon loaded from {grammar_file.name}")
            self.dcg_loaded = True
        except Exception as e:
            print(f"⚠️ Warning: Could not load DCG grammar: {e}")
            self.dcg_loaded = False

    def _load_coptic_grammar(self):
        """
        Load Coptic linguistic rules into Prolog.

        First attempts the optional DCG grammar file, then asserts every
        clause of ``_GRAMMAR_CLAUSES`` in order. An exception from
        ``assertz`` propagates to the caller.
        """
        self._load_dcg_grammar()
        for clause in self._GRAMMAR_CLAUSES:
            self.prolog.assertz(clause)
        print("✓ Coptic grammatical rules loaded into Prolog")

    # ===================================================================
    # PYTHON INTERFACE METHODS
    # ===================================================================
    def validate_dependency(self, head_word, dep_word, head_pos, dep_pos, relation):
        """
        Validate a dependency relation using Prolog rules
        Args:
            head_word: The head word text
            dep_word: The dependent word text
            head_pos: POS tag of head
            dep_pos: POS tag of dependent
            relation: Dependency relation (nsubj, obj, det, etc.)
        Returns:
            dict: ``{"valid": True, "warnings": [...], "suggestions": [...]}``.
            When Prolog is unavailable or a query raises, a dict with
            ``"valid": True`` and a ``"message"`` is returned instead.
        """
        if not self.prolog_initialized:
            return {"valid": True, "message": "Prolog not available"}
        quote = self._quote_atom
        try:
            result = {"valid": True, "warnings": [], "suggestions": []}
            pair_args = (
                f"{quote(dep_word)}, {quote(head_word)}, "
                f"{quote(dep_pos)}, {quote(head_pos)}"
            )
            if relation in ('nsubj', 'csubj'):
                if not self._has_solution(f"valid_subject_verb({pair_args})"):
                    result["warnings"].append(
                        f"Unusual subject-verb: {dep_word} ({dep_pos}) → {head_word} ({head_pos})"
                    )
            elif relation == 'det':
                if not self._has_solution(f"valid_det_noun({pair_args})"):
                    result["warnings"].append(
                        f"Unusual det-noun: {dep_word}{head_word}"
                    )

            # Content words labeled 'punct' are parser errors; look up a better label.
            punct_query = (
                f"invalid_punct({quote(dep_word)}, {quote(dep_pos)}, {quote(relation)})"
            )
            if self._has_solution(punct_query):
                corrections = list(self.prolog.query(
                    f"suggest_correction({quote(dep_pos)}, {quote(head_pos)}, Suggestion)"
                ))
                if corrections and 'Suggestion' in corrections[0]:
                    # First solution wins: specific clauses precede the defaults.
                    suggested_rel = corrections[0]['Suggestion']
                    result["warnings"].append(
                        f"⚠️ PARSER ERROR: '{dep_word}' ({dep_pos}) incorrectly labeled as 'punct' → SUGGESTED: '{suggested_rel}'"
                    )
                    result["suggestions"].append({
                        "word": dep_word,
                        "pos": dep_pos,
                        "incorrect": relation,
                        "suggested": suggested_rel,
                        "head_pos": head_pos
                    })
                else:
                    result["warnings"].append(
                        f"⚠️ PARSER ERROR: '{dep_word}' ({dep_pos}) incorrectly labeled as 'punct' - should be a content relation"
                    )
            return result
        except Exception as e:
            return {"valid": True, "message": f"Validation error: {e}"}

    def check_tripartite_pattern(self, words, pos_tags):
        """
        Check if a sentence follows the Coptic tripartite nominal pattern

        Only the first three words are examined; ``pos_tags`` is accepted for
        interface symmetry but not consulted.

        Args:
            words: List of word forms
            pos_tags: List of POS tags
        Returns:
            dict: ``is_tripartite`` plus ``pattern``/``description`` (None when
            the pattern does not match), or ``is_tripartite`` and ``error``
            when the query raises.
        """
        if not self.prolog_initialized or len(words) < 3:
            return {"is_tripartite": False}
        try:
            subj, cop, pred = words[0], words[1], words[2]
            quote = self._quote_atom
            query = f"tripartite_sentence({quote(subj)}, {quote(cop)}, {quote(pred)})"
            is_tripartite = self._has_solution(query)
            return {
                "is_tripartite": is_tripartite,
                "pattern": f"{subj} - {cop} - {pred}" if is_tripartite else None,
                "description": "Tripartite nominal sentence" if is_tripartite else None
            }
        except Exception as e:
            return {"is_tripartite": False, "error": str(e)}

    def analyze_morphology(self, word):
        """
        Analyze word morphology using Prolog rules
        Args:
            word: Coptic word to analyze
        Returns:
            dict: Always contains ``word``. On success also ``components``
            (currently always empty) and, when found, ``has_article``/
            ``lemma``/``article`` and ``has_suffix``/``base``/``suffix``.
            Contains ``error`` instead when a query raises.
        """
        if not self.prolog_initialized:
            return {"word": word, "analyzed": False}
        try:
            analysis = {"word": word, "components": []}
            quoted_word = self._quote_atom(word)

            article_rows = list(self.prolog.query(f"strip_article({quoted_word}, Lemma)"))
            if article_rows and 'Lemma' in article_rows[0]:
                lemma = article_rows[0]['Lemma']
                if lemma != word:
                    analysis["has_article"] = True
                    analysis["lemma"] = lemma
                    if isinstance(lemma, str) and word.endswith(lemma):
                        # The article is whatever precedes the lemma; slicing
                        # stays correct even if the lemma text also occurs
                        # inside the article part.
                        analysis["article"] = word[:len(word) - len(lemma)]
                    else:
                        analysis["article"] = word.replace(lemma, '')

            suffix_rows = list(self.prolog.query(
                f"has_suffix_pronoun({quoted_word}, Base, Suffix)"
            ))
            if suffix_rows:
                first = suffix_rows[0]
                analysis["has_suffix"] = True
                analysis["base"] = first.get('Base')
                analysis["suffix"] = first.get('Suffix')
            return analysis
        except Exception as e:
            return {"word": word, "error": str(e)}

    def validate_parse_tree(self, words, pos_tags, heads, deprels):
        """
        Validate an entire parse tree using Prolog constraints
        Args:
            words: List of word forms
            pos_tags: List of POS tags
            heads: List of head indices (1-based; values outside
                ``1..len(words)`` are treated as root and skipped)
            deprels: List of dependency relations
        Returns:
            dict: Overall validation results with ``warnings``,
            ``suggestions`` and ``patterns_found``; or ``validated: False``
            with a ``reason``/``error`` when validation could not run.
        """
        if not self.prolog_initialized:
            return {"validated": False, "reason": "Prolog not available"}
        try:
            results = {
                "validated": True,
                "warnings": [],
                "suggestions": [],
                "patterns_found": []
            }
            tripartite = self.check_tripartite_pattern(words, pos_tags)
            if tripartite.get("is_tripartite"):
                results["patterns_found"].append(tripartite)

            # getattr keeps this safe for instances created without __init__.
            if getattr(self, 'dcg_loaded', False):
                try:
                    dcg_results = self._validate_with_dcg(words, pos_tags, heads, deprels)
                    if dcg_results and isinstance(dcg_results, dict):
                        results["patterns_found"].extend(dcg_results.get("patterns_found") or [])
                        results["warnings"].extend(dcg_results.get("warnings") or [])
                except Exception as e:
                    # DCG checks are optional; the per-dependency pass still runs.
                    print(f"Warning: DCG validation failed: {e}")

            for word, pos, head, rel in zip(words, pos_tags, heads, deprels):
                if not 0 < head <= len(words):
                    continue  # root or out-of-range head
                validation = self.validate_dependency(
                    words[head - 1], word, pos_tags[head - 1], pos, rel
                )
                results["warnings"].extend(validation.get("warnings") or [])
                results["suggestions"].extend(validation.get("suggestions") or [])
            return results
        except Exception as e:
            return {"validated": False, "error": str(e)}

    def _collect_dcg_terms(self, query, variable, label):
        """
        Gather and format every non-empty binding of *variable* for *query*.

        Args:
            query: Prolog goal to enumerate.
            variable: Name of the variable whose bindings are collected.
            label: Word used in the diagnostic printed when the query raises.
        Returns:
            list: Formatted terms collected before any error occurred.
        """
        collected = []
        try:
            for row in self.prolog.query(query):
                if isinstance(row, dict) and row.get(variable):
                    collected.append(self._format_prolog_term(row[variable]))
        except Exception as e:
            print(f"Warning: Error retrieving {label}: {e}")
        return collected

    def _validate_with_dcg(self, words, pos_tags, heads, deprels):
        """
        Validate parse tree using DCG grammar rules
        Args:
            words: List of word tokens
            pos_tags: List of POS tags
            heads: List of head indices
            deprels: List of dependency relations
        Returns:
            dict: ``patterns_found`` and ``warnings`` lists (both empty when
            the validation predicate is unavailable or anything fails).
        """
        try:
            words_pl = self._list_to_prolog_atoms(words)
            pos_pl = self._list_to_prolog_atoms(pos_tags)
            heads_pl = '[' + ','.join(map(str, heads)) + ']'
            deprels_pl = self._list_to_prolog_atoms(deprels)
            # Per the original author's note, coptic_grammar.pl may not provide
            # validate_parse_tree/4; in that case the query raises and the
            # assertz-based checks remain the only validation.
            query = f"coptic_dependency_rules:validate_parse_tree({words_pl}, {pos_pl}, {heads_pl}, {deprels_pl})"
            try:
                with suppress_stderr():
                    list(self.prolog.query(query))
            except Exception:
                return {"patterns_found": [], "warnings": []}

            patterns = self._collect_dcg_terms("coptic_grammar:pattern_found(P)", 'P', "patterns")
            dcg_warnings = self._collect_dcg_terms("coptic_grammar:warning(W)", 'W', "warnings")

            # Clear the dynamic facts so the next sentence starts clean.
            try:
                list(self.prolog.query("coptic_grammar:retractall(pattern_found(_))"))
                list(self.prolog.query("coptic_grammar:retractall(warning(_))"))
            except Exception as e:
                print(f"Warning: Error cleaning up Prolog predicates: {e}")
            return {
                "patterns_found": patterns,
                "warnings": dcg_warnings
            }
        except Exception as e:
            print(f"DCG validation error: {e}")
            import traceback
            traceback.print_exc()
            return {
                "patterns_found": [],
                "warnings": []
            }

    def _list_to_prolog_atoms(self, python_list):
        """
        Convert Python list of strings to Prolog list with properly quoted atoms
        Args:
            python_list: Python list of strings
        Returns:
            str: Prolog list syntax, e.g. ``['a','b']``; ``[]`` for empty input
        """
        if not python_list:
            return "[]"
        return '[' + ','.join(self._quote_atom(item) for item in python_list) + ']'

    def _format_prolog_term(self, term):
        """
        Format a Prolog term for Python display
        Args:
            term: Prolog term (can be atom, list, or compound)
        Returns:
            dict: Formatted representation (always a dict)
        """
        if isinstance(term, list):
            formatted = {}
            for item in term:
                # Compound terms such as pattern_name('...') expose name/args;
                # the functor becomes the key, its first argument the value.
                if hasattr(item, 'name') and hasattr(item, 'args'):
                    first_arg = item.args[0] if len(item.args) > 0 else None
                    formatted[item.name] = str(first_arg) if first_arg is not None else ''
            return formatted if formatted else {'data': str(term)}
        if isinstance(term, str):
            return {'type': term, 'data': term}
        return {'data': str(term)}

    def query_prolog(self, query_string):
        """
        Direct Prolog query interface for custom queries

        The string is passed to the engine verbatim, so it must come from a
        trusted source.

        Args:
            query_string: Prolog query as string
        Returns:
            The first solution (a dict of bindings), or None when there is no
            solution, Prolog is unavailable, or the query raises.
        """
        if not self.prolog_initialized:
            return None
        try:
            results = list(self.prolog.query(query_string))
            return results[0] if results else None
        except Exception as e:
            print(f"Prolog query error: {e}")
            return None

    def cleanup(self):
        """
        Cleanup Prolog engine and threads properly

        Does nothing unless the engine was initialized. Afterwards
        ``prolog`` is None and ``prolog_initialized`` is False.
        """
        if self.prolog_initialized and self.prolog is not None:
            try:
                try:
                    # NOTE(review): depending on the SWI-Prolog version, halt/0
                    # may terminate the host process instead of raising -
                    # confirm this is the intended shutdown path.
                    list(self.prolog.query("halt"))
                except Exception:
                    # An error while the engine stops is expected; interrupts
                    # and interpreter exit are deliberately not swallowed.
                    pass
                self.prolog = None
                self.prolog_initialized = False
                print("✓ Prolog engine cleaned up successfully")
            except Exception as e:
                print(f"Warning: Error during Prolog cleanup: {e}")
# ===================================================================
# CONVENIENCE FUNCTIONS
# ===================================================================
def create_prolog_engine():
    """
    Build a ready-to-use Coptic rule engine.

    Returns:
        CopticPrologRules: A freshly constructed instance; check its
        ``prolog_initialized`` attribute to see whether Prolog is usable.
    """
    engine = CopticPrologRules()
    return engine
# ===================================================================
# EXAMPLE USAGE
# ===================================================================
def _run_demo():
    """
    Exercise the rule engine end to end and print the results.

    Exits the interpreter with status 1 when the Prolog engine could not be
    initialized; otherwise prints the outcome of four sample checks.
    """
    banner = "=" * 70
    print(banner)
    print("Coptic Prolog Rules - Test Suite")
    print(banner)

    engine = create_prolog_engine()
    if not engine.prolog_initialized:
        print("\n⚠️ Prolog not available. Cannot run tests.")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)

    print("\n" + banner)
    print("TEST 1: Tripartite Pattern Recognition")
    print(banner)
    # Tripartite sentence: ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ (I am God)
    words = ['ⲁⲛⲟⲕ', 'ⲡⲉ', 'ⲡⲛⲟⲩⲧⲉ']
    pos_tags = ['PRON', 'AUX', 'NOUN']
    result = engine.check_tripartite_pattern(words, pos_tags)
    print(f"\nInput: {' '.join(words)}")
    print(f"Result: {result}")

    print("\n" + banner)
    print("TEST 2: Morphological Analysis")
    print(banner)
    # Article stripping
    for word in ['ⲡⲛⲟⲩⲧⲉ', 'ⲧⲃⲁϣⲟⲣ', 'ⲛⲣⲱⲙⲉ']:
        analysis = engine.analyze_morphology(word)
        print(f"\nWord: {word}")
        print(f"Analysis: {analysis}")

    print("\n" + banner)
    print("TEST 3: Dependency Validation")
    print(banner)
    # Subject-verb relationship
    validation = engine.validate_dependency(
        head_word='ⲡⲉ',
        dep_word='ⲁⲛⲟⲕ',
        head_pos='AUX',
        dep_pos='PRON',
        relation='nsubj'
    )
    print("\nDependency: ⲁⲛⲟⲕ (PRON) --nsubj--> ⲡⲉ (AUX)")
    print(f"Validation: {validation}")

    print("\n" + banner)
    print("TEST 4: Custom Prolog Query")
    print(banner)
    result = engine.query_prolog("definite_article(X)")
    print("\nQuery: definite_article(X)")
    print(f"Result: {result}")

    print("\n" + banner)
    print("All tests completed!")
    print(banner)


if __name__ == "__main__":
    _run_demo()