| """ |
| Phase 11 — Task 5: OffsetMapper Validation Suite |
| |
| Tests the OffsetMapper class for correctness across: |
| - Insertions |
| - Deletions |
| - Replacements |
| - Arabic text mutations |
| - Multi-edit examples |
| - Chained mutations (Spelling → Grammar → Punctuation) |
| |
| Validates: |
| - reverse_map_offset (text_after → text_before) |
| - forward_map_range (text_before → text_after) |
| - _forward_map_pos (internal, tested via forward_map_range) |
| """ |
| import sys |
| import os |
| import difflib |
| import pytest |
|
|
| |
| |
# Put the ``src`` directory that sits one level above this file's directory
# at the front of the import search path.
_SRC_DIR = os.path.join(os.path.dirname(__file__), '..', 'src')
sys.path.insert(0, _SRC_DIR)
|
|
|
|
| |
| |
| |
| |
| |
|
|
class OffsetMapper:
    """Translate character offsets between two versions of a text.

    Local copy of the ``OffsetMapper`` from app.py so the suite can run in
    isolation; its behaviour is intentionally left untouched.

    The mapping is driven by ``difflib.SequenceMatcher`` opcodes, stored as
    ``(before_lo, before_hi, after_lo, after_hi)`` tuples. Offsets that fall
    inside an opcode are interpolated linearly across it.

    NOTE(review): the reverse direction rounds the interpolated value while
    the forward direction truncates it with ``int``.
    """

    def __init__(self, text_before, text_after):
        """Store both texts and compute the opcode table once."""
        self._text_before = text_before
        self._text_after = text_after
        self._opcodes = []
        self._build()

    def _build(self):
        """Append one span tuple per difflib opcode (the tag is dropped)."""
        matcher = difflib.SequenceMatcher(
            None, self._text_before, self._text_after
        )
        self._opcodes.extend(
            (before_lo, before_hi, after_lo, after_hi)
            for _tag, before_lo, before_hi, after_lo, after_hi
            in matcher.get_opcodes()
        )

    def reverse_map_offset(self, pos_in_after):
        """Map an offset in ``text_after`` back to ``text_before``.

        The first opcode whose after-span contains the offset (both ends
        inclusive) wins. If no opcode contains it, the length of
        ``text_before`` is returned.
        """
        for before_lo, before_hi, after_lo, after_hi in self._opcodes:
            if not (after_lo <= pos_in_after <= after_hi):
                continue
            after_len = after_hi - after_lo
            if after_len == 0:
                # Empty after-span: everything collapses onto its start.
                return before_lo
            fraction = (pos_in_after - after_lo) / after_len
            return round(before_lo + fraction * (before_hi - before_lo))
        return len(self._text_before)

    def forward_map_range(self, start_in_before, end_in_before):
        """Map a ``(start, end)`` range from ``text_before`` to ``text_after``.

        The mapped end is clamped so it is never smaller than the mapped
        start.
        """
        mapped_start = self._forward_map_pos(start_in_before)
        mapped_end = self._forward_map_pos(end_in_before)
        return mapped_start, max(mapped_start, mapped_end)

    def _forward_map_pos(self, pos):
        """Map a single offset from ``text_before`` to ``text_after``.

        Offsets not covered by any opcode are extrapolated from the end of
        the last opcode, or returned unchanged when there are no opcodes.
        """
        for before_lo, before_hi, after_lo, after_hi in self._opcodes:
            if not (before_lo <= pos <= before_hi):
                continue
            before_len = before_hi - before_lo
            if before_len == 0:
                # Empty before-span: map onto the start of the after-span.
                return after_lo
            fraction = (pos - before_lo) / before_len
            return int(after_lo + fraction * (after_hi - after_lo))
        if not self._opcodes:
            return pos
        _, last_before_hi, _, last_after_hi = self._opcodes[-1]
        return last_after_hi + (pos - last_before_hi)
|
|
|
|
| |
| |
| |
|
|
class StageLockerStub:
    """Bare-bones StageLocker stand-in used by the chained mutation tests.

    Keeps a list of ``(start, end, owner)`` tuples in ``locked_spans``.
    """

    def __init__(self):
        self.locked_spans = []

    def lock(self, start, end, owner):
        """Record a locked span for ``owner``."""
        span = (start, end, owner)
        self.locked_spans.append(span)

    def is_locked(self, start, end):
        """Return True when ``[start, end)`` overlaps any recorded span."""
        return any(
            start < span_end and end > span_start
            for span_start, span_end, _owner in self.locked_spans
        )

    def update_via_mapper(self, mapper):
        """Re-express every span through ``mapper.forward_map_range``.

        Spans whose mapped end is not greater than their mapped start are
        dropped.
        """
        remapped = [
            (mapper.forward_map_range(span_start, span_end), owner)
            for span_start, span_end, owner in self.locked_spans
        ]
        self.locked_spans = [
            (new_start, new_end, owner)
            for (new_start, new_end), owner in remapped
            if new_end > new_start
        ]
|
|
|
|
| |
| |
| |
|
|
|
|
class TestOffsetMapperIdentity:
    """Identical before/after text: every offset maps to itself."""

    def test_identity_ascii(self):
        mapper = OffsetMapper("hello world", "hello world")
        # Start, middle and end-of-text offsets.
        for offset in (0, 5, 11):
            assert mapper.reverse_map_offset(offset) == offset

    def test_identity_arabic(self):
        text = "مرحبا بالعالم"
        mapper = OffsetMapper(text, text)
        for offset in (0, len(text)):
            assert mapper.reverse_map_offset(offset) == offset

    def test_identity_forward(self):
        mapper = OffsetMapper("hello world", "hello world")
        for span in ((0, 5), (6, 11)):
            assert mapper.forward_map_range(*span) == span
|
|
|
|
class TestOffsetMapperInsertions:
    """A single character inserted at the start, middle or end."""

    def test_insert_beginning(self):
        # "X" is prepended, so offsets after it shift back by one.
        mapper = OffsetMapper("abc", "Xabc")
        expected = {1: 0, 4: 3}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_insert_middle(self):
        # "X" is inserted between "a" and "b".
        mapper = OffsetMapper("abc", "aXbc")
        expected = {0: 0, 2: 1, 3: 2}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_insert_end(self):
        # "X" is appended; offsets inside the original text are unchanged.
        mapper = OffsetMapper("abc", "abcX")
        for offset in (0, 2, 3):
            assert mapper.reverse_map_offset(offset) == offset

    def test_insert_forward(self):
        # The range covering "b" in the old text, mapped into the new text.
        mapper = OffsetMapper("abc", "aXbc")
        mapped = mapper.forward_map_range(1, 2)
        assert mapped == (1, 3)
|
|
|
|
class TestOffsetMapperDeletions:
    """A single character removed from the start, middle or end."""

    def test_delete_beginning(self):
        # Leading "X" is removed. Offset 0 in the new text resolves to 0
        # in the old text, and later offsets shift forward by one.
        mapper = OffsetMapper("Xabc", "abc")
        expected = {0: 0, 2: 3}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_delete_middle(self):
        # "b" is removed from "abcd".
        mapper = OffsetMapper("abcd", "acd")
        expected = {0: 0, 1: 1, 2: 3}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_delete_end(self):
        # Trailing "X" is removed; earlier offsets are unchanged.
        mapper = OffsetMapper("abcX", "abc")
        for offset in (0, 2):
            assert mapper.reverse_map_offset(offset) == offset

    def test_delete_forward(self):
        # The range covering "c" in the old text, mapped into the new text.
        mapper = OffsetMapper("abcd", "acd")
        mapped = mapper.forward_map_range(2, 3)
        assert mapped == (1, 2)
|
|
|
|
class TestOffsetMapperReplacements:
    """One character (or run) swapped for another of equal or different length."""

    def test_replace_same_length(self):
        mapper = OffsetMapper("abc", "aXc")
        for offset in range(3):
            assert mapper.reverse_map_offset(offset) == offset

    def test_replace_longer(self):
        # "b" becomes "XYZ"; the end of the replacement maps to the end of "b".
        mapper = OffsetMapper("abc", "aXYZc")
        expected = {0: 0, 4: 2}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_replace_shorter(self):
        # "XYZ" becomes "b"; the end of "b" maps to the end of "XYZ".
        mapper = OffsetMapper("aXYZc", "abc")
        expected = {0: 0, 1: 1, 2: 4}
        for after_pos, before_pos in expected.items():
            assert mapper.reverse_map_offset(after_pos) == before_pos

    def test_replace_forward(self):
        # The range covering "b" widens to cover "XYZ".
        mapper = OffsetMapper("abc", "aXYZc")
        mapped = mapper.forward_map_range(1, 2)
        assert mapped == (1, 4)
|
|
|
|
class TestOffsetMapperArabic:
    """Typical Arabic corrections: hamza, ta marbuta, word split, tanween, punctuation."""

    def test_hamza_correction(self):
        # Same-length substitution, so start and end offsets are preserved.
        source, corrected = "الانسان", "الإنسان"
        mapper = OffsetMapper(source, corrected)
        assert mapper.reverse_map_offset(0) == 0
        assert mapper.reverse_map_offset(len(corrected)) == len(source)

    def test_ta_marbuta(self):
        # Only the final character differs.
        source, corrected = "المدرسه", "المدرسة"
        mapper = OffsetMapper(source, corrected)
        assert mapper.reverse_map_offset(0) == 0
        assert mapper.reverse_map_offset(len(corrected) - 1) == len(source) - 1

    def test_word_split(self):
        # A space is inserted; end of text still maps to end of text.
        source, corrected = "فيالمدرسة", "في المدرسة"
        mapper = OffsetMapper(source, corrected)
        assert mapper.reverse_map_offset(len(corrected)) == len(source)

    def test_tanween_removal(self):
        # The trailing diacritic is deleted. The end of the new text is
        # expected to resolve to offset 3, not to the old text length.
        source, corrected = "جداً", "جدا"
        mapper = OffsetMapper(source, corrected)
        assert mapper.reverse_map_offset(0) == 0
        assert mapper.reverse_map_offset(len(corrected)) == 3

    def test_punct_addition(self):
        # A period is appended; offsets inside the old text are unchanged.
        source, corrected = "مرحبا", "مرحبا."
        mapper = OffsetMapper(source, corrected)
        assert mapper.reverse_map_offset(0) == 0
        assert mapper.reverse_map_offset(len(source)) == len(source)
|
|
|
|
class TestOffsetMapperMultiEdit:
    """Several edits within one before/after pair."""

    def test_two_replacements(self):
        # Two separate same-length substitutions: offsets stay put.
        mapper = OffsetMapper("abcd", "aXcY")
        for offset in range(4):
            assert mapper.reverse_map_offset(offset) == offset

    def test_insert_and_delete(self):
        # NOTE(review): despite the name, this pair differs by a single
        # same-length substitution ("b" -> "X").
        mapper = OffsetMapper("abcde", "aXcde")
        for offset in (0, 1, 4):
            assert mapper.reverse_map_offset(offset) == offset

    def test_arabic_multi_edit(self):
        # Three same-length corrections spread over the sentence.
        source = "الانسان يذهب الى المدرسه"
        corrected = "الإنسان يذهب إلى المدرسة"
        mapper = OffsetMapper(source, corrected)

        assert mapper.reverse_map_offset(0) == 0
        assert mapper.reverse_map_offset(len(corrected)) == len(source)
|
|
|
|
class TestOffsetMapperForwardReverse:
    """Mapping a range forward and then back should land near where it started."""

    def _check_roundtrip_forward(self, before, after, start, end):
        """Map ``(start, end)`` before->after, then after->before.

        The return trip uses a second mapper built with the texts swapped.
        Each endpoint may drift by at most one position.
        """
        mapped_start, mapped_end = OffsetMapper(before, after).forward_map_range(
            start, end
        )
        back_start, back_end = OffsetMapper(after, before).forward_map_range(
            mapped_start, mapped_end
        )

        start_drift = abs(back_start - start)
        end_drift = abs(back_end - end)
        assert start_drift <= 1, f"Start drift: {start} → {mapped_start} → {back_start}"
        assert end_drift <= 1, f"End drift: {end} → {mapped_end} → {back_end}"

    def test_roundtrip_identity(self):
        self._check_roundtrip_forward("hello world", "hello world", 0, 5)

    def test_roundtrip_insertion(self):
        self._check_roundtrip_forward("abc", "aXbc", 0, 1)

    def test_roundtrip_deletion(self):
        self._check_roundtrip_forward("abcd", "acd", 0, 1)

    def test_roundtrip_arabic(self):
        self._check_roundtrip_forward("الانسان", "الإنسان", 0, 2)
|
|
|
|
class TestOffsetMapperChained:
    """Chained mutations simulating Spelling → Grammar → Punctuation."""

    def test_three_stage_chain(self):
        """Walk an offset in the final text back to the original text.

        Spelling fixes a hamza (same length), the grammar stage leaves the
        text unchanged in this example, and punctuation appends a period.
        """
        original = "الانسان يذهب"
        after_spelling = "الإنسان يذهب"
        after_grammar = "الإنسان يذهب"
        after_punct = "الإنسان يذهب."

        # One mapper per stage, in pipeline order.
        stage_mappers = [
            OffsetMapper(original, after_spelling),
            OffsetMapper(after_spelling, after_grammar),
            OffsetMapper(after_grammar, after_punct),
        ]

        # Offset of the appended period in the final text.
        pos = len(after_punct) - 1

        # Undo the stages last-to-first to get back to original coordinates.
        for mapper in reversed(stage_mappers):
            pos = mapper.reverse_map_offset(pos)

        # The period sits right after the last original character.
        assert pos == len(original)

    def test_stagelocker_with_mapper(self):
        """Verify StageLocker spans shift correctly through mutations."""
        locker = StageLockerStub()

        original = "في الانسان كبير"
        after_spelling = "في الإنسان كبير"
        after_grammar = "في الإنسان كبيرة"

        # Lock the corrected word (offsets 3..10) and carry the lock through
        # the spelling stage so that stage is really part of the chain.
        locker.lock(3, 10, 'spelling')
        locker.update_via_mapper(OffsetMapper(original, after_spelling))

        assert locker.is_locked(3, 10)
        assert not locker.is_locked(11, 16)

        # Grammar appends one character at the very end of the text.
        locker.update_via_mapper(OffsetMapper(after_spelling, after_grammar))

        spelling_spans = [
            (ls, le) for ls, le, owner in locker.locked_spans
            if owner == 'spelling'
        ]
        for ls, le in spelling_spans:
            # Allow one position of drift on either edge.
            assert 2 <= ls <= 4
            assert 9 <= le <= 11
        assert spelling_spans, "Spelling lock was lost during grammar mutation"

    def test_chained_offset_accuracy(self):
        """Full pipeline: verify ORIGINAL coordinates are recoverable."""
        original = "الانسان يذهب الى المدرسه"
        after_spell = "الإنسان يذهب إلى المدرسة"
        after_grammar = "الإنسان يذهب إلى المدرسة"
        after_punct = "الإنسان يذهب إلى المدرسة."

        stage_mappers = [
            OffsetMapper(original, after_spell),
            OffsetMapper(after_spell, after_grammar),
            OffsetMapper(after_grammar, after_punct),
        ]

        # Start of the last word in the final text.
        idx = after_punct.index("المدرسة")
        assert idx > 0

        # Walk the offset back through every stage, last stage first.
        pos = idx
        for mapper in reversed(stage_mappers):
            pos = mapper.reverse_map_offset(pos)

        # The same word (pre-correction spelling) in the original text.
        orig_idx = original.index("المدرسه")
        assert abs(pos - orig_idx) <= 1, f"Expected ~{orig_idx}, got {pos}"
|
|
|
|
class TestOffsetMapperEdgeCases:
    """Empty inputs, single characters and out-of-range offsets."""

    def test_empty_strings(self):
        mapper = OffsetMapper("", "")
        assert mapper.reverse_map_offset(0) == 0

    def test_empty_to_text(self):
        # Everything in the new text maps back to offset 0 of the empty text.
        mapper = OffsetMapper("", "hello")
        for after_pos in (0, 5):
            assert mapper.reverse_map_offset(after_pos) == 0

    def test_text_to_empty(self):
        mapper = OffsetMapper("hello", "")
        assert mapper.reverse_map_offset(0) == 0

    def test_single_char_replace(self):
        mapper = OffsetMapper("a", "b")
        assert mapper.reverse_map_offset(0) == 0

    def test_monotonicity_guard(self):
        """A forward-mapped range must never come back with start > end."""
        mapper = OffsetMapper("abcdef", "aXYZf")
        s, e = mapper.forward_map_range(1, 5)
        assert not (s > e), f"Inverted range: ({s}, {e})"

    def test_position_beyond_text(self):
        """An offset past the end of the new text maps to the old text's length."""
        mapper = OffsetMapper("abc", "aXbc")
        assert mapper.reverse_map_offset(100) == len("abc")
|
|
|
|
| |
| |
| |
|
|
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # running this file as a script exits non-zero when a test fails.
    sys.exit(pytest.main([__file__, '-v', '--tb=short']))
|
|