| import difflib |
|
|
| class OffsetMapper: |
| def __init__(self, original, modified): |
| self.original = original |
| self.modified = modified |
| self.mapping = [] |
| self._build_mapping() |
| |
| def _build_mapping(self): |
| s = difflib.SequenceMatcher(None, self.original, self.modified) |
| for tag, i1, i2, j1, j2 in s.get_opcodes(): |
| self.mapping.append((j1, j2, i1, i2)) |
| |
| def map_offset(self, mod_offset): |
| for j1, j2, i1, i2 in self.mapping: |
| if j1 <= mod_offset <= j2: |
| if j2 == j1: |
| return i1 |
| ratio = (mod_offset - j1) / (j2 - j1) |
| return int(i1 + ratio * (i2 - i1)) |
| return len(self.original) |
|
|
| def test(): |
| original = "قال محمد: علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...." |
| spelling = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...." |
| |
| mapper = OffsetMapper(original, spelling) |
| |
| |
| |
| |
| |
| |
| |
| spelling_start = spelling.find("علي") |
| spelling_end = spelling_start + 3 |
| print(f"Spelling string 'علي' is at [{spelling_start}:{spelling_end}]") |
| |
| orig_start = mapper.map_offset(spelling_start) |
| orig_end = mapper.map_offset(spelling_end) |
| print(f"Mapped to original: [{orig_start}:{orig_end}]") |
| print(f"Original text at mapped span: '{original[orig_start:orig_end]}'") |
|
|
| if __name__ == "__main__": |
| test() |
|
|