# bayan-api/tests/test_mapper.py
# Last commit dfe1d91 (youssefreda9):
#   "ui: Clean up editor placeholder text and alignment (top-right)"
# File size: 1.71 kB
import difflib
class OffsetMapper:
    """Map character offsets in a modified text back onto the original text.

    The two texts are aligned once with ``difflib.SequenceMatcher``; each
    resulting opcode is stored in ``self.mapping`` as a ``(j1, j2, i1, i2)``
    tuple meaning ``modified[j1:j2]`` corresponds to ``original[i1:i2]``.
    """

    def __init__(self, original: str, modified: str) -> None:
        """Store both texts and build the alignment between them.

        Args:
            original: The text that offsets should be mapped *to*.
            modified: The text that offsets are given *in*.
        """
        self.original = original
        self.modified = modified
        # List of (j1, j2, i1, i2) spans, in opcode order.
        self.mapping = []
        self._build_mapping()

    def _build_mapping(self) -> None:
        """(Re)build ``self.mapping`` from the SequenceMatcher opcodes."""
        # autojunk=False: the default heuristic treats frequent items (e.g.
        # spaces in long texts) as junk once a sequence reaches 200 items,
        # which degrades character-level alignment.
        matcher = difflib.SequenceMatcher(
            None, self.original, self.modified, autojunk=False
        )
        # Assign rather than append so a repeated call does not duplicate
        # the spans.
        self.mapping = [
            (j1, j2, i1, i2)
            for _tag, i1, i2, j1, j2 in matcher.get_opcodes()
        ]

    def map_offset(self, mod_offset: int) -> int:
        """Return the offset in ``original`` matching ``mod_offset``.

        The first span whose modified range contains ``mod_offset``
        (both ends inclusive) is used. Inside that span the position is
        scaled linearly onto the original range and rounded down; for spans
        of equal length this is an exact one-to-one shift.

        Args:
            mod_offset: Character offset into the modified text.

        Returns:
            The corresponding offset into the original text. Offsets not
            covered by any span (e.g. past the end of the modified text)
            map to ``len(self.original)``.
        """
        for j1, j2, i1, i2 in self.mapping:
            if not j1 <= mod_offset <= j2:
                continue
            mod_len = j2 - j1
            if mod_len == 0:
                # Empty modified range (text deleted from the original):
                # anchor at the start of the deleted original range.
                return i1
            # Integer arithmetic on purpose: a float ratio can land just
            # below the exact value (e.g. 1 / 49 * 49 < 1) and then be
            # truncated to the previous index.
            return i1 + (mod_offset - j1) * (i2 - i1) // mod_len
        return len(self.original)
def test():
    """Check that a span found in the corrected text maps back correctly.

    The corrected ("spelling") text differs from the original only by the
    removal of the colon after the second word, so every later offset is
    shifted by one. The span of the name in the corrected text must map
    back to the same name in the original text.
    """
    original = "قال محمد: علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
    spelling = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
    mapper = OffsetMapper(original, spelling)

    needle = "علي"
    # In spelling text, where is "علي"?
    # "قال محمد علي أننا"
    # "قال " -> 0:4
    # "محمد " -> 4:9
    # "علي" -> 9:12
    spelling_start = spelling.find(needle)
    # Derive the end from the needle instead of hard-coding its length.
    spelling_end = spelling_start + len(needle)
    print(f"Spelling string 'علي' is at [{spelling_start}:{spelling_end}]")

    orig_start = mapper.map_offset(spelling_start)
    orig_end = mapper.map_offset(spelling_end)
    print(f"Mapped to original: [{orig_start}:{orig_end}]")
    print(f"Original text at mapped span: '{original[orig_start:orig_end]}'")

    # Without assertions this check could never fail.
    assert spelling_start != -1, "needle not found in the corrected text"
    assert original[orig_start:orig_end] == needle, (
        f"mapped span [{orig_start}:{orig_end}] does not cover the needle"
    )
# Run the mapping check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test()