lexguard-backend / tests /test_parser.py
Dar4devil's picture
LexGuard backend
c34b339
raw
history blame contribute delete
847 Bytes
import pytest
from app.services.parser import EmptyDocument, UnsupportedFormat, page_for_offset, parse
def test_parse_text_basic():
    """Parse a small two-paragraph ``.txt`` payload and check the result's fields."""
    filename = "note.txt"
    payload = b"Hello world\n\nSecond paragraph."

    parsed = parse(filename, payload)

    # The literal content must show up in the extracted text.
    assert "Hello world" in parsed.text
    # A .txt upload is expected to be reported with the "text" format tag.
    assert parsed.source_format == "text"
    # Non-empty input must yield a positive character count ...
    assert parsed.char_count > 0
    # ... and this input is expected to count as exactly one page.
    assert parsed.page_count == 1
def test_parse_empty_raises():
    """A whitespace-only payload must make ``parse`` raise ``EmptyDocument``."""
    whitespace_only = b" \n "

    with pytest.raises(EmptyDocument):
        parse("blank.txt", whitespace_only)
def test_unknown_format_raises():
    """A file with the ``.xyz`` extension must make ``parse`` raise ``UnsupportedFormat``."""
    filename = "mystery.xyz"
    payload = b"data"

    with pytest.raises(UnsupportedFormat):
        parse(filename, payload)
def test_page_for_offset_returns_correct_page():
    """Check ``page_for_offset`` against a two-entry list of page spans."""
    # Function-scope import keeps PageSpan out of the module namespace.
    from app.services.parser import PageSpan

    # Presumably PageSpan(page_number, start, end) -- confirm against the
    # parser module; only the positional values are visible here.
    spans = [PageSpan(1, 0, 50), PageSpan(2, 50, 120)]

    # (offset, expected page) pairs. 500 is larger than every value in the
    # spans above and is still expected to resolve to page 2.
    expectations = ((10, 1), (75, 2), (500, 2))
    for offset, expected_page in expectations:
        assert page_for_offset(spans, offset) == expected_page