"""Tests for ``parse`` and ``page_for_offset`` from ``app.services.parser``."""

import pytest

from app.services.parser import (
    EmptyDocument,
    PageSpan,
    UnsupportedFormat,
    page_for_offset,
    parse,
)


def test_parse_text_basic():
    """Parsing a small ``.txt`` payload yields a one-page ``"text"`` document."""
    doc = parse("note.txt", b"Hello world\n\nSecond paragraph.")

    assert "Hello world" in doc.text
    assert doc.source_format == "text"
    assert doc.char_count > 0
    assert doc.page_count == 1


def test_parse_empty_raises():
    """A payload containing only whitespace must raise ``EmptyDocument``."""
    with pytest.raises(EmptyDocument):
        parse("blank.txt", b" \n ")


def test_unknown_format_raises():
    """A ``.xyz`` file name must be rejected with ``UnsupportedFormat``."""
    with pytest.raises(UnsupportedFormat):
        parse("mystery.xyz", b"data")


def test_page_for_offset_returns_correct_page():
    """Offsets map to the expected page numbers for two adjacent spans."""
    # NOTE(review): positional args are presumably (page number, start, end);
    # confirm against the PageSpan definition.
    pages = [PageSpan(1, 0, 50), PageSpan(2, 50, 120)]

    assert page_for_offset(pages, 10) == 1
    assert page_for_offset(pages, 75) == 2
    # 500 is larger than every value used in the spans above; the expected
    # result is still page 2.
    assert page_for_offset(pages, 500) == 2