| |
| import json |
| import tempfile |
|
|
| from mmocr.utils import list_from_file, list_to_file |
|
|
# Plain-text fixtures: each inner list holds the items of one test file, one
# item per line. The cases cover an empty list, whitespace-only items,
# non-string items (int / float), mixed types and non-ASCII text.
lists = [
    [],
    [' '],
    ['\t'],
    ['a'],
    [1],
    [1.],
    ['a', 'b'],
    ['a', 1, 1.],
    [1, 1., 'a'],
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]

# JSON-lines fixtures: every case of ``lists`` wrapped as a single record
# ``{'text': [...]}``. Derived instead of retyped so the two fixture sets
# cannot drift apart; ``list(...)`` copies each case so the records do not
# alias the entries of ``lists``.
dicts = [[{'text': list(texts)}] for texts in lists]
|
|
|
|
def test_list_to_file():
    """Check the files produced by ``list_to_file`` for txt and jsonl input.

    Each fixture is written with ``list_to_file`` into a temporary directory,
    read back as UTF-8 with trailing CR/LF stripped, and compared against the
    items that were passed in.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Read back inside a context manager so the handle is closed
            # deterministically instead of being left to the garbage
            # collector (which triggers ResourceWarning).
            with open(filename, 'r', encoding='utf-8') as f:
                lines2 = [line.rstrip('\r\n') for line in f]
            # Items may be int/float; the file is expected to contain their
            # str() form.
            lines = list(map(str, lines))
            # Direct list equality covers both length and element-wise match.
            assert lines == lines2

        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            with open(filename, 'r', encoding='utf-8') as f:
                records = [
                    json.loads(line.rstrip('\r\n'))['text'] for line in f
                ]
            # Every fixture holds exactly one record, so only the first
            # decoded line is compared.
            lines2 = records[0]
            lines = list(lines[0]['text'])
            assert lines == lines2
|
|
|
|
def test_list_from_file():
    """Check that ``list_from_file`` returns the lines of txt and jsonl files.

    Each fixture is written to a temporary directory as UTF-8, one item per
    line terminated by ``\\n``, then loaded with ``list_from_file`` and
    compared against the text that was written (without the line ending).
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            with open(filename, 'w', encoding='utf-8') as f:
                f.writelines(f'{line}\n' for line in lines)
            lines2 = list_from_file(filename, encoding='utf-8')
            # Non-string items are written via their str() form, so that is
            # what is expected back.
            lines = list(map(str, lines))
            assert lines == lines2

        # test jsonl
        for i, records in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            # Serialize with json.dumps so the .jsonl file really contains
            # JSON (str(dict) would emit Python repr with single quotes).
            # ensure_ascii=False keeps non-ASCII characters unescaped so the
            # UTF-8 decoding path is still exercised by these cases.
            expected = [
                json.dumps(record, ensure_ascii=False) for record in records
            ]
            with open(filename, 'w', encoding='utf-8') as f:
                f.writelines(f'{line}\n' for line in expected)
            lines2 = list_from_file(filename, encoding='utf-8')
            assert expected == lines2
            # The returned lines must also parse back to the original records.
            assert [json.loads(line) for line in lines2] == records
|
|