Update to graph-based reading order algorithm
Browse files- reading_order.py +151 -97
reading_order.py
CHANGED
|
@@ -1,106 +1,160 @@
|
|
| 1 |
-
import
|
| 2 |
|
| 3 |
class OrderPolygons:
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
# Defines whether two lines overlap vertically
|
| 8 |
-
def _y_overlaps(self, u, v):
|
| 9 |
-
#u_y_min < v_y_max and u_y_max > v_y_min
|
| 10 |
-
return u[3] < v[2] and u[2] > v[3]
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# Defines whether one line (u) is left of the other (v)
|
| 23 |
-
def _left_of(self, u, v):
|
| 24 |
-
#u_x_max < v_x_min
|
| 25 |
-
return u[0] < v[1]
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def reading_order(self, lines):
|
| 45 |
-
"""Given the list of lines, computes
|
| 46 |
-
the partial reading order. The output is a binary 2D array
|
| 47 |
-
such that order[i,j] is true if line i comes before line j
|
| 48 |
-
in reading order."""
|
| 49 |
-
# Input lines are arrays with 4 polygon coordinates:
|
| 50 |
-
# 0=x_right/x_max, 1=x_left/x_min, 2=y_down/y_max, 3=y_up/y_min
|
| 51 |
-
|
| 52 |
-
# Array where the order of precedence between the lines is defined
|
| 53 |
-
order = np.zeros((len(lines), len(lines)), 'B')
|
| 54 |
-
|
| 55 |
-
# Defines reading direction: default is from left to right
|
| 56 |
-
if self.text_direction == 'rl':
|
| 57 |
-
def horizontal_order(u, v):
|
| 58 |
-
return not self._left_of(u, v)
|
| 59 |
-
else:
|
| 60 |
-
horizontal_order = self._left_of
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
# Taken from the Kraken implementation at
|
| 79 |
-
# https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
|
| 80 |
-
def topsort(self, order):
|
| 81 |
-
"""Given a binary array defining a partial order (o[i,j]==True means i<j),
|
| 82 |
-
compute a topological sort. This is a quick and dirty implementation
|
| 83 |
-
that works for up to a few thousand elements."""
|
| 84 |
-
|
| 85 |
-
n = len(order)
|
| 86 |
-
visited = np.zeros(n)
|
| 87 |
-
L = []
|
| 88 |
-
|
| 89 |
-
def _visit(k):
|
| 90 |
-
if visited[k]:
|
| 91 |
-
return
|
| 92 |
-
visited[k] = 1
|
| 93 |
-
a, = np.nonzero(np.ravel(order[:, k]))
|
| 94 |
-
for line in a:
|
| 95 |
-
_visit(line)
|
| 96 |
-
L.append(k)
|
| 97 |
-
|
| 98 |
-
for k in range(n):
|
| 99 |
-
_visit(k)
|
| 100 |
-
return L
|
| 101 |
-
|
| 102 |
def order(self, lines):
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict, deque
|
| 2 |
|
| 3 |
class OrderPolygons:
    """
    Graph-based reading order algorithm.

    Determines the reading order of text lines by:
    1. Comparing each pair of bounding boxes to decide which one comes first
    2. Building a directed graph of these precedence relationships
    3. Producing a topological ordering of the graph (Kahn's algorithm)

    The pairwise relation is not guaranteed to be transitive, so the graph
    may contain cycles; ``order`` handles that case with a positional
    fallback instead of failing.

    Args:
        text_direction (str): Reading direction. 'lr' means left-to-right;
            any other value (conventionally 'rl') is treated as
            right-to-left. Default: 'lr'
    """

    def __init__(self, text_direction='lr'):
        self.text_direction = text_direction

    def _get_features(self, line):
        """
        Extract spatial features from a text line's bounding box.

        Args:
            line (list or tuple): Bounding box coordinates in the format
                [x_min, y_min, x_max, y_max], where:
                - x_min: leftmost x-coordinate
                - y_min: topmost y-coordinate
                - x_max: rightmost x-coordinate
                - y_max: bottommost y-coordinate

        Returns:
            dict: The keys 'center' (an (x, y) tuple), 'x_min', 'x_max',
                'y_min', 'y_max', 'width' and 'height'.
        """
        x_min, y_min, x_max, y_max = line

        return {
            'center': ((x_min + x_max) / 2, (y_min + y_max) / 2),
            'x_min': x_min,
            'x_max': x_max,
            'y_min': y_min,
            'y_max': y_max,
            'width': x_max - x_min,
            'height': y_max - y_min,
        }

    def _should_precede(self, u_feat, v_feat):
        """
        Determine if line u should come strictly before line v.

        The logic follows natural reading patterns:
        1. If the lines share a row (their vertical overlap exceeds half of
           their average height) -> compare the horizontal centers according
           to the text direction
        2. Otherwise -> compare the vertical centers (top to bottom)

        All comparisons are strict, so for two lines whose relevant center
        coordinates are equal this returns False in both directions.

        Args:
            u_feat (dict): Feature dictionary for line u (from _get_features)
                Must contain: 'center', 'y_min', 'y_max', 'height'
            v_feat (dict): Feature dictionary for line v (from _get_features)
                Must contain: 'center', 'y_min', 'y_max', 'height'

        Returns:
            bool: True if line u should come before line v in reading order,
                False otherwise
        """
        u_center = u_feat['center']
        v_center = v_feat['center']

        # Length of the shared vertical extent; negative when the boxes are
        # vertically disjoint.
        v_overlap = (min(u_feat['y_max'], v_feat['y_max'])
                     - max(u_feat['y_min'], v_feat['y_min']))
        avg_height = (u_feat['height'] + v_feat['height']) / 2

        # Significant vertical overlap: same row, so use horizontal order
        if v_overlap > 0.5 * avg_height:
            if self.text_direction == 'lr':
                return u_center[0] < v_center[0]
            return u_center[0] > v_center[0]

        # Otherwise, use vertical order (top to bottom)
        return u_center[1] < v_center[1]

    def order(self, lines):
        """
        Compute the reading order of text lines using a graph-based approach.

        Args:
            lines (list): List of bounding boxes, where each bounding box is
                [x_min, y_min, x_max, y_max]

                Example input:
                [
                    [50, 100, 250, 130],   # Line 0
                    [300, 100, 500, 130],  # Line 1
                    [50, 180, 250, 210],   # Line 2
                    [300, 180, 500, 210]   # Line 3
                ]

                This represents a 2x2 grid:
                [Line 0] [Line 1]
                [Line 2] [Line 3]

        Returns:
            list: Indices of lines in reading order

                Example output (text_direction='lr'): [0, 1, 2, 3]
                Meaning: Read line 0, then 1, then 2, then 3

                If input is empty (or None), returns []

        Algorithm Steps:
            1. Handle empty input
            2. Extract features for all lines
            3. Build directed graph:
               - Nodes = line indices (0, 1, 2, ...)
               - Edges = precedence relationships (i -> j means "i before j")
            4. Calculate in-degrees (number of predecessors for each node)
            5. Perform topological sort using Kahn's algorithm
            6. Append any nodes left over because of a cycle, sorted by
               position, and return the result
        """
        # Explicit length check instead of truthiness so that array-like
        # inputs with ambiguous truth values do not raise here.
        if lines is None or len(lines) == 0:
            return []

        n = len(lines)
        features = [self._get_features(line) for line in lines]

        # successors[i] lists every node that must come after node i
        successors = [[] for _ in range(n)]
        in_degree = [0] * n

        for i in range(n):
            for j in range(i + 1, n):
                # Only put j first when it strictly precedes i. On a tie
                # (neither strictly precedes the other) the earlier input
                # index wins, which keeps the ordering stable.
                if self._should_precede(features[j], features[i]):
                    first, second = j, i
                else:
                    first, second = i, j
                successors[first].append(second)
                in_degree[second] += 1

        # Kahn's algorithm for topological sort
        queue = deque(i for i in range(n) if in_degree[i] == 0)
        result = []

        while queue:
            node = queue.popleft()
            result.append(node)

            for neighbor in successors[node]:
                in_degree[neighbor] -= 1
                if in_degree[neighbor] == 0:
                    queue.append(neighbor)

        # The pairwise relation is not transitive, so cycles are possible
        # and Kahn's algorithm may stop before every node is emitted.
        if len(result) < n:
            # Fallback: add remaining nodes top to bottom, then along the
            # reading direction within the same vertical position.
            x_sign = 1 if self.text_direction == 'lr' else -1
            missing = set(range(n)) - set(result)
            missing_sorted = sorted(
                missing,
                key=lambda i: (features[i]['center'][1],
                               x_sign * features[i]['center'][0]),
            )
            result.extend(missing_sorted)

        return result
|