MikkoLipsanen committed on
Commit
ba2832f
·
verified ·
1 Parent(s): 59b1228

Update to graph-based reading order algorithm

Browse files
Files changed (1) hide show
  1. reading_order.py +151 -97
reading_order.py CHANGED
@@ -1,106 +1,160 @@
1
- import numpy as np
2
 
3
class OrderPolygons:
    """Order text lines using pairwise precedence rules and a topological sort.

    Every line is a sequence of four bounding-box values laid out as
    ``[x_max, x_min, y_max, y_min]`` (index 0 = right edge, 1 = left edge,
    2 = bottom edge, 3 = top edge).

    Args:
        text_direction (str): ``'rl'`` selects right-to-left horizontal
            ordering; any other value is treated as left-to-right.
            Default: ``'lr'``.
    """

    def __init__(self, text_direction='lr'):
        self.text_direction = text_direction

    def _y_overlaps(self, u, v):
        """Return True when lines u and v overlap vertically."""
        # u_y_min < v_y_max and u_y_max > v_y_min
        return u[3] < v[2] and u[2] > v[3]

    def _x_overlaps(self, u, v):
        """Return True when lines u and v overlap horizontally."""
        # u_x_min < v_x_max and u_x_max > v_x_min
        return u[1] < v[0] and u[0] > v[1]

    def _above(self, u, v):
        """Return True when the top edge of u lies above the top edge of v."""
        # u_y_min < v_y_min
        return u[3] < v[3]

    def _left_of(self, u, v):
        """Return True when u ends before v starts on the x axis."""
        # u_x_max < v_x_min
        return u[0] < v[1]

    def _separates(self, w, u, v):
        """Return 1 when line w lies between lines u and v, otherwise 0.

        w is ignored when it equals u or v, or when it lies entirely above
        or entirely below both of them.
        """
        if w == u or w == v:
            return 0
        # w_y_max < min(u_y_min, v_y_min): w ends above both lines
        if w[2] < min(u[3], v[3]):
            return 0
        # w_y_min > max(u_y_max, v_y_max): w starts below both lines
        if w[3] > max(u[2], v[2]):
            return 0
        # w_x_min < u_x_max and w_x_max > v_x_min
        if w[1] < u[0] and w[0] > v[1]:
            return 1
        return 0

    # Slightly modified version of the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def reading_order(self, lines):
        """Compute the partial reading order of the given lines.

        Args:
            lines (list): Bounding boxes in ``[x_max, x_min, y_max, y_min]``
                layout.

        Returns:
            numpy.ndarray: Binary 2D array in which ``order[i, j]`` is 1 when
            line i comes before line j in reading order.
        """
        count = len(lines)
        order = np.zeros((count, count), 'B')

        # Reading direction: default is left to right.
        if self.text_direction == 'rl':
            def horizontal_order(u, v):
                return not self._left_of(u, v)
        else:
            horizontal_order = self._left_of

        for i, u in enumerate(lines):
            for j, v in enumerate(lines):
                if self._x_overlaps(u, v):
                    # Same column: the upper line is read first.
                    if self._above(u, v):
                        order[i, j] = 1
                elif not any(self._separates(w, u, v) for w in lines):
                    # Nothing lies between the two lines: horizontal order.
                    if horizontal_order(u, v):
                        order[i, j] = 1
                elif self._y_overlaps(u, v) and horizontal_order(u, v):
                    # Separated, but on the same row: still horizontal order.
                    order[i, j] = 1

        return order

    # Based on the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def topsort(self, order):
        """Topologically sort a binary precedence matrix.

        The traversal is a depth-first post-order over predecessors, done
        with an explicit stack so that long precedence chains cannot exceed
        the interpreter's recursion limit.

        Args:
            order (numpy.ndarray): Square binary array where a non-zero
                ``order[i, j]`` means that i comes before j.

        Returns:
            list: Indices (plain ints) arranged so that each index is
            emitted after all of its not-yet-visited predecessors.
        """
        n = len(order)
        visited = [False] * n
        result = []

        for root in range(n):
            if visited[root]:
                continue
            visited[root] = True
            # Each frame holds a node and an iterator over its predecessors.
            stack = [(root, iter(np.flatnonzero(order[:, root])))]
            while stack:
                node, predecessors = stack[-1]
                for pred in predecessors:
                    pred = int(pred)
                    if not visited[pred]:
                        visited[pred] = True
                        stack.append(
                            (pred, iter(np.flatnonzero(order[:, pred])))
                        )
                        break
                else:
                    # All predecessors are done: emit the node itself.
                    stack.pop()
                    result.append(node)

        return result

    def order(self, lines):
        """Return the indices of ``lines`` arranged in reading order."""
        precedence = self.reading_order(lines)
        return self.topsort(precedence)
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict, deque
2
 
3
class OrderPolygons:
    """
    Graph-Based Reading Order Algorithm

    Determines the reading order of polygons by:
    1. Comparing each pair of polygons to determine precedence
    2. Building a directed graph of these relationships
    3. Finding a topological ordering of the graph

    Args:
        text_direction (str): Reading direction. 'lr' means left-to-right;
            any other value is handled as right-to-left ('rl').
            Default: 'lr'
    """

    # Two lines are treated as sharing a row when their vertical overlap
    # exceeds this fraction of their average height.
    _ROW_OVERLAP_RATIO = 0.5

    def __init__(self, text_direction='lr'):
        self.text_direction = text_direction

    def _get_features(self, line):
        """
        Extract spatial features from a text line's bounding box.

        Args:
            line (list or tuple): Bounding box coordinates in format:
                [x_min, y_min, x_max, y_max]
                where:
                - x_min: leftmost x-coordinate
                - y_min: topmost y-coordinate
                - x_max: rightmost x-coordinate
                - y_max: bottommost y-coordinate

        Returns:
            dict: Dictionary with keys 'center' (an (x, y) tuple), 'x_min',
                'x_max', 'y_min', 'y_max', 'width' and 'height'
        """
        x_min, y_min, x_max, y_max = line

        return {
            'center': ((x_min + x_max) / 2, (y_min + y_max) / 2),
            'x_min': x_min,
            'x_max': x_max,
            'y_min': y_min,
            'y_max': y_max,
            'width': x_max - x_min,
            'height': y_max - y_min,
        }

    def _should_precede(self, u_feat, v_feat):
        """
        Determine if line u should come before line v in reading order.

        The logic follows natural reading patterns:
        1. If lines are on the same row (vertical overlap) -> use horizontal order
        2. If lines are on different rows -> use vertical order (top to bottom)

        Args:
            u_feat (dict): Feature dictionary for line u (from _get_features)
                Must contain: 'center', 'y_min', 'y_max', 'height'
            v_feat (dict): Feature dictionary for line v (from _get_features)
                Must contain: 'center', 'y_min', 'y_max', 'height'

        Returns:
            bool: True if line u should come before line v in reading order
                False otherwise
        """
        u_cx, u_cy = u_feat['center']
        v_cx, v_cy = v_feat['center']

        # Length of the shared vertical span (negative when the lines are disjoint)
        v_overlap = (min(u_feat['y_max'], v_feat['y_max'])
                     - max(u_feat['y_min'], v_feat['y_min']))
        avg_height = (u_feat['height'] + v_feat['height']) / 2

        # If significant vertical overlap, use horizontal order
        if v_overlap > self._ROW_OVERLAP_RATIO * avg_height:
            if self.text_direction == 'lr':
                return u_cx < v_cx
            return u_cx > v_cx

        # Otherwise, use vertical order (top to bottom)
        return u_cy < v_cy

    def _fallback_key(self, feat):
        """Sort key for lines left over after a precedence cycle.

        Lines are ordered top to bottom by center; ties on the vertical
        center follow the configured text direction.
        """
        center_x, center_y = feat['center']
        if self.text_direction == 'lr':
            return (center_y, center_x)
        return (center_y, -center_x)

    def order(self, lines):
        """
        Compute the reading order of text lines using graph-based approach.

        Args:
            lines (list): List of bounding boxes, where each bounding box is:
                [x_min, y_min, x_max, y_max]
                Any sized sequence of 4-value rows is accepted (for example
                a list of lists or a 2D numpy array).

        Example input:
            [
                [50, 100, 250, 130],    # Line 0
                [300, 100, 500, 130],   # Line 1
                [50, 180, 250, 210],    # Line 2
                [300, 180, 500, 210]    # Line 3
            ]

            This represents a 2x2 grid:
                [Line 0] [Line 1]
                [Line 2] [Line 3]

        Returns:
            list: Indices of lines in reading order

            Example output: [0, 1, 2, 3]
            Meaning: Read line 0, then 1, then 2, then 3

            If input is empty (or None), returns []

        Algorithm Steps:
            1. Handle empty input
            2. Extract features for all lines
            3. Build directed graph:
               - Nodes = line indices (0, 1, 2, ...)
               - Edges = precedence relationships (i -> j means "i before j")
            4. Calculate in-degrees (number of predecessors for each node)
            5. Perform topological sort using Kahn's algorithm
            6. Append any lines caught in a precedence cycle, sorted by position
        """
        # A length check (rather than truthiness) keeps array inputs working.
        if lines is None or len(lines) == 0:
            return []

        n = len(lines)
        features = [self._get_features(line) for line in lines]

        # Build adjacency list; every pair contributes exactly one edge
        graph = defaultdict(list)
        in_degree = [0] * n

        for i in range(n):
            for j in range(i + 1, n):
                if self._should_precede(features[i], features[j]):
                    graph[i].append(j)
                    in_degree[j] += 1
                else:
                    graph[j].append(i)
                    in_degree[i] += 1

        # Kahn's algorithm for topological sort
        queue = deque(i for i in range(n) if in_degree[i] == 0)
        result = []

        while queue:
            node = queue.popleft()
            result.append(node)

            for neighbor in graph[node]:
                in_degree[neighbor] -= 1
                if in_degree[neighbor] == 0:
                    queue.append(neighbor)

        # The pairwise rule is not transitive, so cycles can occur and leave
        # some nodes unprocessed.
        if len(result) < n:
            # Fallback: add remaining nodes sorted by position
            missing = set(range(n)) - set(result)
            result.extend(
                sorted(missing, key=lambda i: self._fallback_key(features[i]))
            )

        return result