File size: 6,093 Bytes
ba2832f
36e7edb
7f74e3e
ba2832f
 
36e7edb
ba2832f
 
 
 
36e7edb
ba2832f
 
 
 
 
36e7edb
ba2832f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36e7edb
ba2832f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36e7edb
 
ba2832f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36e7edb
ba2832f
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from collections import defaultdict, deque

class GraphBasedOrdering:
    """
    Graph-Based Reading Order Algorithm
    
    Determines the reading order of polygons by:
    1. Comparing each pair of polygons to determine precedence
    2. Building a directed graph of these relationships
    3. Finding a topological ordering of the graph
    
    Args:
        text_direction (str): Reading direction, either 'lr' (left-to-right) or 'rl' (right-to-left). Default: 'lr'
    """
    def __init__(self, text_direction='lr'):
        self.text_direction = text_direction
    
    def _get_features(self, line):
        """
        Extract spatial features from a text line's bounding box.
        
        Args:
            line (list or tuple): Bounding box coordinates in format:
                                 [x_min, y_min, x_max, y_max]
                                 where:
                                   - x_min: leftmost x-coordinate
                                   - x_max: rightmost x-coordinate  
                                   - y_min: topmost y-coordinate
                                   - y_max: bottommost y-coordinate
        
        Returns:
            dict: Dictionary containing extracted features
        """
        x_min, y_min, x_max, y_max = line

        return {
            'center': ((x_min + x_max) / 2, (y_min + y_max) / 2),
            'x_min': x_min, 
            'x_max': x_max,
            'y_min': y_min, 
            'y_max': y_max,
            'width': x_max - x_min,
            'height': y_max - y_min
        }
    
    def _should_precede(self, u_feat, v_feat):
        """"
        Determine if line u should come before line v in reading order.
        
        The logic follows natural reading patterns:
        1. If lines are on the same row (vertical overlap) -> use horizontal order
        2. If lines are on different rows -> use vertical order (top to bottom)
        
        Args:
            u_feat (dict): Feature dictionary for line u (from _get_features)
                          Must contain: 'center', 'y_min', 'y_max', 'height'
            v_feat (dict): Feature dictionary for line v (from _get_features)
                          Must contain: 'center', 'y_min', 'y_max', 'height'
        
        Returns:
            bool: True if line u should come before line v in reading order
                  False otherwise
        """
        u_center = u_feat['center']
        v_center = v_feat['center']
        
        # Vertical overlap threshold
        v_overlap = min(u_feat['y_max'], v_feat['y_max']) - max(u_feat['y_min'], v_feat['y_min'])
        avg_height = (u_feat['height'] + v_feat['height']) / 2
        
        # If significant vertical overlap, use horizontal order
        if v_overlap > 0.5 * avg_height:
            if self.text_direction == 'lr':
                return u_center[0] < v_center[0]
            else:
                return u_center[0] > v_center[0]
        
        # Otherwise, use vertical order (top to bottom)
        return u_center[1] < v_center[1]
    
    def order(self, lines):
        """
        Compute the reading order of text lines using graph-based approach.
        
        Args:
            lines (list): List of bounding boxes, where each bounding box is:
                         [x_min, x_max, y_min, y_max] NOW x_min,y_min,x_max,y_max
                         
                         Example input:
                         [
                             [50, 250, 100, 130],    # Line 0
                             [300, 500, 100, 130],   # Line 1
                             [50, 250, 180, 210],    # Line 2
                             [300, 500, 180, 210]    # Line 3
                         ]
                         
                         This represents a 2x2 grid:
                         [Line 0]  [Line 1]
                         [Line 2]  [Line 3]
        
        Returns:
            list: Indices of lines in reading order
                  
                  Example output: [0, 1, 2, 3]
                  Meaning: Read line 0, then 1, then 2, then 3
                  
                  If input is empty, returns []
        
        Algorithm Steps:
            1. Handle empty input
            2. Extract features for all lines
            3. Build directed graph:
               - Nodes = line indices (0, 1, 2, ...)
               - Edges = precedence relationships (i→j means "i before j")
            4. Calculate in-degrees (number of predecessors for each node)
            5. Perform topological sort using Kahn's algorithm
            6. Return the sorted order
        """
        if not lines:
            return []
        
        n = len(lines)
        features = [self._get_features(line) for line in lines]
        
        # Build adjacency list
        graph = defaultdict(list)
        in_degree = [0] * n
        
        for i in range(n):
            for j in range(i + 1, n):
                if self._should_precede(features[i], features[j]):
                    graph[i].append(j)
                    in_degree[j] += 1
                else:  
                    graph[j].append(i)
                    in_degree[i] += 1
        
        # Kahn's algorithm for topological sort
        queue = deque([i for i in range(n) if in_degree[i] == 0])
        result = []
        
        while queue:
            node = queue.popleft()
            result.append(node)
            
            for neighbor in graph[node]:
                in_degree[neighbor] -= 1
                if in_degree[neighbor] == 0:
                    queue.append(neighbor)

        # Check if all nodes were processed
        if len(result) < n:
            # Fallback: add remaining nodes sorted by position
            missing = set(range(n)) - set(result)
            missing_sorted = sorted(missing, 
                                  key=lambda i: (features[i]['center'][1], 
                                               features[i]['center'][0]))
            result.extend(missing_sorted)
        
        return result