File size: 7,061 Bytes
e5534ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from reading_order import GraphBasedOrdering
from shapely.geometry import Polygon
import glob
import os

from shapely.validation import make_valid

# Shared ordering helper: order_regions_lines() calls its order() method on
# lists of 'max_min' boxes to sort lines inside a region and the regions themselves.
order_poly = GraphBasedOrdering()

def load_image_paths(input_folder, extensions=None):
    """
    Load image files from a folder.

    Each extension is searched as given and in its all-lowercase and
    all-uppercase spelling, so passing 'JPG' finds the same files as 'jpg'.
    Only the top level of the folder is searched (no recursion).

    Args:
        input_folder: Path to the folder containing images
        extensions: List of file extensions (without the leading dot) to
            include (default: common image formats)

    Returns:
        Sorted list of unique image file paths
    """
    if extensions is None:
        extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'tif', 'webp']

    # A set is used because the same file can be matched by more than one
    # pattern (repeated extensions, or case-insensitive matching on some
    # platforms), and each path must be reported only once.
    images = set()
    for ext in extensions:
        # The inner set collapses spellings that coincide (e.g. 'jpg' == 'jpg'.lower())
        for variant in {ext, ext.lower(), ext.upper()}:
            images.update(glob.glob(os.path.join(input_folder, f'*.{variant}')))

    return sorted(images)

def get_default_region(image_shape):
    """Build the fallback region list used when no regions are detected.

    Args:
        image_shape: Two-item sequence that is unpacked as (w, h).

    Returns:
        A one-element list holding a region dict whose 'coords' are the four
        corner points (0, 0), (w, 0), (w, h), (0, h), with 'name' "paragraph",
        'id' '0', 'max_min' [w, 0.0, h, 0.0] and 'img_shape' (h, w).
    """
    width, height = image_shape
    corners = [[0.0, 0.0], [width, 0.0], [width, height], [0.0, height]]
    fallback = dict(
        coords=corners,
        max_min=[width, 0.0, height, 0.0],
        name="paragraph",
        id='0',
        # Note the swapped order compared to the input: (h, w)
        img_shape=(height, width),
    )
    return [fallback]

def get_dist(line_polygon, regions):
    """Function for finding the closest region to the text line.

    Args:
        line_polygon: Point sequence of the text line; it is passed through
            validate_polygon() before any distance is measured.
        regions: Iterable of region dicts providing 'coords' and 'id'.

    Returns:
        The 'id' of the region with the smallest polygon distance to the line,
        or None if the line polygon could not be validated or no region
        produced a usable polygon.
    """
    line_shape = validate_polygon(line_polygon)
    if not line_shape:
        return None

    # Start from infinity so that any finite distance is accepted; a fixed
    # numeric cap would leave far-away lines without a region.
    best_dist, best_id = float('inf'), None
    for region in regions:
        region_shape = validate_polygon(region['coords'])
        if not region_shape:
            continue
        # Calculates distance between line and region polygons
        line_reg_dist = line_shape.distance(region_shape)
        # Strict comparison: on ties the first region encountered is kept
        if line_reg_dist < best_dist:
            best_dist = line_reg_dist
            best_id = region['id']
    return best_id

def validate_polygon(polygon):
    """Function for testing and correcting the validity of polygons.

    Args:
        polygon: Sequence of points; its length is checked before conversion.

    Returns:
        None when fewer than three points are given. Otherwise a shapely
        Polygon built from the points, or the result of make_valid() on it
        when the polygon is reported as invalid.
    """
    # Too few points to form a polygon
    if len(polygon) <= 2:
        return None
    shape = Polygon(polygon)
    return shape if shape.is_valid else make_valid(shape)

def get_iou(poly1, poly2):
    """Function for calculating Intersection over Union (IoU) values.

    Args:
        poly1: Point sequence of the first polygon.
        poly2: Point sequence of the second polygon.

    Returns:
        Intersection area divided by union area. 0 is returned when either
        input cannot be validated, when the shapes do not intersect, or when
        the union has no area.
    """
    # If the polygons don't intersect, IoU is 0
    iou = 0
    shape_a = validate_polygon(poly1)
    shape_b = validate_polygon(poly2)
    if shape_a and shape_b and shape_a.intersects(shape_b):
        # Calculates union of the 2 polygons
        union_area = shape_a.union(shape_b).area
        # Degenerate (zero-area) shapes can intersect while their union has
        # no area; skip the division instead of raising ZeroDivisionError.
        if union_area > 0:
            # Calculates intersection over union
            iou = shape_a.intersection(shape_b).area / union_area
    return iou

def get_line_regions(lines, regions):
    """Function for connecting each text line to one region.

    A line is assigned to the region with the highest IoU against the line
    polygon. When no region yields an IoU above zero, the region is chosen
    with get_dist() instead (which may return None).

    Args:
        lines (dict): Dictionary containing text line information with keys:
            - 'coords' (list): List of line polygons (coordinates). E.g. [[x1,y1], ..., [xn,yn]]
            - 'max_min' (list): List of bounding box coordinates for each line
        regions (list): List of region dictionaries, each containing:
            - 'coords': Region polygon coordinates. E.g. [[x1,y1], ..., [xn,yn]]
            - 'id': Region identifier

    Returns:
        list: One dictionary per line, in input order, with keys:
            - 'polygon': Line polygon coordinates
            - 'reg_id': ID of the region the line belongs to
            - 'max_min': The matching entry of lines['max_min'], or
              [0.0, 0.0, 0.0, 0.0] when that list has no entry for the line
    """
    assigned = []
    for idx, line_coords in enumerate(lines['coords']):
        best_iou, best_region = 0, ''
        for candidate in regions:
            overlap = get_iou(line_coords, candidate['coords'])
            # Strict comparison keeps the first region on ties
            if overlap > best_iou:
                best_iou, best_region = overlap, candidate['id']

        # No overlapping region found: fall back to the distance metric
        if best_iou == 0:
            best_region = get_dist(line_coords, regions)

        # The bounding box list may be shorter than the polygon list
        boxes = lines['max_min']
        bbox = boxes[idx] if idx < len(boxes) else [0.0, 0.0, 0.0, 0.0]

        assigned.append({'polygon': line_coords, 'reg_id': best_region, 'max_min': bbox})
    return assigned

def order_regions_lines(lines, regions):
    """Function for ordering line predictions inside each region.

    Lines are grouped by region id, ordered within each region, and the
    regions that received at least one line are then ordered themselves.
    Both orderings are delegated to order_poly.order(), which is given the
    'max_min' values and whose result is used as a list of indices.

    Args:
        lines (list): List of line dictionaries, each containing:
            - 'reg_id': ID of the region the line belongs to
            - 'max_min': Bounding box coordinates for the line
            - 'polygon': Line polygon coordinates
        regions (list): List of region dictionaries, each containing:
            - 'id': Region identifier
            - 'coords': Region polygon coordinates
            - 'name': Region name
            - 'max_min': Bounding box coordinates for the region
            - 'img_shape': Shape of the source image

    Returns:
        list: List of ordered region dictionaries, each containing:
            - 'region_coords': Region polygon coordinates
            - 'region_name': Region name
            - 'lines': Ordered list of line polygons within the region
            - 'img_shape': Shape of the source image

        Note: Only regions containing at least one line are included in the output.
    """
    kept_regions = []
    kept_boxes = []
    for region in regions:
        # Collect the lines that were assigned to this region
        members = [ln for ln in lines if ln['reg_id'] == region['id']]
        if not members:
            # Regions without any lines are left out of the result
            continue

        member_boxes = [ln['max_min'] for ln in members]
        member_polys = [ln['polygon'] for ln in members]

        # Line order inside the region is derived from the line bounding boxes
        reading_order = order_poly.order(member_boxes)
        kept_regions.append({
            'region_coords': region['coords'],
            'region_name': region['name'],
            'lines': [member_polys[pos] for pos in reading_order],
            'img_shape': region['img_shape'],
        })
        kept_boxes.append(region['max_min'])

    # Order the remaining regions based on their 'max_min' values
    return [kept_regions[pos] for pos in order_poly.order(kept_boxes)]