File size: 7,061 Bytes
e5534ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
from reading_order import GraphBasedOrdering
from shapely.geometry import Polygon
import glob
import os
from shapely.validation import make_valid
order_poly = GraphBasedOrdering()
def load_image_paths(input_folder, extensions=None):
"""
Load image files from a folder.
Args:
input_folder: Path to the folder containing images
extensions: List of file extensions to include (default: common image formats)
Returns:
List of image file paths
"""
if extensions is None:
extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'tif', 'webp']
images = []
for ext in extensions:
# Add both lowercase and uppercase versions
images.extend(glob.glob(os.path.join(input_folder, f'*.{ext}')))
images.extend(glob.glob(os.path.join(input_folder, f'*.{ext.upper()}')))
return sorted(images)
def get_default_region(image_shape):
"""Function for creating a default region if no regions are detected."""
w, h = image_shape
region = {'coords': [[0.0, 0.0], [w, 0.0], [w, h], [0.0, h]],
'max_min': [w, 0.0, h, 0.0],
'name': "paragraph",
'id': '0',
'img_shape': (h, w)}
return [region]
def get_dist(line_polygon, regions):
"""Function for finding the closest region to the text line."""
dist, reg_id = 1000000, None
line_polygon = validate_polygon(line_polygon)
if line_polygon:
for region in regions:
# Calculates dictance between line and regions polygons
region_polygon = validate_polygon(region['coords'])
if region_polygon:
line_reg_dist = line_polygon.distance(region_polygon)
if line_reg_dist < dist:
dist = line_reg_dist
reg_id = region['id']
return reg_id
def validate_polygon(polygon):
""""Function for testing and correcting the validity of polygons."""
if len(polygon) > 2:
polygon = Polygon(polygon)
if not polygon.is_valid:
polygon = make_valid(polygon)
return polygon
else:
return None
def get_iou(poly1, poly2):
"""Function for calculating Intersection over Union (IoU) values."""
# If the polygons don't intersect, IoU is 0
iou = 0
poly1 = validate_polygon(poly1)
poly2 = validate_polygon(poly2)
if poly1 and poly2:
if poly1.intersects(poly2):
# Calculates intersection of the 2 polygons
intersect = poly1.intersection(poly2).area
# Calculates union of the 2 polygons
uni = poly1.union(poly2)
# Calculates intersection over union
iou = intersect / uni.area
return iou
def get_line_regions(lines, regions):
"""Function for connecting each text line to one region.
Docstring generated with Claude
Connect each text line to a region based on intersection or distance.
Args:
lines (dict): Dictionary containing text line information with keys:
- 'coords' (list): List of line polygons (coordinates). E.g. [[x1,y1], ..., [xn,yn]]
- 'max_min' (list): List of bounding box coordinates for each line
regions (list): List of region dictionaries, each containing:
- 'coords': Region polygon coordinates. E.g. [[x1,y1], ..., [xn,yn]]
- 'id': Region identifier
Returns:
list: List of dictionaries, each representing a line with keys:
- 'polygon': Line polygon coordinates
- 'reg_id': ID of the region the line belongs to
- 'max_min': Bounding box coordinates [x_min, y_min, x_max, y_max]
"""
lines_list = []
for i in range(len(lines['coords'])):
iou, reg_id = 0, ''
max_min = [0.0, 0.0, 0.0, 0.0]
polygon = lines['coords'][i]
for region in regions:
line_reg_iou = get_iou(polygon, region['coords'])
if line_reg_iou > iou:
iou = line_reg_iou
reg_id = region['id']
# If line polygon does not intersect with any region, a distance metric is used for defining
# the region that the line belongs to
if iou == 0:
reg_id = get_dist(polygon, regions)
if (len(lines['max_min']) - 1) >= i:
max_min = lines['max_min'][i]
new_line = {'polygon': polygon, 'reg_id': reg_id, 'max_min': max_min}
lines_list.append(new_line)
return lines_list
def order_regions_lines(lines, regions):
"""Function for ordering line predictions inside each region.
Docstring generated with Claude
Order text lines within each region and order the regions themselves.
Args:
lines (list): List of line dictionaries, each containing:
- 'reg_id': ID of the region the line belongs to
- 'max_min': Bounding box coordinates for the line
- 'polygon': Line polygon coordinates
regions (list): List of region dictionaries, each containing:
- 'id': Region identifier
- 'coords': Region polygon coordinates
- 'name': Region name
- 'max_min': Bounding box coordinates for the region
- 'img_shape': Shape of the source image
Returns:
list: List of ordered region dictionaries, each containing:
- 'region_coords': Region polygon coordinates
- 'region_name': Region name
- 'lines': Ordered list of line polygons within the region
- 'img_shape': Shape of the source image
Note: Only regions containing at least one line are included in the output.
Both lines within regions and regions themselves are ordered by their spatial positions.
"""
regions_with_rows = []
region_max_mins = []
for i, region in enumerate(regions):
line_max_mins = []
line_polygons = []
for line in lines:
if line['reg_id'] == region['id']:
line_max_mins.append(line['max_min'])
line_polygons.append(line['polygon'])
if line_polygons:
# If one or more lines are connected to a region, line order inside the region is defined
# and the predicted text lines are joined in the same python dict
line_order = order_poly.order(line_max_mins)
line_polygons = [line_polygons[i] for i in line_order]
new_region = {'region_coords': region['coords'],
'region_name': region['name'],
'lines': line_polygons,
'img_shape': region['img_shape']}
region_max_mins.append(region['max_min'])
regions_with_rows.append(new_region)
else:
continue
# Creates an ordering of the detected regions based on their polygon coordinates
region_order = order_poly.order(region_max_mins)
regions_with_rows = [regions_with_rows[i] for i in region_order]
return regions_with_rows |