# MikkoLipsanen's picture
# Create utils.py
# e5534ce verified
from reading_order import GraphBasedOrdering
from shapely.geometry import Polygon
import glob
import os
from shapely.validation import make_valid
# Shared ordering helper: order_regions_lines() passes lists of 'max_min' bounding
# boxes to its order() method to sort lines inside a region and the regions themselves.
order_poly = GraphBasedOrdering()
def load_image_paths(input_folder, extensions=None):
    """
    Load image files from a folder.

    Args:
        input_folder: Path to the folder containing images. Glob metacharacters
            in the path (``*``, ``?``, ``[``) are treated literally.
        extensions: List of file extensions, without the leading dot, to include
            (default: common image formats). Each extension is matched as given
            and in its all-lowercase and all-uppercase forms.

    Returns:
        Sorted list of image file paths; every path appears exactly once.
    """
    if extensions is None:
        extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'tif', 'webp']
    # Escape the folder so a directory such as "scans[1]" is not read as a pattern.
    folder = glob.escape(os.fspath(input_folder))
    # Collect into a set: the same file can be hit by more than one pattern where
    # glob matching ignores case (e.g. Windows), and the lower/upper variants of an
    # extension may coincide with the extension as given.
    images = set()
    for ext in extensions:
        for variant in {ext, ext.lower(), ext.upper()}:
            images.update(glob.glob(os.path.join(folder, f'*.{variant}')))
    return sorted(images)
def get_default_region(image_shape):
    """Build a single fallback region spanning the whole image.

    Intended for the case where no regions are detected.

    Args:
        image_shape: Two-item sequence that is unpacked as (width, height).

    Returns:
        One-element list holding a "paragraph" region dict with id '0'. Its
        'coords' are the four image corners, 'max_min' is
        [width, 0.0, height, 0.0] and 'img_shape' is (height, width).
    """
    width, height = image_shape
    corners = [[0.0, 0.0], [width, 0.0], [width, height], [0.0, height]]
    fallback = dict(
        coords=corners,
        max_min=[width, 0.0, height, 0.0],
        name="paragraph",
        id='0',
        img_shape=(height, width),
    )
    return [fallback]
def get_dist(line_polygon, regions):
    """Find the region closest to a text line.

    Args:
        line_polygon: Line polygon coordinates, e.g. [[x1, y1], ..., [xn, yn]].
        regions: List of region dicts, each with 'coords' and 'id' keys.

    Returns:
        The 'id' of the region whose polygon has the smallest distance to the
        line polygon. None is returned when ``regions`` is empty, or when the
        line polygon or every region polygon is unusable (validate_polygon()
        yields None or a falsy geometry).
    """
    line_geom = validate_polygon(line_polygon)
    if not line_geom:
        return None
    # Start from infinity instead of a fixed cap so that a usable region is always
    # selected, however large the coordinate values are.
    closest_dist, closest_id = float('inf'), None
    for region in regions:
        region_geom = validate_polygon(region['coords'])
        if not region_geom:
            continue
        # Distance between the line polygon and the region polygon
        gap = line_geom.distance(region_geom)
        if gap < closest_dist:
            closest_dist, closest_id = gap, region['id']
    return closest_id
def validate_polygon(polygon):
    """Turn a coordinate sequence into a valid shapely geometry.

    Args:
        polygon: Sequence of points, e.g. [[x1, y1], ..., [xn, yn]].

    Returns:
        None when fewer than three points are given. Otherwise a Polygon built
        from the points, replaced by the result of make_valid() if the polygon
        is not valid as constructed.
    """
    # A polygon needs at least three points
    if len(polygon) <= 2:
        return None
    geometry = Polygon(polygon)
    return geometry if geometry.is_valid else make_valid(geometry)
def get_iou(poly1, poly2):
    """Calculate the Intersection over Union (IoU) of two polygons.

    Args:
        poly1: Polygon coordinates, e.g. [[x1, y1], ..., [xn, yn]].
        poly2: Polygon coordinates in the same form.

    Returns:
        Intersection area divided by union area. 0 is returned when either
        polygon is unusable (validate_polygon() yields None or a falsy
        geometry), when the polygons do not intersect, or when their union has
        no area.
    """
    geom1 = validate_polygon(poly1)
    geom2 = validate_polygon(poly2)
    # Unusable or non-intersecting polygons have an IoU of 0
    if not (geom1 and geom2) or not geom1.intersects(geom2):
        return 0
    union_area = geom1.union(geom2).area
    # Degenerate (zero-area) shapes can intersect while their union has no area;
    # report 0 instead of raising ZeroDivisionError.
    if union_area == 0:
        return 0
    return geom1.intersection(geom2).area / union_area
def get_line_regions(lines, regions):
    """Assign every text line to exactly one region.

    A line goes to the region with which it has the highest IoU. If the line
    overlaps no region at all, the nearest region according to get_dist() is
    used instead.

    Args:
        lines (dict): Text line information with keys:
            - 'coords' (list): Line polygons, each e.g. [[x1, y1], ..., [xn, yn]]
            - 'max_min' (list): Bounding box values, one entry per line
        regions (list): Region dictionaries, each containing:
            - 'coords': Region polygon coordinates, e.g. [[x1, y1], ..., [xn, yn]]
            - 'id': Region identifier

    Returns:
        list: One dictionary per line with keys:
            - 'polygon': Line polygon coordinates
            - 'reg_id': ID of the region the line was assigned to
            - 'max_min': The entry of lines['max_min'] at the line's index, or
              [0.0, 0.0, 0.0, 0.0] when no entry exists at that index
    """
    assigned = []
    for idx, line_poly in enumerate(lines['coords']):
        best_iou, best_region = 0, ''
        for candidate in regions:
            overlap = get_iou(line_poly, candidate['coords'])
            if overlap > best_iou:
                best_iou, best_region = overlap, candidate['id']
        # No overlap with any region: fall back to the distance metric
        if best_iou == 0:
            best_region = get_dist(line_poly, regions)
        boxes = lines['max_min']
        bbox = boxes[idx] if idx < len(boxes) else [0.0, 0.0, 0.0, 0.0]
        assigned.append({'polygon': line_poly, 'reg_id': best_region, 'max_min': bbox})
    return assigned
def order_regions_lines(lines, regions):
    """Order the lines inside each region, then order the regions themselves.

    Args:
        lines (list): Line dictionaries, each containing:
            - 'reg_id': ID of the region the line belongs to
            - 'max_min': Bounding box values for the line
            - 'polygon': Line polygon coordinates
        regions (list): Region dictionaries, each containing:
            - 'id': Region identifier
            - 'coords': Region polygon coordinates
            - 'name': Region name
            - 'max_min': Bounding box values for the region
            - 'img_shape': Shape of the source image

    Returns:
        list: Ordered region dictionaries, each containing:
            - 'region_coords': Region polygon coordinates
            - 'region_name': Region name
            - 'lines': Line polygons of the region in reading order
            - 'img_shape': Shape of the source image

    Note: Regions with no lines attached are left out of the result. Both
    orderings come from order_poly.order() applied to the 'max_min' values.
    """
    populated = []
    populated_boxes = []
    for region in regions:
        members = [ln for ln in lines if ln['reg_id'] == region['id']]
        # Regions without any line are dropped from the output
        if not members:
            continue
        boxes = [ln['max_min'] for ln in members]
        polys = [ln['polygon'] for ln in members]
        # Reading order of the lines inside this region
        ranking = order_poly.order(boxes)
        entry = {
            'region_coords': region['coords'],
            'region_name': region['name'],
            'lines': [polys[k] for k in ranking],
            'img_shape': region['img_shape'],
        }
        populated_boxes.append(region['max_min'])
        populated.append(entry)
    # Reading order of the regions that received at least one line
    region_ranking = order_poly.order(populated_boxes)
    return [populated[k] for k in region_ranking]