#!/usr/bin/env python3 """ Fine-tuning Script for PaddleOCR Text Recognition Models Based on the Text Recognition Module Tutorial This script provides a complete pipeline for fine-tuning text recognition models: 1. Dataset preparation and validation 2. Model training with custom configurations 3. Model evaluation 4. Model export for inference Supported models: PP-OCRv5_server_rec, PP-OCRv5_mobile_rec, PP-OCRv4_server_rec, etc. """ import os import sys import argparse import yaml import wget import tarfile import subprocess from pathlib import Path import logging # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) class TextRecognitionFineTuner: def __init__(self, config_path=None, model_name="PP-OCRv5_server_rec", work_dir="./work_dir"): """ Initialize the fine-tuner Args: config_path: Path to custom config file model_name: Name of the model to fine-tune work_dir: Working directory for outputs """ self.model_name = model_name self.work_dir = Path(work_dir) self.work_dir.mkdir(exist_ok=True) # Model configurations mapping self.model_configs = { "PP-OCRv5_server_rec": { "config": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml", "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_server_rec_pretrained.pdparams" }, "PP-OCRv5_mobile_rec": { "config": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml", "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_mobile_rec_pretrained.pdparams" }, "PP-OCRv4_server_rec": { "config": "configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml", "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_server_rec_pretrained.pdparams" }, "PP-OCRv4_mobile_rec": { "config": "configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec.yml", "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_mobile_rec_pretrained.pdparams" } } self.config_path = config_path or self.model_configs[model_name]["config"] self.pretrained_path = self.work_dir / f"{model_name}_pretrained.pdparams" def prepare_demo_dataset(self): """Download and prepare demo dataset""" logger.info("Preparing demo dataset...") dataset_url = "https://paddle-model-ecology.bj.bcebos.com/paddlex/data/ocr_rec_dataset_examples.tar" dataset_path = self.work_dir / "ocr_rec_dataset_examples.tar" if not dataset_path.exists(): logger.info(f"Downloading dataset from {dataset_url}...") wget.download(dataset_url, str(dataset_path)) # Extract dataset extract_path = self.work_dir / "dataset" if not extract_path.exists(): logger.info("Extracting dataset...") with tarfile.open(dataset_path, 'r') as tar: tar.extractall(self.work_dir) # Rename extracted folder extracted_folder = self.work_dir / "ocr_rec_dataset_examples" if extracted_folder.exists(): extracted_folder.rename(extract_path) logger.info(f"Dataset prepared at {extract_path}") return extract_path def download_pretrained_model(self): """Download pretrained model weights""" if self.pretrained_path.exists(): logger.info(f"Pretrained model already exists at {self.pretrained_path}") return self.pretrained_path logger.info(f"Downloading pretrained model for {self.model_name}...") pretrained_url = self.model_configs[self.model_name]["pretrained_url"] wget.download(pretrained_url, str(self.pretrained_path)) logger.info(f"Pretrained model downloaded to {self.pretrained_path}") return self.pretrained_path def create_custom_config(self, dataset_path, custom_params=None): """ Create custom training configuration Args: dataset_path: Path to training dataset custom_params: Dictionary of custom parameters to override """ logger.info("Creating custom configuration...") # Default custom parameters default_params = { "Global": { "epoch_num": 20, "log_smooth_window": 20, "print_batch_step": 10, "save_model_dir": str(self.work_dir / "output"), "save_epoch_step": 5, "eval_batch_step": [0, 2000], "cal_metric_during_train": True, "pretrained_model": str(self.pretrained_path), "checkpoints": None, "use_visualdl": False, "infer_img": str(dataset_path / "test_imgs"), "character_dict_path": str(dataset_path / "character_dict.txt"), "character_type": "ch", "max_text_length": 25, "infer_mode": False, "use_space_char": True, "distributed": False, "save_res_path": str(self.work_dir / "output" / "predicts_rec.txt") }, "Train": { "dataset": { "name": "SimpleDataSet", "data_dir": str(dataset_path), "label_file_list": [str(dataset_path / "train_list.txt")], "transforms": [ {"DecodeImage": {"img_mode": "BGR", "channel_first": False}}, {"RecConAug": {"prob": 0.5, "ext_data_num": 2, "image_shape": [48, 320, 3]}}, {"RecAug": {}}, {"MultiLabelEncode": {}}, {"RecResizeImg": {"image_shape": [3, 48, 320]}}, {"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}} ] }, "loader": { "shuffle": True, "batch_size_per_card": 256, "drop_last": True, "num_workers": 4 } }, "Eval": { "dataset": { "name": "SimpleDataSet", "data_dir": str(dataset_path), "label_file_list": [str(dataset_path / "val_list.txt")], "transforms": [ {"DecodeImage": {"img_mode": "BGR", "channel_first": False}}, {"MultiLabelEncode": {}}, {"RecResizeImg": {"image_shape": [3, 48, 320]}}, {"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}} ] }, "loader": { "shuffle": False, "drop_last": False, "batch_size_per_card": 256, "num_workers": 4 } } } # Merge with custom parameters if custom_params: self._deep_update(default_params, custom_params) # Save custom config custom_config_path = self.work_dir / f"{self.model_name}_custom.yml" with open(custom_config_path, 'w', encoding='utf-8') as f: yaml.dump(default_params, f, default_flow_style=False, allow_unicode=True) logger.info(f"Custom configuration saved to {custom_config_path}") return custom_config_path def _deep_update(self, base_dict, update_dict): """Recursively update nested dictionary""" for key, value in update_dict.items(): if isinstance(value, dict) and key in base_dict and isinstance(base_dict[key], dict): self._deep_update(base_dict[key], value) else: base_dict[key] = value def train(self, config_path, gpus="0", resume_from=None): """ Train the model Args: config_path: Path to configuration file gpus: GPU IDs to use (e.g., "0" or "0,1,2,3") resume_from: Path to checkpoint to resume from """ logger.info(f"Starting training with GPUs: {gpus}") # Prepare training command if len(gpus.split(',')) > 1: # Multi-GPU training cmd = [ "python3", "-m", "paddle.distributed.launch", "--gpus", gpus, "tools/train.py", "-c", str(config_path) ] else: # Single GPU training cmd = [ "python3", "tools/train.py", "-c", str(config_path) ] # Add resume option if provided if resume_from: cmd.extend(["-o", f"Global.checkpoints={resume_from}"]) # Set environment variable for GPU env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = gpus logger.info(f"Training command: {' '.join(cmd)}") try: result = subprocess.run(cmd, env=env, check=True, capture_output=False) logger.info("Training completed successfully!") return True except subprocess.CalledProcessError as e: logger.error(f"Training failed with error: {e}") return False def evaluate(self, config_path, checkpoint_path, gpus="0"): """ Evaluate the trained model Args: config_path: Path to configuration file checkpoint_path: Path to model checkpoint gpus: GPU IDs to use """ logger.info(f"Starting evaluation...") cmd = [ "python3", "tools/eval.py", "-c", str(config_path), "-o", f"Global.pretrained_model={checkpoint_path}" ] # Set environment variable for GPU env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = gpus logger.info(f"Evaluation command: {' '.join(cmd)}") try: result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True) logger.info("Evaluation completed successfully!") logger.info(f"Evaluation results:\n{result.stdout}") return True except subprocess.CalledProcessError as e: logger.error(f"Evaluation failed with error: {e}") logger.error(f"Error output: {e.stderr}") return False def export_model(self, config_path, checkpoint_path, output_dir=None): """ Export trained model for inference Args: config_path: Path to configuration file checkpoint_path: Path to trained model checkpoint output_dir: Directory to save exported model """ if output_dir is None: output_dir = self.work_dir / f"{self.model_name}_infer" logger.info(f"Exporting model to {output_dir}") cmd = [ "python3", "tools/export_model.py", "-c", str(config_path), "-o", f"Global.pretrained_model={checkpoint_path}", "-o", f"Global.save_inference_dir={output_dir}" ] logger.info(f"Export command: {' '.join(cmd)}") try: result = subprocess.run(cmd, check=True, capture_output=True, text=True) logger.info("Model export completed successfully!") logger.info(f"Exported model saved to {output_dir}") # List exported files if Path(output_dir).exists(): exported_files = list(Path(output_dir).glob("*")) logger.info(f"Exported files: {[f.name for f in exported_files]}") return True except subprocess.CalledProcessError as e: logger.error(f"Model export failed with error: {e}") logger.error(f"Error output: {e.stderr}") return False def run_complete_pipeline(self, custom_params=None, gpus="0", skip_demo_data=False): """ Run the complete fine-tuning pipeline Args: custom_params: Custom parameters to override defaults gpus: GPU IDs to use skip_demo_data: Whether to skip demo data preparation """ logger.info("=== Starting Complete Fine-tuning Pipeline ===") try: # Step 1: Prepare dataset if not skip_demo_data: dataset_path = self.prepare_demo_dataset() else: dataset_path = Path(custom_params.get("dataset_path", "./dataset")) # Use custom dataset path # Step 2: Download pretrained model self.download_pretrained_model() # Step 3: Create custom configuration config_path = self.create_custom_config(dataset_path, custom_params) # Step 4: Train model logger.info("=== Starting Training ===") training_success = self.train(config_path, gpus) if not training_success: logger.error("Training failed. Stopping pipeline.") return False # Step 5: Find best checkpoint output_dir = self.work_dir / "output" checkpoints = list(output_dir.glob("**/best_accuracy.pdparams")) if not checkpoints: # Try to find latest checkpoint checkpoints = list(output_dir.glob("**/latest.pdparams")) if not checkpoints: logger.error("No checkpoint found for evaluation and export.") return False best_checkpoint = checkpoints[0] logger.info(f"Using checkpoint: {best_checkpoint}") # Step 6: Evaluate model logger.info("=== Starting Evaluation ===") self.evaluate(config_path, best_checkpoint, gpus) # Step 7: Export model logger.info("=== Starting Model Export ===") self.export_model(config_path, best_checkpoint) logger.info("=== Complete Pipeline Finished Successfully ===") return True except Exception as e: logger.error(f"Pipeline failed with error: {e}") return False def main(): parser = argparse.ArgumentParser(description="Fine-tune PaddleOCR Text Recognition Models") parser.add_argument("--model_name", type=str, default="PP-OCRv5_server_rec", choices=["PP-OCRv5_server_rec", "PP-OCRv5_mobile_rec", "PP-OCRv4_server_rec", "PP-OCRv4_mobile_rec"], help="Model name to fine-tune") parser.add_argument("--work_dir", type=str, default="./work_dir", help="Working directory for outputs") parser.add_argument("--gpus", type=str, default="0", help="GPU IDs to use (e.g., '0' or '0,1,2,3')") parser.add_argument("--config", type=str, default=None, help="Path to custom config file") parser.add_argument("--skip_demo_data", action="store_true", help="Skip demo data preparation (use your own dataset)") parser.add_argument("--dataset_path", type=str, default="./dataset", help="Path to custom dataset directory") parser.add_argument("--mode", type=str, default="complete", choices=["complete", "train", "eval", "export"], help="Mode to run") parser.add_argument("--checkpoint", type=str, default=None, help="Checkpoint path for evaluation/export") args = parser.parse_args() # Initialize fine-tuner fine_tuner = TextRecognitionFineTuner( config_path=args.config, model_name=args.model_name, work_dir=args.work_dir ) # Example custom parameters (you can modify these) custom_params = { "dataset_path": args.dataset_path, # Add dataset path to custom params "Global": { "epoch_num": 10, # Reduce epochs for faster training "save_epoch_step": 2, "eval_batch_step": [0, 1000] }, "Train": { "loader": { "batch_size_per_card": 128 # Reduce batch size if GPU memory is limited } } } if args.mode == "complete": # Run complete pipeline success = fine_tuner.run_complete_pipeline( custom_params=custom_params, gpus=args.gpus, skip_demo_data=args.skip_demo_data ) sys.exit(0 if success else 1) elif args.mode == "train": # Training only if not args.skip_demo_data: dataset_path = fine_tuner.prepare_demo_dataset() else: dataset_path = Path(args.dataset_path) fine_tuner.download_pretrained_model() config_path = fine_tuner.create_custom_config(dataset_path, custom_params) success = fine_tuner.train(config_path, args.gpus) sys.exit(0 if success else 1) elif args.mode == "eval": # Evaluation only if not args.checkpoint: logger.error("Checkpoint path required for evaluation mode") sys.exit(1) config_path = args.config or fine_tuner.config_path success = fine_tuner.evaluate(config_path, args.checkpoint, args.gpus) sys.exit(0 if success else 1) elif args.mode == "export": # Export only if not args.checkpoint: logger.error("Checkpoint path required for export mode") sys.exit(1) config_path = args.config or fine_tuner.config_path success = fine_tuner.export_model(config_path, args.checkpoint) sys.exit(0 if success else 1) if __name__ == "__main__": main()