Update preprocess.py
Browse files- preprocess.py +0 -5
preprocess.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# %% [markdown]
|
| 2 |
# ### Step 1: Reading PDF Files
|
| 3 |
# Setup directories
|
| 4 |
pdf_directory = r"F:\Preprocessing"
|
|
@@ -7,10 +6,8 @@ os.makedirs(output_directory, exist_ok=True)
|
|
| 7 |
|
| 8 |
pages = convert_from_path(pdf_path, dpi=dpi)
|
| 9 |
|
| 10 |
-
# %% [markdown]
|
| 11 |
# ### Step 2: Convert PDF files to Images
|
| 12 |
|
| 13 |
-
# %%
|
| 14 |
import os
|
| 15 |
import cv2
|
| 16 |
import numpy as np
|
|
@@ -60,10 +57,8 @@ if __name__ == "__main__":
|
|
| 60 |
process_all_pdfs()
|
| 61 |
print("\n✓ Processing completed!")
|
| 62 |
|
| 63 |
-
# %% [markdown]
|
| 64 |
# ### Step 3: Image Preprocessing
|
| 65 |
|
| 66 |
-
# %%
|
| 67 |
import os
|
| 68 |
import cv2
|
| 69 |
import numpy as np
|
|
|
|
|
|
|
| 1 |
# ### Step 1: Reading PDF Files
|
| 2 |
# Setup directories
|
| 3 |
pdf_directory = r"F:\Preprocessing"
|
|
|
|
| 6 |
|
| 7 |
pages = convert_from_path(pdf_path, dpi=dpi)
|
| 8 |
|
|
|
|
| 9 |
# ### Step 2: Convert PDF files to Images
|
| 10 |
|
|
|
|
| 11 |
import os
|
| 12 |
import cv2
|
| 13 |
import numpy as np
|
|
|
|
| 57 |
process_all_pdfs()
|
| 58 |
print("\n✓ Processing completed!")
|
| 59 |
|
|
|
|
| 60 |
# ### Step 3: Image Preprocessing
|
| 61 |
|
|
|
|
| 62 |
import os
|
| 63 |
import cv2
|
| 64 |
import numpy as np
|