# Import pip's ``freeze`` operation, trying the ``pip._internal`` location
# first and falling back to ``pip.operations`` when that import fails.
try:
    from pip._internal.operations import freeze
except ImportError:
    from pip.operations import freeze

# Print every entry produced by ``freeze.freeze()`` on its own line.
# NOTE(review): this runs at import time on every start-up and looks like a
# deployment-debugging aid -- confirm it is still wanted.
pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
| import os |
| import uvicorn |
| from fastapi import FastAPI, HTTPException, File, UploadFile,Query |
| from fastapi.middleware.cors import CORSMiddleware |
| from PyPDF2 import PdfReader |
| import google.generativeai as genai |
| import json |
| from PIL import Image |
| import io |
| import requests |
| import fitz |
| import os |
|
|
|
|
from dotenv import load_dotenv

# Pull settings from a ``.env`` file into the environment before the API key
# is looked up below.
load_dotenv()

# The Gemini API key is mandatory: a missing ``GEMINI`` variable raises
# ``KeyError`` here, at import time.
secret = os.environ["GEMINI"]
genai.configure(api_key=secret)

# ``model_vision`` receives rendered page images (see ``vision``);
# ``model_text`` receives the CV-structuring prompt (see ``get_data``).
model_vision = genai.GenerativeModel("gemini-1.5-flash")
model_text = genai.GenerativeModel("gemini-1.5-pro-latest")
|
|
|
|
|
|
|
|
|
|
|
|
# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Cross-origin policy: any origin, method and header is accepted, and
# credentialed requests are enabled.
# NOTE(review): wildcard origins together with ``allow_credentials=True`` is a
# very permissive combination -- confirm it is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
|
|
|
|
|
|
|
|
|
def vision(file_content):
    """Extract the text of a PDF by sending rendered page images to Gemini.

    Each page is rasterised to a PNG, wrapped in a PIL image and appended to
    a single multimodal request whose first element is the extraction
    instruction.

    Args:
        file_content: Raw bytes of the PDF document.

    Returns:
        The ``.text`` of the ``model_vision`` response, or an empty string
        when the document contains no pages (nothing to send to the model).
    """
    gemini_input = ["extract the whole text"]

    # Open the PDF from memory.  The context manager guarantees the document
    # is closed even if rendering a page raises.
    with fitz.open("pdf", file_content) as pdf_document:
        for page in pdf_document:
            png_bytes = page.get_pixmap().tobytes("png")
            gemini_input.append(Image.open(io.BytesIO(png_bytes)))

    # Only the instruction is present: there are no page images, so asking
    # the model to "extract" would just return unrelated text.
    if len(gemini_input) == 1:
        return ""

    print("PDF pages converted to images successfully!")

    return model_vision.generate_content(gemini_input).text
|
|
|
|
def _strip_code_fence(reply):
    """Return *reply* without a wrapping Markdown code fence or ``json`` tag.

    Only fence markers at the very start/end of the reply and a language tag
    directly after the opening fence are removed, so occurrences of "json"
    inside the payload itself (for example a listed skill) stay intact.
    """
    cleaned = reply.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    cleaned = cleaned.strip()
    # A JSON document can never begin with the bare word "json", so a leading
    # tag (from "```json" or a plain "JSON" prefix) is always safe to drop.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    return cleaned.strip()


@app.post("/get_ocr_data/")
def get_data(input_file: UploadFile = File(...)):
    """Turn an uploaded PDF CV into structured data via Gemini.

    The embedded text layer of the PDF is read first; when it is (nearly)
    empty the pages are sent through ``vision`` instead.  The resulting text
    is embedded in a prompt for ``model_text`` and the reply is parsed as
    JSON.

    Args:
        input_file: The uploaded file; only ``application/pdf`` is accepted.

    Returns:
        ``{"data": <parsed JSON from the model reply>}``.

    Raises:
        HTTPException: 400 when the upload is not a PDF; 502 when the model
            reply cannot be parsed as JSON.
    """
    if input_file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Unsupported file type")

    file_content = input_file.file.read()

    pdf_reader = PdfReader(io.BytesIO(file_content))
    # ``or ""`` guards against pages for which no text is returned.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Measure the stripped length: image-only PDFs often yield nothing but
    # whitespace, which must still trigger the OCR fallback.
    if len(text.strip()) < 10:
        print("vision called")
        text = vision(file_content)

    # NOTE(review): the example below wraps key/value pairs in ``[ ]``, which
    # is not valid JSON.  The wording is kept as-is because changing it could
    # alter the shape of the model's reply that clients rely on.
    prompt = f"""This is CV data: {text.strip()}
    IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.

    Example Output:
    [
        "firstname" : "firstname",
        "lastname" : "lastname",
        "email" : "email",
        "contact_number" : "contact number",
        "home_address" : "full home address",
        "home_town" : "home town or city",
        "total_years_of_experience" : "total years of experience",
        "education": "Institution Name, Degree Name",
        "LinkedIn_link" : "LinkedIn link",
        "experience" : "experience",
        "industry": "industry of work",
        "skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
        "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
        "summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."






    ]
    """

    raw_reply = model_text.generate_content(prompt).text
    print(raw_reply)

    try:
        data = json.loads(_strip_code_fence(raw_reply))
    except json.JSONDecodeError as err:
        # Report an upstream failure instead of an opaque 500.
        raise HTTPException(
            status_code=502,
            detail="Model response was not valid JSON",
        ) from err

    return {"data": data}
|
|
| |
| |