Spaces:

phyloforfun
/

VoucherVision

Running

App Files Community

phyloforfun committed on Aug 28, 2024

Commit

d0291ae

1 Parent(s): a145e37

fixing pdfs

Browse files

Files changed (8) hide show

api_cost/api_cost.yaml +2 -2
app.py +123 -39
vouchervision/LLM_GoogleGemini.py +4 -1
vouchervision/LLM_GooglePalm2.py +55 -29
vouchervision/OCR_GPT4oMini.py +1 -1
vouchervision/OCR_google_cloud_vision.py +8 -6
vouchervision/model_maps.py +58 -58
vouchervision/utils_hf.py +88 -39

api_cost/api_cost.yaml CHANGED Viewed

@@ -61,8 +61,8 @@ GEMINI_PRO:
   in: 0.0005
   out: 0.0015
 GEMINI_1_5_FLASH:
-  in: 0.00035
-  out: 0.00105
 GEMINI_1_5_PRO:
   in: 0.0035
   out: 0.0105

   in: 0.0005
   out: 0.0015
 GEMINI_1_5_FLASH:
+  in: 0.000075
+  out: 0.00030
 GEMINI_1_5_PRO:
   in: 0.0035
   out: 0.0105

app.py CHANGED Viewed

@@ -254,7 +254,6 @@ def load_gallery(converted_files, uploaded_file):
 def handle_image_upload_and_gallery_hf(uploaded_files):
     SAFE = SafetyCheck(st.session_state['is_hf'])
     if uploaded_files:
@@ -271,37 +270,46 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
                 st.error("Warning: You uploaded an image that violates our terms of service.")
                 return True
             # Determine the file type
             if uploaded_file.name.lower().endswith('.pdf'):
-                # Handle PDF files
-                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
-                # Convert each page of the PDF to an image
-                n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
-                # Update the input list for each page image
-                converted_files = os.listdir(st.session_state['dir_uploaded_images'])
-                for file_name in converted_files:
-                    if file_name.split('.')[1].lower() in ['jpg','jpeg']:
-                        ind_small += 1
-                        jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
-                        st.session_state['input_list'].append(jpg_file_path)
-                        if ind_small < MAX_GALLERY_IMAGES +5:
-                            # Optionally, create a thumbnail for the gallery
-                            img = Image.open(jpg_file_path)
-                            img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-                            try:
-                                file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
-                            except:
-                                file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
-                            st.session_state['input_list_small'].append(file_path_small)
             else:
-                ind_small += 1
                 # Handle JPG/JPEG files (existing process)
-                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
                 st.session_state['input_list'].append(file_path)
-                if ind_small < MAX_GALLERY_IMAGES +5:
                     img = Image.open(file_path)
                     img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
                     file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
@@ -313,15 +321,80 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
     if st.session_state['input_list_small']:
         if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
-            # Only take the first 100 images from the list
             images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
         else:
-            # If there are less than 100 images, take them all
             images_to_display = st.session_state['input_list_small']
         show_gallery_small_hf(images_to_display)
     return False
 def handle_image_upload_and_gallery():
@@ -371,6 +444,7 @@ def handle_image_upload_and_gallery():
 def content_input_images(col_left, col_right):
     st.write('---')
     # col1, col2 = st.columns([2,8])
     with col_left:
@@ -385,11 +459,11 @@ def content_input_images(col_left, col_right):
             pass
     with col_left:
-        if st.session_state.is_hf:
-            st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
-            st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
-            uploaded_files = st.file_uploader("Upload Images", type=['jpg', 'jpeg','pdf'], accept_multiple_files=True, key=st.session_state['uploader_idk'])
-            st.button("Use Test Image",help="This will clear any uploaded images and load the 1 provided test image.",on_click=use_test_image)
     with col_right:
         if st.session_state.is_hf:
@@ -1007,7 +1081,8 @@ def create_private_file():
         cfg_private['open_cage_geocode'] = {}
         cfg_private['open_cage_geocode']['API_KEY'] =''
     with col_private:
         st.header("Set API keys")
@@ -1060,7 +1135,7 @@ def create_private_file():
                                 fullpath=os.path.join(st.session_state.dir_home, 'demo','google','google_api_5.PNG'))
             st.subheader("Getting a Google JSON authentication key")
-            st.write("Google uses a JSON file to store additional authentication information. Save this file in a safe, private location and assign the `GOOGLE_APPLICATION_CREDENTIALS` value to the file path. For Hugging Face, copy the contents of the JSON file including the `\{\}` and paste it as the secret value.")
             st.write("To download your JSON key...")
             blog_text_and_image(text="Open the navigation menu. Click on the hamburger menu (three horizontal lines) in the top left corner. Go to IAM & Admin. ",
                                 fullpath=os.path.join(st.session_state.dir_home, 'demo','google','google_api_7.PNG'),width=300)
@@ -1958,11 +2033,20 @@ def content_ocr_method():
     OCR_option_llava_bit = st.session_state.config['leafmachine']['project']['OCR_option_llava_bit']
     double_OCR = st.session_state.config['leafmachine']['project']['double_OCR']
-    # Map the OCR option to the index in options list
-    # You need to define the mapping based on your application's logic
     default_index = 0  # Default to 0 if option not found
     default_index_llava = 0  # Default to 0 if option not found
     default_index_llava_bit = 0
     with c1:
         st.subheader("API Methods (Google Vision)")
         st.write("Using APIs for OCR allows VoucherVision to run on most computers. You can use multiple OCR engines simultaneously.")
@@ -1999,7 +2083,7 @@ def content_ocr_method():
         # Map selected options to their corresponding internal representations
         selected_OCR_options = [OCR_options[option] for option in OCR_option_select]
         # Assuming you need to use these mapped values elsewhere in your application
         st.session_state.config['leafmachine']['project']['OCR_option'] = selected_OCR_options
@@ -2043,7 +2127,7 @@ def content_ocr_method():
         st.session_state.config['leafmachine']['project']['OCR_GPT_4o_mini_resolution'] = st.radio(
             "Select level of detail for :violet[GPT-4o-mini] OCR. We only recommend 'high' detail in most scenarios.",
             ["high", "low", ],
-            captions=["$0.50 per 1,000", "\$5 - \$10 per 1,000"])
     if 'LLaVA' in selected_OCR_options:

 def handle_image_upload_and_gallery_hf(uploaded_files):
     SAFE = SafetyCheck(st.session_state['is_hf'])
     if uploaded_files:
                 st.error("Warning: You uploaded an image that violates our terms of service.")
                 return True
+            # Save the uploaded file (PDF or image)
+            file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
+            if not file_path:
+                st.error(f"Failed to process the file: {uploaded_file.name}")
+                continue  # Skip to the next file
             # Determine the file type
             if uploaded_file.name.lower().endswith('.pdf'):
+                try:
+                    # Convert each page of the PDF to an image
+                    n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)
+                    if n_pages == 0:
+                        st.error(f"No pages were converted from the PDF: {uploaded_file.name}")
+                        continue  # Skip to the next file
+                    # Update the input list for each page image
+                    converted_files = os.listdir(st.session_state['dir_uploaded_images'])
+                    for file_name in converted_files:
+                        if file_name.split('.')[1].lower() in ['jpg', 'jpeg']:
+                            ind_small += 1
+                            jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
+                            st.session_state['input_list'].append(jpg_file_path)
+                            if ind_small < MAX_GALLERY_IMAGES + 5:
+                                # Create a thumbnail for the gallery
+                                img = Image.open(jpg_file_path)
+                                img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                                file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], jpg_file_path, img)
+                                st.session_state['input_list_small'].append(file_path_small)
+                except Exception as e:
+                    st.error(f"Failed to process PDF file {uploaded_file.name}. Error: {e}")
+                    continue  # Skip to the next file
             else:
                 # Handle JPG/JPEG files (existing process)
+                ind_small += 1
                 st.session_state['input_list'].append(file_path)
+                if ind_small < MAX_GALLERY_IMAGES + 5:
                     img = Image.open(file_path)
                     img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
                     file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
     if st.session_state['input_list_small']:
         if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
             images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
         else:
             images_to_display = st.session_state['input_list_small']
         show_gallery_small_hf(images_to_display)
     return False
+# def handle_image_upload_and_gallery_hf(uploaded_files): # not working with pdfs
+#     SAFE = SafetyCheck(st.session_state['is_hf'])
+#     if uploaded_files:
+#         # Clear input image gallery and input list
+#         clear_image_uploads()
+#         ind_small = 0
+#         for uploaded_file in uploaded_files:
+#             if SAFE.check_for_inappropriate_content(uploaded_file):
+#                 clear_image_uploads()
+#                 report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
+#                 st.error("Warning: You uploaded an image that violates our terms of service.")
+#                 return True
+#             # Determine the file type
+#             if uploaded_file.name.lower().endswith('.pdf'):
+#                 # Handle PDF files
+#                 file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
+#                 # Convert each page of the PDF to an image
+#                 n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
+#                 # Update the input list for each page image
+#                 converted_files = os.listdir(st.session_state['dir_uploaded_images'])
+#                 for file_name in converted_files:
+#                     if file_name.split('.')[1].lower() in ['jpg','jpeg']:
+#                         ind_small += 1
+#                         jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
+#                         st.session_state['input_list'].append(jpg_file_path)
+#                         if ind_small < MAX_GALLERY_IMAGES +5:
+#                             # Optionally, create a thumbnail for the gallery
+#                             img = Image.open(jpg_file_path)
+#                             img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+#                             try:
+#                                 file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
+#                             except:
+#                                 file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
+#                             st.session_state['input_list_small'].append(file_path_small)
+#             else:
+#                 ind_small += 1
+#                 # Handle JPG/JPEG files (existing process)
+#                 file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
+#                 st.session_state['input_list'].append(file_path)
+#                 if ind_small < MAX_GALLERY_IMAGES +5:
+#                     img = Image.open(file_path)
+#                     img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+#                     file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+#                     st.session_state['input_list_small'].append(file_path_small)
+#         # After processing all files
+#         st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
+#         st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")
+#     if st.session_state['input_list_small']:
+#         if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
+#             # Only take the first 100 images from the list
+#             images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
+#         else:
+#             # If there are less than 100 images, take them all
+#             images_to_display = st.session_state['input_list_small']
+#         show_gallery_small_hf(images_to_display)
+#     return False
 def handle_image_upload_and_gallery():
 def content_input_images(col_left, col_right):
     st.write('---')
     # col1, col2 = st.columns([2,8])
     with col_left:
             pass
     with col_left:
+        # if st.session_state.is_hf:
+        st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
+        st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
+        uploaded_files = st.file_uploader("Upload Images", type=['jpg', 'jpeg','pdf'], accept_multiple_files=True, key=st.session_state['uploader_idk'])
+        st.button("Use Test Image",help="This will clear any uploaded images and load the 1 provided test image.",on_click=use_test_image)
     with col_right:
         if st.session_state.is_hf:
         cfg_private['open_cage_geocode'] = {}
         cfg_private['open_cage_geocode']['API_KEY'] =''
+        cfg_private['huggingface'] = {}
     with col_private:
         st.header("Set API keys")
                                 fullpath=os.path.join(st.session_state.dir_home, 'demo','google','google_api_5.PNG'))
             st.subheader("Getting a Google JSON authentication key")
+            st.write(f"Google uses a JSON file to store additional authentication information. Save this file in a safe, private location and assign the `GOOGLE_APPLICATION_CREDENTIALS` value to the file path. For Hugging Face, copy the contents of the JSON file including the curly brackets and paste it as the secret value.")
             st.write("To download your JSON key...")
             blog_text_and_image(text="Open the navigation menu. Click on the hamburger menu (three horizontal lines) in the top left corner. Go to IAM & Admin. ",
                                 fullpath=os.path.join(st.session_state.dir_home, 'demo','google','google_api_7.PNG'),width=300)
     OCR_option_llava_bit = st.session_state.config['leafmachine']['project']['OCR_option_llava_bit']
     double_OCR = st.session_state.config['leafmachine']['project']['double_OCR']
     default_index = 0  # Default to 0 if option not found
     default_index_llava = 0  # Default to 0 if option not found
     default_index_llava_bit = 0
+    # Map the OCR option to the index in options list
+    # You need to define the mapping for multiple OCR options
+    # based on your application's logic
+    if len(OCR_option) == 1:
+        OCR_option = OCR_option[0]
+        try:
+            default_index = options.index(OCR_option)
+        except ValueError:
+            pass
     with c1:
         st.subheader("API Methods (Google Vision)")
         st.write("Using APIs for OCR allows VoucherVision to run on most computers. You can use multiple OCR engines simultaneously.")
         # Map selected options to their corresponding internal representations
         selected_OCR_options = [OCR_options[option] for option in OCR_option_select]
+        print('Selected OCR options:',selected_OCR_options)
         # Assuming you need to use these mapped values elsewhere in your application
         st.session_state.config['leafmachine']['project']['OCR_option'] = selected_OCR_options
         st.session_state.config['leafmachine']['project']['OCR_GPT_4o_mini_resolution'] = st.radio(
             "Select level of detail for :violet[GPT-4o-mini] OCR. We only recommend 'high' detail in most scenarios.",
             ["high", "low", ],
+            captions=[f"$0.50 per 1,000", f"$5 - $10 per 1,000"])
     if 'LLaVA' in selected_OCR_options:

vouchervision/LLM_GoogleGemini.py CHANGED Viewed

@@ -101,7 +101,10 @@ class GoogleGeminiHandler:
         #                           top_p=self.config.get('top_p'))
         # Set up the retry parser with the runnable
-        self.retry_parser = RetryWithErrorOutputParser.from_llm(parser=self.parser, llm=self.llm_model, max_retries=self.MAX_RETRIES)
         # Prepare the chain
         self.chain = self.prompt | self.call_google_gemini

         #                           top_p=self.config.get('top_p'))
         # Set up the retry parser with the runnable
+        self.retry_parser = RetryWithErrorOutputParser.from_llm(
+            parser=self.parser,
+            llm=self.llm_model,
+            max_retries=self.MAX_RETRIES)
         # Prepare the chain
         self.chain = self.prompt | self.call_google_gemini

vouchervision/LLM_GooglePalm2.py CHANGED Viewed

@@ -1,17 +1,17 @@
 import os, time, json, typing
 # import vertexai
 from vertexai.language_models import TextGenerationModel
 from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
 from vertexai.language_models import TextGenerationModel
 # from vertexai.preview.generative_models import GenerativeModel
-from langchain.output_parsers import RetryWithErrorOutputParser
 # from langchain.schema import HumanMessage
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 # from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_google_vertexai import VertexAI
 from langchain_core.messages import BaseMessage, HumanMessage
-from langchain_core.prompt_values import PromptValue as BasePromptValue
 from vouchervision.utils_LLM import SystemLoadMonitor, run_tools, count_tokens, save_individual_prompt, sanitize_prompt
 from vouchervision.utils_LLM_JSON_validation import validate_and_align_JSON_keys_with_template
@@ -25,6 +25,18 @@ from vouchervision.utils_LLM_JSON_validation import validate_and_align_JSON_keys
 # gcloud config set project XXXXXXXXX
 # https://cloud.google.com/docs/authentication
 class GooglePalm2Handler:
     RETRY_DELAY = 10  # Wait 10 seconds before retrying
@@ -45,8 +57,6 @@ class GooglePalm2Handler:
         self.config_vals_for_permutation = config_vals_for_permutation
         self.monitor = SystemLoadMonitor(logger)
         self.parser = JsonOutputParser()
@@ -104,19 +114,35 @@ class GooglePalm2Handler:
         self.adjust_temp = self.starting_temp
         self.config['temperature'] = self.starting_temp
     def _build_model_chain_parser(self):
         # Instantiate the parser and the retry parser
-        # self.llm_model = ChatGoogleGenerativeAI(model=self.model_name)
         self.llm_model = VertexAI(model=self.model_name,
                                   max_output_tokens=self.config.get('max_output_tokens'),
                                   temperature=self.config.get('temperature'),
                                   top_k=self.config.get('top_k'),
                                   top_p=self.config.get('top_p'))
         self.retry_parser = RetryWithErrorOutputParser.from_llm(
-                                                parser=self.parser,
-                                                llm=self.llm_model,
-                                                max_retries=self.MAX_RETRIES)
         # Prepare the chain
         self.chain = self.prompt | self.call_google_palm2
@@ -148,22 +174,27 @@ class GooglePalm2Handler:
         while ind < self.MAX_RETRIES:
             ind += 1
             try:
-                # model_kwargs = {"temperature": self.adjust_temp}
                 # Invoke the chain to generate prompt text
-                response = self.chain.invoke({"query": prompt_template})#, "model_kwargs": model_kwargs})
-                # Use retry_parser to parse the response with retry logic
-                try:
-                    output = self.retry_parser.parse_with_prompt(response, prompt_value=PromptValue(prompt_template))
-                except:
-                    try:
-                        output = self.retry_parser.parse_with_prompt(response, prompt_value=prompt_template)
-                    except:
-                        try:
-                            output = json.loads(response)
-                        except Exception as e:
-                            print(e)
-                            output = None
                 if output is None:
@@ -215,8 +246,3 @@ class GooglePalm2Handler:
             self.json_report.set_text(text_main=f'LLM call failed')
         return None, nt_in, nt_out, None, None, usage_report
-class PromptValue(BasePromptValue):
-    prompt_str: str
-    def to_string(self) -> str:
-        return self.prompt_str

 import os, time, json, typing
+from dataclasses import dataclass
 # import vertexai
 from vertexai.language_models import TextGenerationModel
 from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
 from vertexai.language_models import TextGenerationModel
 # from vertexai.preview.generative_models import GenerativeModel
+from langchain.output_parsers.retry import RetryWithErrorOutputParser
 # from langchain.schema import HumanMessage
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 # from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_google_vertexai import VertexAI
 from langchain_core.messages import BaseMessage, HumanMessage
 from vouchervision.utils_LLM import SystemLoadMonitor, run_tools, count_tokens, save_individual_prompt, sanitize_prompt
 from vouchervision.utils_LLM_JSON_validation import validate_and_align_JSON_keys_with_template
 # gcloud config set project XXXXXXXXX
 # https://cloud.google.com/docs/authentication
+from pydantic import BaseModel
+from langchain_core.prompt_values import PromptValue as BasePromptValue
+class PromptValueWrapper(BaseModel):
+    prompt_str: str
+    def to_string(self) -> str:
+        return self.prompt_str
+    def to_messages(self):
+        return [HumanMessage(content=self.prompt_str)]
 class GooglePalm2Handler:
     RETRY_DELAY = 10  # Wait 10 seconds before retrying
         self.config_vals_for_permutation = config_vals_for_permutation
         self.monitor = SystemLoadMonitor(logger)
         self.parser = JsonOutputParser()
         self.adjust_temp = self.starting_temp
         self.config['temperature'] = self.starting_temp
+    # def _build_model_chain_parser(self):
+    #     # Instantiate the parser and the retry parser
+    #     # self.llm_model = ChatGoogleGenerativeAI(model=self.model_name)
+    #     self.llm_model = VertexAI(model=self.model_name,
+    #                               max_output_tokens=self.config.get('max_output_tokens'),
+    #                               temperature=self.config.get('temperature'),
+    #                               top_k=self.config.get('top_k'),
+    #                               top_p=self.config.get('top_p'))
+    #     self.retry_parser = RetryWithErrorOutputParser.from_llm(
+    #                                             parser=self.parser,
+    #                                             llm=self.llm_model,
+    #                                             max_retries=self.MAX_RETRIES)
+    #     # Prepare the chain
+    #     self.chain = self.prompt | self.call_google_palm2
     def _build_model_chain_parser(self):
         # Instantiate the parser and the retry parser
         self.llm_model = VertexAI(model=self.model_name,
                                   max_output_tokens=self.config.get('max_output_tokens'),
                                   temperature=self.config.get('temperature'),
                                   top_k=self.config.get('top_k'),
                                   top_p=self.config.get('top_p'))
         self.retry_parser = RetryWithErrorOutputParser.from_llm(
+            llm=self.llm_model,
+            parser=self.parser,
+            max_retries=self.MAX_RETRIES
+        )
         # Prepare the chain
         self.chain = self.prompt | self.call_google_palm2
         while ind < self.MAX_RETRIES:
             ind += 1
             try:
+                model_kwargs = {"temperature": self.adjust_temp}
                 # Invoke the chain to generate prompt text
+                response = self.chain.invoke({"query": prompt_template, "model_kwargs": model_kwargs})
+                # Clean up the response by removing any Markdown formatting
+                response_text = response.strip('```JSON\n').strip('\n```')
+                output = json.loads(response_text)
+                # # Use retry_parser to parse the response with retry logic
+                # try:
+                #     output = self.retry_parser.parse_with_prompt(response, prompt_value=PromptValue(prompt_template))
+                # except:
+                #     try:
+                #         output = self.retry_parser.parse_with_prompt(response, prompt_value=prompt_template)
+                #     except:
+                #         try:
+                #             output = json.loads(response)
+                #         except Exception as e:
+                #             print(e)
+                #             output = None
                 if output is None:
             self.json_report.set_text(text_main=f'LLM call failed')
         return None, nt_in, nt_out, None, None, usage_report

vouchervision/OCR_GPT4oMini.py CHANGED Viewed

@@ -74,7 +74,7 @@ def main():
     img_path = 'D:/D_Desktop/BR_1839468565_Ochnaceae_Campylospermum_reticulatum_label.jpg'
     # $env:OPENAI_API_KEY="KEY"
-    API_KEY = "sk-proj-DxHlMH1H6jZzs8V12qbLT3BlbkFJIJnAVzt4kquOfhGURGW0"
     ocr = GPT4oMiniOCR(API_KEY)

     img_path = 'D:/D_Desktop/BR_1839468565_Ochnaceae_Campylospermum_reticulatum_label.jpg'
     # $env:OPENAI_API_KEY="KEY"
+    API_KEY = ""
     ocr = GPT4oMiniOCR(API_KEY)

vouchervision/OCR_google_cloud_vision.py CHANGED Viewed

@@ -99,12 +99,14 @@ class OCREngine:
     def set_client(self):
-        if self.is_hf:
-            self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
-            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
-        else:
-            self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
-            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
     def get_google_credentials(self):

     def set_client(self):
+        # Only init Google Vision if it is needed
+        if 'hand' in self.OCR_option or 'normal' in self.OCR_option:
+            if self.is_hf:
+                self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
+                self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
+            else:
+                self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
+                self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
     def get_google_credentials(self):

vouchervision/model_maps.py CHANGED Viewed

@@ -1,5 +1,5 @@
 class ModelMaps:
-    PROMPTS_THAT_NEED_DOMAIN_KNOWLEDGE = ["Version 1", "Version 1 PaLM 2"]
     COLORS_EXPENSE_REPORT = {
         'GPT_4': '#32CD32',  # Lime Green
         'GPT_3_5': '#008000',  # Green
@@ -41,49 +41,49 @@ class ModelMaps:
     }
     MODELS_OPENAI = [
-                    "GPT 4o 2024-05-13", #GPT_4o_2024_05_13
-                    "GPT 4o mini 2024-07-18",
-                    "GPT 4 Turbo 2024-04-09",#GPT_4_TURBO_2024_04_09
-                    "GPT 4",
-                    "GPT 4 32k",
-                    "GPT 4 Turbo 0125-preview",
-                    "GPT 4 Turbo 1106-preview",
-                    "GPT 3.5 Turbo",
-                    "GPT 3.5 Instruct",
                     ]
     MODELS_OPENAI_AZURE = [
-                    "Azure GPT 4",
-                    #  "Azure GPT 4 32k",
-                    #  "Azure GPT 4 Turbo 0125-preview",
-                    #  "Azure GPT 4 Turbo 1106-preview",
-                    #  "Azure GPT 3.5 Turbo",
-                    #  "Azure GPT 3.5 Instruct",
                     ]
     MODELS_GOOGLE = [
-                    # "PaLM 2 text-bison@001",
-                     "PaLM 2 text-bison@002",
-                     "PaLM 2 text-unicorn@001",
-                     "Gemini 1.0 Pro ",
-                     "Gemini 1.5 Flash",
-                     "Gemini 1.5 Pro",
                      ]
-    MODELS_MISTRAL = ["Mistral Small",
-                      "Mistral Medium",
-                      "Mistral Large",
-                      "Open Mixtral 8x7B",
-                      "Open Mistral 7B",
                       ]
-    MODELS_LOCAL = ["LOCAL Mixtral 8x7B Instruct v0.1",
-                    "LOCAL Mistral 7B Instruct v0.2",
-                    "LOCAL CPU Mistral 7B Instruct v0.2 GGUF",
                     'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
-    MODELS_GUI_DEFAULT = "Gemini 1.5 Flash" #"Azure GPT 4" # "GPT 4 Turbo 1106-preview"
     MODEL_FAMILY = {
                     'OpenAI': MODELS_OPENAI,
@@ -95,9 +95,9 @@ class ModelMaps:
     version_mapping_cost = {
         'GPT 4 32k': 'GPT_4_32K',
         'GPT 4': 'GPT_4',
-        "GPT 4o 2024-05-13": 'GPT_4o_2024_05_13',
-        "GPT 4o mini 2024-07-18": 'GPT_4o_mini_2024_07_18',
-        "GPT 4 Turbo 2024-04-09": 'GPT_4_TURBO_2024_04_09',
         'GPT 4 Turbo 0125-preview': 'GPT_4_TURBO_0125',
         'GPT 4 Turbo 1106-preview': 'GPT_4_TURBO_1106',
         'GPT 3.5 Instruct': 'GPT_3_5_INSTRUCT',
@@ -110,7 +110,7 @@ class ModelMaps:
         # 'Azure GPT 3.5 Instruct': 'AZURE_GPT_3_5_INSTRUCT',
         # 'Azure GPT 3.5 Turbo': 'AZURE_GPT_3_5',
-        'Gemini 1.0 Pro ': 'GEMINI_PRO',
         'Gemini 1.5 Flash': 'GEMINI_1_5_FLASH',  # gemini-1.5-flash
         'Gemini 1.5 Pro': 'GEMINI_1_5_PRO',  # gemini-1.5-pro
@@ -136,12 +136,12 @@ class ModelMaps:
     def get_version_has_key(cls, key, has_key_openai, has_key_azure_openai, has_key_google_application_credentials, has_key_mistral):
         # Define the mapping for 'has_key' values
         version_has_key = {
-            "GPT 4 Turbo 2024-04-09": has_key_openai,
             'GPT 4 Turbo 1106-preview': has_key_openai,
             'GPT 4 Turbo 0125-preview': has_key_openai,
             'GPT 4':  has_key_openai,
-            "GPT 4o 2024-05-13": has_key_openai,
-            "GPT 4o mini 2024-07-18": has_key_openai,
             'GPT 4 32k':  has_key_openai,
             'GPT 3.5 Turbo':  has_key_openai,
             'GPT 3.5 Instruct':  has_key_openai,
@@ -156,9 +156,9 @@ class ModelMaps:
             # 'PaLM 2 text-bison@001':  has_key_google_application_credentials,
             'PaLM 2 text-bison@002':  has_key_google_application_credentials,
             'PaLM 2 text-unicorn@001':  has_key_google_application_credentials,
-            'Gemini 1.0 Pro ':  has_key_google_application_credentials,
             'Gemini 1.5 Flash':  has_key_google_application_credentials,
-            'Gemini 1.5 Pro ':  has_key_google_application_credentials,
             'Mistral Small':  has_key_mistral,
             'Mistral Medium':  has_key_mistral,
@@ -178,11 +178,11 @@ class ModelMaps:
     @classmethod
     def get_version_mapping_is_azure(cls, key):
         version_mapping_is_azure = {
-            "GPT 4o 2024-05-13": False,
-            "GPT 4o mini 2024-07-18": False,
-            "GPT 4 Turbo 2024-04-09": False,
-            "GPT 4 Turbo 1106-preview": False,
-            "GPT 4 Turbo 0125-preview": False,
             'GPT 4': False,
             'GPT 4 32k':  False,
             'GPT 3.5 Turbo':  False,
@@ -198,9 +198,9 @@ class ModelMaps:
             # 'PaLM 2 text-bison@001':  False,
             'PaLM 2 text-bison@002':  False,
             'PaLM 2 text-unicorn@001':  False,
-            'Gemini 1.0 Pro ':  False,
             'Gemini 1.5 Flash':  False,
-            'Gemini 1.5 Pro ':  False,
             'Mistral Small':  False,
             'Mistral Medium':  False,
@@ -256,39 +256,39 @@ class ModelMaps:
         #     return 'gpt-35-turbo-instruct'
         elif key == 'AZURE_GPT_4':
-            return "gpt-4"
         # elif key == 'AZURE_GPT_4_TURBO_1106':
-        #     return "gpt-4-1106-preview"
         # elif key == 'AZURE_GPT_4_TURBO_0125':
         #     return 'gpt-4-0125-preview'
         # elif key == 'AZURE_GPT_4_32K':
-        #     return "gpt-4-32k"
         ### Google
         # elif key == 'PALM2_TB_1':
-        #     return "text-bison@001"
         elif key == 'PALM2_TB_2':
-            return "text-bison@002"
         elif key == 'PALM2_TU_1':
-            return "text-unicorn@001"
         elif key == 'GEMINI_PRO':
-            return "gemini-1.0-pro"
         elif key == 'GEMINI_1_5_FLASH':
-            return "gemini-1.5-flash"
         elif key == 'GEMINI_1_5_PRO':
-            return "gemini-1.5-pro"
         ### Mistral
         elif key == 'OPEN_MISTRAL_7B':
-            return "open-mistral-7b"
         elif key == 'OPEN_MIXTRAL_8X7B':
             return 'open-mixtral-8x7b'
@@ -323,7 +323,7 @@ class ModelMaps:
         else:
-            raise ValueError(f"Invalid model name {key}. See model_maps.py")
     @classmethod
     def get_models_gui_list(cls):

 class ModelMaps:
+    PROMPTS_THAT_NEED_DOMAIN_KNOWLEDGE = ['Version 1', 'Version 1 PaLM 2']
     COLORS_EXPENSE_REPORT = {
         'GPT_4': '#32CD32',  # Lime Green
         'GPT_3_5': '#008000',  # Green
     }
     MODELS_OPENAI = [
+                    'GPT 4o 2024-05-13', #GPT_4o_2024_05_13
+                    'GPT 4o mini 2024-07-18',
+                    'GPT 4 Turbo 2024-04-09',#GPT_4_TURBO_2024_04_09
+                    'GPT 4',
+                    'GPT 4 32k',
+                    'GPT 4 Turbo 0125-preview',
+                    'GPT 4 Turbo 1106-preview',
+                    'GPT 3.5 Turbo',
+                    'GPT 3.5 Instruct',
                     ]
     MODELS_OPENAI_AZURE = [
+                    'Azure GPT 4',
+                    #  'Azure GPT 4 32k',
+                    #  'Azure GPT 4 Turbo 0125-preview',
+                    #  'Azure GPT 4 Turbo 1106-preview',
+                    #  'Azure GPT 3.5 Turbo',
+                    #  'Azure GPT 3.5 Instruct',
                     ]
     MODELS_GOOGLE = [
+                    # 'PaLM 2 text-bison@001',
+                     'PaLM 2 text-bison@002',
+                     'PaLM 2 text-unicorn@001',
+                     'Gemini 1.0 Pro',
+                     'Gemini 1.5 Flash',
+                     'Gemini 1.5 Pro',
                      ]
+    MODELS_MISTRAL = ['Mistral Small',
+                      'Mistral Medium',
+                      'Mistral Large',
+                      'Open Mixtral 8x7B',
+                      'Open Mistral 7B',
                       ]
+    MODELS_LOCAL = ['LOCAL Mixtral 8x7B Instruct v0.1',
+                    'LOCAL Mistral 7B Instruct v0.2',
+                    'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
                     'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
+    MODELS_GUI_DEFAULT = 'Gemini 1.5 Flash' #'Azure GPT 4' # 'GPT 4 Turbo 1106-preview'
     MODEL_FAMILY = {
                     'OpenAI': MODELS_OPENAI,
     version_mapping_cost = {
         'GPT 4 32k': 'GPT_4_32K',
         'GPT 4': 'GPT_4',
+        'GPT 4o 2024-05-13': 'GPT_4o_2024_05_13',
+        'GPT 4o mini 2024-07-18': 'GPT_4o_mini_2024_07_18',
+        'GPT 4 Turbo 2024-04-09': 'GPT_4_TURBO_2024_04_09',
         'GPT 4 Turbo 0125-preview': 'GPT_4_TURBO_0125',
         'GPT 4 Turbo 1106-preview': 'GPT_4_TURBO_1106',
         'GPT 3.5 Instruct': 'GPT_3_5_INSTRUCT',
         # 'Azure GPT 3.5 Instruct': 'AZURE_GPT_3_5_INSTRUCT',
         # 'Azure GPT 3.5 Turbo': 'AZURE_GPT_3_5',
+        'Gemini 1.0 Pro': 'GEMINI_PRO',
         'Gemini 1.5 Flash': 'GEMINI_1_5_FLASH',  # gemini-1.5-flash
         'Gemini 1.5 Pro': 'GEMINI_1_5_PRO',  # gemini-1.5-pro
     def get_version_has_key(cls, key, has_key_openai, has_key_azure_openai, has_key_google_application_credentials, has_key_mistral):
         # Define the mapping for 'has_key' values
         version_has_key = {
+            'GPT 4 Turbo 2024-04-09': has_key_openai,
             'GPT 4 Turbo 1106-preview': has_key_openai,
             'GPT 4 Turbo 0125-preview': has_key_openai,
             'GPT 4':  has_key_openai,
+            'GPT 4o 2024-05-13': has_key_openai,
+            'GPT 4o mini 2024-07-18': has_key_openai,
             'GPT 4 32k':  has_key_openai,
             'GPT 3.5 Turbo':  has_key_openai,
             'GPT 3.5 Instruct':  has_key_openai,
             # 'PaLM 2 text-bison@001':  has_key_google_application_credentials,
             'PaLM 2 text-bison@002':  has_key_google_application_credentials,
             'PaLM 2 text-unicorn@001':  has_key_google_application_credentials,
+            'Gemini 1.0 Pro':  has_key_google_application_credentials,
             'Gemini 1.5 Flash':  has_key_google_application_credentials,
+            'Gemini 1.5 Pro':  has_key_google_application_credentials,
             'Mistral Small':  has_key_mistral,
             'Mistral Medium':  has_key_mistral,
     @classmethod
     def get_version_mapping_is_azure(cls, key):
         version_mapping_is_azure = {
+            'GPT 4o 2024-05-13': False,
+            'GPT 4o mini 2024-07-18': False,
+            'GPT 4 Turbo 2024-04-09': False,
+            'GPT 4 Turbo 1106-preview': False,
+            'GPT 4 Turbo 0125-preview': False,
             'GPT 4': False,
             'GPT 4 32k':  False,
             'GPT 3.5 Turbo':  False,
             # 'PaLM 2 text-bison@001':  False,
             'PaLM 2 text-bison@002':  False,
             'PaLM 2 text-unicorn@001':  False,
+            'Gemini 1.0 Pro':  False,
             'Gemini 1.5 Flash':  False,
+            'Gemini 1.5 Pro':  False,
             'Mistral Small':  False,
             'Mistral Medium':  False,
         #     return 'gpt-35-turbo-instruct'
         elif key == 'AZURE_GPT_4':
+            return 'gpt-4'
         # elif key == 'AZURE_GPT_4_TURBO_1106':
+        #     return 'gpt-4-1106-preview'
         # elif key == 'AZURE_GPT_4_TURBO_0125':
         #     return 'gpt-4-0125-preview'
         # elif key == 'AZURE_GPT_4_32K':
+        #     return 'gpt-4-32k'
         ### Google
         # elif key == 'PALM2_TB_1':
+        #     return 'text-bison@001'
         elif key == 'PALM2_TB_2':
+            return 'text-bison@002'
         elif key == 'PALM2_TU_1':
+            return 'text-unicorn@001'
         elif key == 'GEMINI_PRO':
+            return 'gemini-1.0-pro'
         elif key == 'GEMINI_1_5_FLASH':
+            return 'gemini-1.5-flash'
         elif key == 'GEMINI_1_5_PRO':
+            return 'gemini-1.5-pro'
         ### Mistral
         elif key == 'OPEN_MISTRAL_7B':
+            return 'open-mistral-7b'
         elif key == 'OPEN_MIXTRAL_8X7B':
             return 'open-mixtral-8x7b'
         else:
+            raise ValueError(f'Invalid model name {key}. See model_maps.py')
     @classmethod
     def get_models_gui_list(cls):

vouchervision/utils_hf.py CHANGED Viewed

@@ -68,57 +68,106 @@ def save_uploaded_file_local(directory_in, directory_out, img_file_name, image=N
             except:
                 pass
-def save_uploaded_file(directory, img_file, image=None):
     if not os.path.exists(directory):
         os.makedirs(directory)
-    full_path = os.path.join(directory, img_file.name) ########## TODO THIS MUST BE MOVED TO conditional specific location
-    # Assuming the uploaded file is an image
-    if img_file.name.lower().endswith('.pdf'):
-        with open(full_path, 'wb') as out_file:
-            # If img_file is a file-like object (e.g., Django's UploadedFile),
-            # you can use copyfileobj or read chunks.
-            # If it's a path, you'd need to open and then save it.
-            if hasattr(img_file, 'read'):
-                # This is a file-like object
-                copyfileobj(img_file, out_file)
-            else:
-                # If img_file is a path string
-                with open(img_file, 'rb') as fd:
-                    copyfileobj(fd, out_file)
             return full_path
     else:
         if image is None:
             try:
-                with Image.open(img_file) as image:
-                    full_path = os.path.join(directory, img_file.name)
                     image.save(full_path, "JPEG")
-                # Return the full path of the saved image
-                return full_path
-            except:
-                try:
-                    with Image.open(os.path.join(directory,img_file)) as image:
-                        full_path = os.path.join(directory, img_file)
-                        image.save(full_path, "JPEG")
-                    # Return the full path of the saved image
-                    return full_path
-                except:
-                    with Image.open(img_file.name) as image:
-                        full_path = os.path.join(directory, img_file.name)
-                        image.save(full_path, "JPEG")
-                    # Return the full path of the saved image
-                    return full_path
         else:
             try:
-                full_path = os.path.join(directory, img_file.name)
-                image.save(full_path, "JPEG")
-                return full_path
-            except:
-                full_path = os.path.join(directory, img_file)
                 image.save(full_path, "JPEG")
-                return full_path
 # def save_uploaded_file(directory, uploaded_file, image=None):
 #     if not os.path.exists(directory):
 #         os.makedirs(directory)

             except:
                 pass
def save_uploaded_file(directory, uploaded_file, image=None):
    """Save an uploaded PDF or image into ``directory`` and return its path.

    PDFs are copied byte-for-byte. Anything else is written with
    ``image.save(full_path, "JPEG")``, using ``image`` when supplied and
    otherwise opening ``uploaded_file`` with ``Image.open``.

    Args:
        directory: Destination folder; created if it does not exist.
        uploaded_file: A file-like object exposing ``name`` and ``read``,
            or a filesystem path string.
        image: Optional already-opened image to save instead of re-opening
            ``uploaded_file``.  Assumed to be a PIL image (has ``mode``,
            ``convert`` and ``save``) -- TODO confirm against callers.

    Returns:
        The full path of the saved file, or ``None`` if saving failed (the
        failure is reported through ``st.error``).
    """
    os.makedirs(directory, exist_ok=True)

    # The upload name is client-supplied.  Keep only its last path component
    # so values such as "../x.pdf" or an absolute path cannot make os.path.join
    # write outside ``directory``.
    raw_name = getattr(uploaded_file, 'name', None) or str(uploaded_file)
    file_name = os.path.basename(str(raw_name).replace('\\', '/'))
    full_path = os.path.join(directory, file_name)

    is_stream = hasattr(uploaded_file, 'read')
    if is_stream and hasattr(uploaded_file, 'seek'):
        # An earlier consumer may have left the stream at EOF; without
        # rewinding, read() returns b"" and the upload looks empty.
        try:
            uploaded_file.seek(0)
        except (OSError, ValueError):
            pass  # Non-seekable stream: read from the current position.

    def _write_jpeg(img):
        # The JPEG writer rejects modes such as RGBA, LA and P, so convert
        # those to RGB first.  Modes it can already write are left untouched.
        jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
        if getattr(img, "mode", "RGB") not in jpeg_modes:
            img = img.convert("RGB")
        # NOTE(review): the data is JPEG-encoded but stored under the original
        # file name, whatever its extension -- kept as in the original.
        img.save(full_path, "JPEG")

    # Handle PDF and Image files differently
    if file_name.lower().endswith('.pdf'):
        try:
            # Read everything before opening the destination so that a source
            # path equal to the destination is not truncated, and so that no
            # empty file is left behind when the upload has no content.
            if is_stream:
                data = uploaded_file.read()
            else:
                # uploaded_file is a path string
                with open(uploaded_file, 'rb') as fd:
                    data = fd.read()
            if not data:
                raise ValueError(f"The file {file_name} is empty.")
            with open(full_path, 'wb') as out_file:
                out_file.write(data)
            return full_path
        except Exception as e:
            st.error(f"Failed to save PDF file {file_name}. Error: {e}")
            return None

    # Handle image files
    if image is None:
        try:
            with Image.open(uploaded_file) as opened:
                _write_jpeg(opened)
        except Exception as e:
            st.error(f"Failed to save image file {file_name}. Error: {e}")
            return None
    else:
        try:
            _write_jpeg(image)
        except Exception as e:
            st.error(f"Failed to save processed image file {file_name}. Error: {e}")
            return None

    if os.path.getsize(full_path) == 0:
        st.error(f"The image file {file_name} is empty.")
        return None

    return full_path
+# def save_uploaded_file(directory, img_file, image=None): # not working with pdfs
+#     if not os.path.exists(directory):
+#         os.makedirs(directory)
+#     full_path = os.path.join(directory, img_file.name) ########## TODO THIS MUST BE MOVED TO conditional specific location
+#     # Assuming the uploaded file is an image
+#     if img_file.name.lower().endswith('.pdf'):
+#         with open(full_path, 'wb') as out_file:
+#             # If img_file is a file-like object (e.g., Django's UploadedFile),
+#             # you can use copyfileobj or read chunks.
+#             # If it's a path, you'd need to open and then save it.
+#             if hasattr(img_file, 'read'):
+#                 # This is a file-like object
+#                 copyfileobj(img_file, out_file)
+#             else:
+#                 # If img_file is a path string
+#                 with open(img_file, 'rb') as fd:
+#                     copyfileobj(fd, out_file)
+#             return full_path
+#     else:
+#         if image is None:
+#             try:
+#                 with Image.open(img_file) as image:
+#                     full_path = os.path.join(directory, img_file.name)
+#                     image.save(full_path, "JPEG")
+#                 # Return the full path of the saved image
+#                 return full_path
+#             except:
+#                 try:
+#                     with Image.open(os.path.join(directory,img_file)) as image:
+#                         full_path = os.path.join(directory, img_file)
+#                         image.save(full_path, "JPEG")
+#                     # Return the full path of the saved image
+#                     return full_path
+#                 except:
+#                     with Image.open(img_file.name) as image:
+#                         full_path = os.path.join(directory, img_file.name)
+#                         image.save(full_path, "JPEG")
+#                     # Return the full path of the saved image
+#                     return full_path
+#         else:
+#             try:
+#                 full_path = os.path.join(directory, img_file.name)
+#                 image.save(full_path, "JPEG")
+#                 return full_path
+#             except:
+#                 full_path = os.path.join(directory, img_file)
+#                 image.save(full_path, "JPEG")
+#                 return full_path
 # def save_uploaded_file(directory, uploaded_file, image=None):
 #     if not os.path.exists(directory):
 #         os.makedirs(directory)