phyloforfun committed on
Commit
4d5e173
1 Parent(s): 21fc60f

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
demo/ocr_test/ocr_test.jpg ADDED

Git LFS Details

  • SHA256: af32889d7c1eb136e501952ebd5ec5288452c4bc845c8059cc4a3644f8745e1d
  • Pointer size: 131 Bytes
  • Size of remote file: 411 kB
vouchervision/API_validation.py CHANGED
@@ -6,6 +6,8 @@ from langchain_openai import AzureChatOpenAI
6
  from vertexai.language_models import TextGenerationModel
7
  from vertexai.preview.generative_models import GenerativeModel
8
  from google.cloud import vision
 
 
9
  from datetime import datetime
10
  import google.generativeai as genai
11
  from google.oauth2 import service_account
@@ -37,10 +39,10 @@ class APIvalidation:
37
  return False
38
 
39
  def check_openai_api_key(self):
40
- if self.cfg_private:
41
- openai.api_key = self.cfg_private['openai']['OPENAI_API_KEY']
42
- else:
43
  openai.api_key = os.getenv('OPENAI_API_KEY')
 
 
44
 
45
  try:
46
  openai.models.list()
@@ -48,52 +50,6 @@ class APIvalidation:
48
  except:
49
  return False
50
 
51
- # def check_google_ocr_api_key(self): ##################################################################################### maybe check without initi, post the vertexai
52
- # # if os.path.exists(self.cfg_private['google_cloud']['path_json_file']):
53
- # # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file']
54
- # # elif os.path.exists(self.cfg_private['google_cloud']['path_json_file_service_account2']):
55
- # # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file_service_account2']
56
- # # else:
57
- # # return False
58
-
59
- # try:
60
- # if not self.cfg_private:
61
- # # Convert JSON key from string to a dictionary
62
- # service_account_json_str = os.getenv('google_service_account_json')
63
- # if not service_account_json_str:
64
- # print("Service account JSON not found in environment variables.")
65
- # return False
66
-
67
- # # Convert JSON string to a dictionary
68
- # service_account_info = json.loads(service_account_json_str)
69
- # # Create credentials from the service account info
70
- # credentials = service_account.Credentials.from_service_account_info(service_account_info)
71
- # # Initialize the client with the credentials
72
- # client = vision.ImageAnnotatorClient(credentials=credentials)
73
- # logo_path = os.path.join(self.dir_home, 'img','logo.png')
74
- # with io.open(logo_path, 'rb') as image_file:
75
- # content = image_file.read()
76
- # image = vision.Image(content=content)
77
- # response = client.document_text_detection(image=image)
78
- # texts = response.text_annotations
79
- # normal_cleaned_text = texts[0].description if texts else None
80
- # print(f"OCR TEST: {normal_cleaned_text}")
81
- # else:
82
- # logo_path = os.path.join(self.dir_home, 'img','logo.png')
83
- # client = vision.ImageAnnotatorClient()
84
- # with io.open(logo_path, 'rb') as image_file:
85
- # content = image_file.read()
86
- # image = vision.Image(content=content)
87
- # response = client.document_text_detection(image=image)
88
- # texts = response.text_annotations
89
- # normal_cleaned_text = texts[0].description if texts else None
90
- # if normal_cleaned_text:
91
- # return True
92
- # else:
93
- # return False
94
- # except:
95
- # return False
96
-
97
  def check_azure_openai_api_key(self):
98
  if not self.is_hf:
99
  try:
@@ -171,126 +127,97 @@ class APIvalidation:
171
  except Exception as e: # Replace with a more specific exception if possible
172
  return False
173
 
174
- # def get_google_credentials(self):
175
- # # Convert JSON key from string to a dictionary
176
- # service_account_json_str = os.getenv('google_service_account_json')
177
 
178
- # with tempfile.NamedTemporaryFile(mode="w+", delete=False,suffix=".json") as temp:
179
- # temp.write(service_account_json_str)
180
- # temp_filename = temp.name
181
-
182
- # return temp_filename
 
183
 
184
- # https://cloud.google.com/docs/authentication/provide-credentials-adc
185
- def get_google_credentials(self):
186
- creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
187
- credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
188
- return credentials
 
189
 
190
- # def init_google_client(opt, opt2):
191
- # # Fetch the credentials JSON string from Hugging Face Secrets
192
- # creds_json_str = os.getenv('google_service_account_json')
 
 
 
 
 
 
 
 
193
 
194
- # if creds_json_str:
195
- # creds_dict = json.loads(creds_json_str)
196
- # credentials = service_account.Credentials.from_service_account_info(creds_dict)
 
 
 
 
 
 
 
197
 
198
- # # Initialize Google API client (if needed for your use case)
199
- # client = build(opt, opt2, credentials=credentials) # Adjust with actual service details
200
 
201
- # return client, credentials
202
- # else:
203
- # print("Google API credentials not found.")
204
 
205
  def check_google_vertex_genai_api_key(self):
206
  results = {"palm2": False, "gemini": False}
207
- if not self.is_hf:
208
- try: # Local
209
- # Assuming genai and vertexai are clients for Google services
210
- # os.environ["GOOGLE_API_KEY"] = self.cfg_private['google_palm']['google_palm_api']
211
- # genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
212
- vertexai.init(project= self.cfg_private['google_palm']['project_id'], location=self.cfg_private['google_palm']['location'])
213
-
214
- try:
215
- model = TextGenerationModel.from_pretrained("text-bison@001")
216
- response = model.predict("Hello")
217
- test_response_palm = response.text
218
- # llm_palm = ChatGoogleGenerativeAI(model="text-bison@001")
219
- # test_response_palm = llm_palm.invoke("Hello")
220
- if test_response_palm:
221
- results["palm2"] = True
222
- except Exception as e:
223
- pass
224
-
225
- try:
226
- model = GenerativeModel("gemini-pro")
227
- response = model.generate_content("Hello")
228
- test_response_gemini = response.text
229
- # llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro")
230
- # test_response_gemini = llm_gemini.invoke("Hello")
231
- if test_response_gemini:
232
- results["gemini"] = True
233
- except Exception as e:
234
- pass
235
-
236
-
237
- return results
238
- except Exception as e: # Replace with a more specific exception if possible
239
- return results
 
 
 
 
240
  else:
241
- ### is hugging face
242
- try:
243
- # Assuming genai and vertexai are clients for Google services
244
- # os.environ["GOOGLE_API_KEY"] = os.getenv('PALM_API_KEY')
245
- # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
246
- # client, credentials = self.init_google_client('gemini-pro', 'v1')
247
- # print(credentials)
248
- print("service account")
249
- palm_api_key = os.getenv('PALM_API_KEY')
250
- google_project_id = os.getenv('GOOGLE_PROJECT_ID')
251
- google_location = os.getenv('GOOGLE_LOCATION')
252
- os.environ['GOOGLE_API_KEY'] = os.getenv('PALM_API_KEY')
253
- vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'),credentials=self.get_google_credentials())
254
- # genai.configure(api_key=palm_api_key)
255
- # vertexai.init(project=google_project_id, location=google_location)#, credentials=credentials)
256
- print("service account pass")
257
-
258
- try:
259
- model = TextGenerationModel.from_pretrained("text-bison@001")
260
- response = model.predict("Hello")
261
- test_response_palm = response.text
262
- # llm_palm = ChatGoogleGenerativeAI(model="text-bison@001")
263
- # test_response_palm = llm_palm.invoke("Hello")
264
- if test_response_palm:
265
- results["palm2"] = True
266
- print(f"palm2 pass [{test_response_palm}]")
267
- else:
268
- print(f"palm2 yes [{test_response_palm}]")
269
-
270
- except Exception as e:
271
- print(f"palm2 [{e}]")
272
- pass
273
-
274
- try:
275
- model = GenerativeModel("gemini-pro")
276
- response = model.generate_content("Hello")
277
- test_response_gemini = response.text
278
- # llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro")
279
- # test_response_gemini = llm_gemini.invoke("Hello")
280
- if test_response_gemini:
281
- results["gemini"] = True
282
- print(f"gemini pass [{test_response_palm}]")
283
- else:
284
- print(f"gemini yes [{test_response_palm}]")
285
-
286
- except Exception as e:
287
- print(f"gemini [{e}]")
288
- pass
289
-
290
- return results
291
- except Exception as e: # Replace with a more specific exception if possible
292
- print(f"Immediate [{e}]")
293
- return results
294
 
295
  def report_api_key_status(self):
296
  missing_keys = []
@@ -299,31 +226,39 @@ class APIvalidation:
299
  if not self.is_hf:
300
  k_OPENAI_API_KEY = self.cfg_private['openai']['OPENAI_API_KEY']
301
  k_openai_azure = self.cfg_private['openai_azure']['api_version']
302
- k_google_palm_api = self.cfg_private['google_palm']['google_palm_api']
 
303
  k_project_id = self.cfg_private['google_palm']['project_id']
304
  k_location = self.cfg_private['google_palm']['location']
 
 
305
  k_mistral = self.cfg_private['mistral']['mistral_key']
306
  k_here = self.cfg_private['here']['api_key']
307
  k_opencage = self.cfg_private['open_cage_geocode']['api_key']
308
  else:
309
  k_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
310
  k_openai_azure = os.getenv('AZURE_API_VERSION')
311
- k_google_palm_api = os.getenv('PALM_API_KEY')
 
312
  k_project_id = os.getenv('GOOGLE_PROJECT_ID')
313
  k_location = os.getenv('GOOGLE_LOCATION')
314
  k_mistral = os.getenv('MISTRAL_API_KEY')
315
- k_here = os.getenv('here_api_key')
316
  k_opencage = os.getenv('open_cage_geocode')
317
 
318
 
319
  # Check each key and add to the respective list
320
  # Google OCR key check
321
- if self.has_API_key(k_google_palm_api) and self.has_API_key(k_project_id) and self.has_API_key(k_location):
322
- is_valid = True #self.check_google_ocr_api_key() ###############################################################################################################################
323
- if is_valid:
324
- present_keys.append('Google OCR (Valid)')
 
 
 
 
325
  else:
326
- present_keys.append('Google OCR (Invalid)')
327
  else:
328
  missing_keys.append('Google OCR')
329
 
@@ -349,7 +284,8 @@ class APIvalidation:
349
  missing_keys.append('Azure OpenAI')
350
 
351
  # Google PALM2/Gemini key check
352
- if self.has_API_key(k_google_palm_api) and self.has_API_key(k_project_id) and self.has_API_key(k_location):
 
353
  google_results = self.check_google_vertex_genai_api_key()
354
  if google_results['palm2']:
355
  present_keys.append('Palm2 (Valid)')
 
6
  from vertexai.language_models import TextGenerationModel
7
  from vertexai.preview.generative_models import GenerativeModel
8
  from google.cloud import vision
9
+ from google.cloud import vision_v1p3beta1 as vision_beta
10
+
11
  from datetime import datetime
12
  import google.generativeai as genai
13
  from google.oauth2 import service_account
 
39
  return False
40
 
41
  def check_openai_api_key(self):
42
+ if self.is_hf:
 
 
43
  openai.api_key = os.getenv('OPENAI_API_KEY')
44
+ else:
45
+ openai.api_key = self.cfg_private['openai']['OPENAI_API_KEY']
46
 
47
  try:
48
  openai.models.list()
 
50
  except:
51
  return False
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def check_azure_openai_api_key(self):
54
  if not self.is_hf:
55
  try:
 
127
  except Exception as e: # Replace with a more specific exception if possible
128
  return False
129
 
130
def check_google_vision_client(self):
    """Check whether Google Cloud Vision OCR works with the configured credentials.

    Sends the bundled ``demo/ocr_test/ocr_test.jpg`` image to the beta client
    (with a handwriting language hint) and to the stable client.

    Returns:
        dict: ``{"ocr_print": bool, "ocr_hand": bool}``. A flag is True only
        when the corresponding request returned non-empty text.
    """
    results = {"ocr_print": False, "ocr_hand": False}

    # The clients are built the same way for hosted and local runs, so no
    # branch on self.is_hf is needed here. If the credentials cannot be turned
    # into clients they are unusable: report both checks as failed instead of
    # raising out of a validation routine.
    try:
        credentials = self.get_google_credentials()
        client_beta = vision_beta.ImageAnnotatorClient(credentials=credentials)
        client = vision.ImageAnnotatorClient(credentials=credentials)
    except Exception as e:
        print(f"Google Vision client init failed [{e}]")
        return results

    # Prefer the image under the project home; fall back to a path relative
    # to the current working directory.
    try:
        with open(os.path.join(self.dir_home, 'demo', 'ocr_test', 'ocr_test.jpg'), "rb") as image_file:
            content = image_file.read()
    except Exception:
        with open("./demo/ocr_test/ocr_test.jpg", "rb") as image_file:
            content = image_file.read()

    # Handwriting OCR (beta API with a handwriting language hint).
    try:
        image = vision_beta.Image(content=content)
        image_context = vision_beta.ImageContext(language_hints=["en-t-i0-handwrit"])
        response = client_beta.document_text_detection(image=image, image_context=image_context)
        texts = response.text_annotations
        if texts:
            print(f"OCR Hand:\n{texts[0].description}")
            results['ocr_hand'] = len(texts[0].description) > 0
        else:
            print("OCR Hand: no text detected")
    except Exception as e:
        print(f"OCR Hand failed [{e}]")

    # Printed-text OCR (stable API).
    try:
        image = vision.Image(content=content)
        response = client.document_text_detection(image=image)
        texts = response.text_annotations
        if texts:
            print(f"OCR Print:\n{texts[0].description}")
            results['ocr_print'] = len(texts[0].description) > 0
        else:
            print("OCR Print: no text detected")
    except Exception as e:
        print(f"OCR Print failed [{e}]")

    return results
 
171
 
 
 
 
172
 
173
  def check_google_vertex_genai_api_key(self):
174
  results = {"palm2": False, "gemini": False}
175
+
176
+ try:
177
+ model = TextGenerationModel.from_pretrained("text-bison@001")
178
+ response = model.predict("Hello")
179
+ test_response_palm = response.text
180
+ if test_response_palm:
181
+ results["palm2"] = True
182
+ print(f"palm2 pass [{test_response_palm}]")
183
+ else:
184
+ print(f"palm2 fail [{test_response_palm}]")
185
+
186
+ except Exception as e:
187
+ print(f"palm2 fail2 [{test_response_palm}]")
188
+
189
+
190
+ try:
191
+ model = GenerativeModel("gemini-pro")
192
+ response = model.generate_content("Hello")
193
+ test_response_gemini = response.text
194
+ if test_response_gemini:
195
+ results["gemini"] = True
196
+ print(f"gemini pass [{test_response_gemini}]")
197
+ else:
198
+ print(f"gemini fail [{test_response_gemini}]")
199
+
200
+ except Exception as e:
201
+ print(f"palm2 fail2 [{test_response_gemini}]")
202
+
203
+ return results
204
+
205
+
206
+
207
def get_google_credentials(self):
    """Build service-account credentials for the Google clients.

    When ``self.is_hf`` is true, the service-account JSON text is read from
    the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable. Otherwise the
    JSON key file named in the private config is loaded and its serialized
    contents are written to that same environment variable.

    Returns:
        The object returned by
        ``service_account.Credentials.from_service_account_info``.
    """
    if not self.is_hf:
        key_path = self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS']
        with open(key_path, 'r') as key_file:
            key_info = json.load(key_file)
        serialized = json.dumps(key_info)
        account = service_account.Credentials.from_service_account_info(json.loads(serialized))
        # NOTE(review): Google's default-credential lookup normally expects
        # this variable to hold a file path; here it holds the JSON text.
        # Confirm nothing in the project relies on the default lookup.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = serialized
        return account

    env_json = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
    return service_account.Credentials.from_service_account_info(json.loads(env_json))
219
+
220
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  def report_api_key_status(self):
223
  missing_keys = []
 
226
  if not self.is_hf:
227
  k_OPENAI_API_KEY = self.cfg_private['openai']['OPENAI_API_KEY']
228
  k_openai_azure = self.cfg_private['openai_azure']['api_version']
229
+ # k_google_palm_api = self.cfg_private['google_palm']['google_palm_api']
230
+
231
  k_project_id = self.cfg_private['google_palm']['project_id']
232
  k_location = self.cfg_private['google_palm']['location']
233
+ k_google_application_credentials = self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS']
234
+
235
  k_mistral = self.cfg_private['mistral']['mistral_key']
236
  k_here = self.cfg_private['here']['api_key']
237
  k_opencage = self.cfg_private['open_cage_geocode']['api_key']
238
  else:
239
  k_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
240
  k_openai_azure = os.getenv('AZURE_API_VERSION')
241
+ # k_google_palm_api = os.getenv('PALM_API_KEY')
242
+
243
  k_project_id = os.getenv('GOOGLE_PROJECT_ID')
244
  k_location = os.getenv('GOOGLE_LOCATION')
245
  k_mistral = os.getenv('MISTRAL_API_KEY')
246
+ k_here = os.getenv('HERE_API_KEY')
247
  k_opencage = os.getenv('open_cage_geocode')
248
 
249
 
250
  # Check each key and add to the respective list
251
  # Google OCR key check
252
+ if self.has_API_key(k_google_application_credentials) and self.has_API_key(k_project_id) and self.has_API_key(k_location):
253
+ google_ocr_results = self.check_google_vision_client()
254
+ if google_ocr_results['ocr_print']:
255
+ present_keys.append('Google OCR Print (Valid)')
256
+ else:
257
+ present_keys.append('Google OCR Print (Invalid)')
258
+ if google_ocr_results['ocr_hand']:
259
+ present_keys.append('Google OCR Handwriting (Valid)')
260
  else:
261
+ present_keys.append('Google OCR Handwriting (Invalid)')
262
  else:
263
  missing_keys.append('Google OCR')
264
 
 
284
  missing_keys.append('Azure OpenAI')
285
 
286
  # Google PALM2/Gemini key check
287
+ if self.has_API_key(k_google_application_credentials) and self.has_API_key(k_project_id) and self.has_API_key(k_location): ##################
288
+ vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'), credentials=self.get_google_credentials())
289
  google_results = self.check_google_vertex_genai_api_key()
290
  if google_results['palm2']:
291
  present_keys.append('Palm2 (Valid)')
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -71,28 +71,22 @@ class OCRGoogle:
71
  self.trOCR_characters = None
72
  self.set_client()
73
 
74
-
75
 
76
  def set_client(self):
77
  if self.is_hf:
78
- service_account_json_str = os.getenv('google_service_account_json')
79
- if not service_account_json_str:
80
- print("Service account JSON not found in environment variables.")
81
- return False
82
-
83
- # Convert JSON string to a dictionary
84
- service_account_info = json.loads(service_account_json_str)
85
- # Create credentials from the service account info
86
- credentials = service_account.Credentials.from_service_account_info(service_account_info)
87
- # Initialize the client with the credentials
88
- self.client_beta = vision_beta.ImageAnnotatorClient(credentials=credentials)
89
- self.client = vision.ImageAnnotatorClient(credentials=credentials)
90
  else:
91
  self.client_beta = vision_beta.ImageAnnotatorClient()
92
  self.client = vision.ImageAnnotatorClient()
93
 
94
 
95
-
 
 
 
 
 
96
  def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
97
  CONFIDENCES = 0.80
98
  MAX_NEW_TOKENS = 50
 
71
  self.trOCR_characters = None
72
  self.set_client()
73
 
 
74
 
75
  def set_client(self):
76
  if self.is_hf:
77
+ self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
78
+ self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
 
 
 
 
 
 
 
 
 
 
79
  else:
80
  self.client_beta = vision_beta.ImageAnnotatorClient()
81
  self.client = vision.ImageAnnotatorClient()
82
 
83
 
84
def get_google_credentials(self):
    """Build service-account credentials from the environment.

    Reads the service-account JSON text from the
    ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable and parses it.

    Returns:
        The object returned by
        ``service_account.Credentials.from_service_account_info``.
    """
    account_info = json.loads(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
    return service_account.Credentials.from_service_account_info(account_info)
88
+
89
+
90
  def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
91
  CONFIDENCES = 0.80
92
  MAX_NEW_TOKENS = 50
vouchervision/utils_VoucherVision.py CHANGED
@@ -410,194 +410,132 @@ class VoucherVision():
410
  else:
411
  return False
412
 
413
- # def get_google_credentials(self):
414
- # # Convert JSON key from string to a dictionary
415
- # service_account_json_str = os.getenv('google_service_account_json')
416
 
417
- # with tempfile.NamedTemporaryFile(mode="w+", delete=False,suffix=".json") as temp:
418
- # temp.write(service_account_json_str)
419
- # temp_filename = temp.name
420
-
421
- # return temp_filename
422
-
423
- # def init_google_client(opt, opt2):
424
- # # Fetch the credentials JSON string from Hugging Face Secrets
425
- # creds_json_str = os.getenv('google_service_account_json')
426
-
427
- # if creds_json_str:
428
- # creds_dict = json.loads(creds_json_str)
429
- # credentials = service_account.Credentials.from_service_account_info(creds_dict)
430
-
431
- # # Initialize Google API client (if needed for your use case)
432
- # client = build(opt, opt2, credentials=credentials) # Adjust with actual service details
433
-
434
- # return client, credentials
435
- # else:
436
- # print("Google API credentials not found.")
437
  def get_google_credentials(self):
 
438
  creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
439
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
440
  return credentials
 
 
 
 
 
 
 
441
 
 
442
  def set_API_keys(self):
443
  if self.is_hf:
444
- openai_api_key = os.getenv('OPENAI_API_KEY')
445
- # google_application_credentials = os.getenv('google_service_account_json')
446
- palm_api_key = os.getenv('PALM_API_KEY')
447
- mistral_api_key = os.getenv('MISTRAL_API_KEY')
448
- here_api_key = os.getenv('here_api_key')
449
- here_app_id = os.getenv('here_app_id')
450
- open_cage_api_key = os.getenv('open_cage_geocode')
451
- google_project_id = os.getenv('GOOGLE_PROJECT_ID')
452
- google_project_location = os.getenv('GOOGLE_LOCATION')
453
- # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
454
-
455
-
456
-
457
- self.has_key_openai = openai_api_key is not None
458
- # self.has_key_google_OCR = google_application_credentials is not None
459
- self.has_key_palm2 = palm_api_key is not None
460
- self.has_key_mistral = mistral_api_key is not None
461
- self.has_key_here = here_api_key is not None
462
- self.has_hey_here = here_app_id is not None
463
- self.has_open_cage_geocode = open_cage_api_key is not None
464
- self.has_key_google_project_id = google_project_id is not None
465
- self.has_key_google_project_location = google_project_location is not None
466
-
467
- # genai_api_key = os.getenv('PALM_API_KEY')
468
- os.environ['GOOGLE_API_KEY'] = os.getenv('PALM_API_KEY')
469
- vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
470
-
471
-
472
- os.environ['GOOGLE_API_KEY'] = os.getenv('PALM_API_KEY')
473
- vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'),credentials=self.get_google_credentials())
474
- # try:
475
- # if genai_api_key:
476
- # genai.configure(api_key=genai_api_key)
477
- # except:
478
- # print(f"failed genai.configure")
479
-
480
- # try:
481
- # if google_project_id and google_location:
482
- # # clien, credentials = self.init_google_client('gemini-pro', 'v1')
483
- # # print(credentials)
484
- # vertexai.init(project=google_project_id, location=google_location)#, credentials=credentials)
485
- # except:
486
- # print(f"failed vertexai.init")
487
- # if self.has_key_openai:
488
- # openai.api_key = openai_api_key
489
-
490
- # if self.has_key_google_project_id and self.has_key_google_project_location:
491
- # genai.configure(api_key=os.getenv('PALM_API_KEY'))
492
- # vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
493
-
494
- if os.getenv('AZURE_API_KEY') is not None:
495
- azure_api_version = os.getenv('AZURE_API_VERSION')
496
- azure_api_key = os.getenv('AZURE_API_KEY')
497
- azure_api_base = os.getenv('AZURE_API_BASE')
498
- azure_organization = os.getenv('AZURE_ORGANIZATION')
499
- # azure_api_type = os.getenv('AZURE_API_TYPE')
500
- # azure_deployment_name = os.getenv('AZURE_DEPLOYMENT_NAME')
501
-
502
- if azure_api_version and azure_api_key and azure_api_base and azure_organization:# and azure_api_type and azure_deployment_name:
503
-
504
- self.has_key_azure_openai = True
505
- self.llm = AzureChatOpenAI(
506
- deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
507
- openai_api_version = azure_api_version,
508
- openai_api_key = azure_api_key,
509
- azure_endpoint = azure_api_base,
510
- openai_organization = azure_organization,
511
- )
512
-
513
-
514
- else:
515
  self.dir_home = os.path.dirname(os.path.dirname(__file__))
516
  self.path_cfg_private = os.path.join(self.dir_home, 'PRIVATE_DATA.yaml')
517
  self.cfg_private = get_cfg_from_full_path(self.path_cfg_private)
518
 
519
- self.has_key_openai = self.has_API_key(self.cfg_private['openai']['OPENAI_API_KEY'])
 
520
 
521
- self.has_key_azure_openai = self.has_API_key(self.cfg_private['openai_azure']['api_version'])
 
 
522
 
523
- self.has_key_google_OCR = self.has_API_key(self.cfg_private['google_cloud']['path_json_file'])
 
 
 
 
 
 
 
 
 
524
 
525
- self.has_key_palm2 = self.has_API_key(self.cfg_private['google_palm']['google_palm_api'])
526
- self.has_key_google_project_id = self.has_API_key(self.cfg_private['google_palm']['project_id'])
527
- self.has_key_google_project_location = self.has_API_key(self.cfg_private['google_palm']['location'])
528
 
529
- self.has_key_mistral = self.has_API_key(self.cfg_private['mistral']['mistral_key'])
 
 
530
 
531
- self.has_key_here = self.has_API_key(self.cfg_private['here']['api_key'])
532
 
533
- self.has_open_cage_geocode = self.has_API_key(self.cfg_private['open_cage_geocode']['api_key'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
- if self.has_key_openai:
 
 
 
 
536
  openai.api_key = self.cfg_private['openai']['OPENAI_API_KEY']
537
  os.environ["OPENAI_API_KEY"] = self.cfg_private['openai']['OPENAI_API_KEY']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
 
539
- if self.has_key_azure_openai:
540
- # os.environ["OPENAI_API_KEY"] = self.cfg_private['openai_azure']['openai_api_key']
541
  self.llm = AzureChatOpenAI(
542
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
543
  openai_api_version = self.cfg_private['openai_azure']['api_version'],
544
  openai_api_key = self.cfg_private['openai_azure']['openai_api_key'],
545
  azure_endpoint = self.cfg_private['openai_azure']['openai_api_base'],
546
- # openai_api_base=self.cfg_private['openai_azure']['openai_api_base'],
547
  openai_organization = self.cfg_private['openai_azure']['openai_organization'],
548
- # openai_api_type = self.cfg_private['openai_azure']['openai_api_type']
549
  )
550
-
551
- # This is frustrating. a #TODO is to figure out when/why these methods conflict with the permissions set in the Palm/Gemini calls
552
- name_check = self.cfg['leafmachine']['LLM_version'].lower().split(' ')
553
- if ('google' in name_check) or( 'palm' in name_check) or ('gemini' in name_check):
554
- os.environ['GOOGLE_PROJECT_ID'] = self.cfg_private['google_palm']['project_id'] # gemini
555
- os.environ['GOOGLE_LOCATION'] = self.cfg_private['google_palm']['location'] # gemini
556
- # genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
557
- os.environ['GOOGLE_API_KEY'] = self.cfg_private['google_palm']['google_palm_api']
558
- vertexai.init(project=os.environ['GOOGLE_PROJECT_ID'], location=os.environ['GOOGLE_LOCATION'])
559
-
560
-
561
-
562
- # os.environ.pop("GOOGLE_APPLICATION_CREDENTIALS", None)
563
- # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file'] ####
564
- # os.environ['GOOGLE_API_KEY'] = self.cfg_private['google_palm']['google_palm_api']
565
-
566
-
567
- ##### NOTE: this is how you can use ONLY OCR. If you get a vertexAI login it should work without loading all this
568
- # else:
569
- # if self.has_key_google_OCR:
570
- # if os.path.exists(self.cfg_private['google_cloud']['path_json_file']):
571
- # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file']
572
- # elif os.path.exists(self.cfg_private['google_cloud']['path_json_file_service_account2']):
573
- # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file_service_account2']
574
- # else:
575
- # raise f"Google JSON API key file not found"
576
-
577
- ##### NOTE: This should also be covered by vertexAI now
578
- # if self.has_key_palm2:
579
- # os.environ['PALM'] = self.cfg_private['google_palm']['google_palm_api']
580
- # os.environ['GOOGLE_PROJECT_ID'] = self.cfg_private['google_palm']['project_id'] # gemini
581
- # os.environ['GOOGLE_LOCATION'] = self.cfg_private['google_palm']['location'] # gemini
582
- # os.environ['GOOGLE_API_KEY'] = self.cfg_private['google_palm']['google_palm_api']
583
-
584
- if self.has_key_mistral:
585
  os.environ['MISTRAL_API_KEY'] = self.cfg_private['mistral']['mistral_key']
586
 
587
- if self.has_key_here:
588
- os.environ['here_app_id'] = self.cfg_private['here']['app_id']
589
- os.environ['here_api_key'] = self.cfg_private['here']['api_key']
590
 
591
- if self.has_open_cage_geocode:
592
- os.environ['open_cage_geocode'] = self.cfg_private['open_cage_geocode']['api_key']
593
-
 
 
 
 
594
 
595
-
596
- # def initialize_embeddings(self):
597
- # '''Loading embedding search __init__(self, db_name, path_domain_knowledge, logger, build_new_db=False, model_name="hkunlp/instructor-xl", device="cuda")'''
598
- # self.Voucher_Vision_Embedding = VoucherVisionEmbedding(self.db_name, self.path_domain_knowledge, logger=self.logger, build_new_db=self.build_new_db)
599
 
 
 
 
 
 
 
 
600
 
 
601
  def clean_catalog_number(self, data, filename_without_extension):
602
  #Cleans up the catalog number in data if it's a dict
603
 
 
410
  else:
411
  return False
412
 
 
 
 
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  def get_google_credentials(self):
415
+ if self.is_hf:
416
  creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
417
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
418
  return credentials
419
+ else:
420
+ with open(self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as file:
421
+ data = json.load(file)
422
+ creds_json_str = json.dumps(data)
423
+ credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
424
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_json_str
425
+ return credentials
426
 
427
+
428
  def set_API_keys(self):
429
  if self.is_hf:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  self.dir_home = os.path.dirname(os.path.dirname(__file__))
431
  self.path_cfg_private = os.path.join(self.dir_home, 'PRIVATE_DATA.yaml')
432
  self.cfg_private = get_cfg_from_full_path(self.path_cfg_private)
433
 
434
+ k_openai = self.cfg_private['openai']['OPENAI_API_KEY']
435
+ k_openai_azure = self.cfg_private['openai_azure']['api_version']
436
 
437
+ k_google_project_id = self.cfg_private['google_palm']['project_id']
438
+ k_google_location = self.cfg_private['google_palm']['location']
439
+ k_google_application_credentials = self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS']
440
 
441
+ k_mistral = self.cfg_private['mistral']['mistral_key']
442
+ k_here = self.cfg_private['here']['api_key']
443
+ k_opencage = self.cfg_private['open_cage_geocode']['api_key']
444
+ else:
445
+ self.dir_home = os.path.dirname(os.path.dirname(__file__))
446
+ self.path_cfg_private = None
447
+ self.cfg_private = None
448
+
449
+ k_openai = os.getenv('OPENAI_API_KEY')
450
+ k_openai_azure = os.getenv('AZURE_API_VERSION')
451
 
452
+ k_google_project_id = os.getenv('GOOGLE_PROJECT_ID')
453
+ k_google_location = os.getenv('GOOGLE_LOCATION')
454
+ k_google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
455
 
456
+ k_mistral = os.getenv('MISTRAL_API_KEY')
457
+ k_here = os.getenv('HERE_API_KEY')
458
+ k_opencage = os.getenv('open_cage_geocode')
459
 
 
460
 
461
+ self.has_key_openai = self.has_API_key(k_openai)
462
+ self.has_key_azure_openai = self.has_API_key(k_openai_azure)
463
+
464
+ self.has_key_google_project_id = self.has_API_key(k_google_project_id)
465
+ self.has_key_google_location = self.has_API_key(k_google_location)
466
+ self.has_key_google_application_credentials = self.has_API_key(k_google_application_credentials)
467
+
468
+ self.has_key_mistral = self.has_API_key(k_mistral)
469
+ self.has_key_here = self.has_API_key(k_here)
470
+ self.has_key_open_cage_geocode = self.has_API_key(k_opencage)
471
+
472
+
473
+ ### Google - OCR, Palm2, Gemini
474
+ if self.is_hf:
475
+ if self.has_key_google_application_credentials and self.has_key_google_project_id and self.has_key_google_location:
476
+ vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'), credentials=self.get_google_credentials())
477
+
478
 
479
+ ### OpenAI
480
+ if self.has_key_openai:
481
+ if self.is_hf:
482
+ openai.api_key = os.getenv('OPENAI_API_KEY')
483
+ else:
484
  openai.api_key = self.cfg_private['openai']['OPENAI_API_KEY']
485
  os.environ["OPENAI_API_KEY"] = self.cfg_private['openai']['OPENAI_API_KEY']
486
+
487
+
488
+ ### OpenAI - Azure
489
+ if self.has_key_azure_openai:
490
+ if self.is_hf:
491
+ # Initialize the Azure OpenAI client
492
+ self.llm = AzureChatOpenAI(
493
+ deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
494
+ openai_api_version = os.getenv('AZURE_API_VERSION'),
495
+ openai_api_key = os.getenv('AZURE_API_KEY'),
496
+ azure_endpoint = os.getenv('AZURE_API_BASE'),
497
+ openai_organization = os.getenv('AZURE_ORGANIZATION'),
498
+ )
499
+ self.has_key_azure_openai = True
500
 
501
+ else:
502
+ # Initialize the Azure OpenAI client
503
  self.llm = AzureChatOpenAI(
504
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
505
  openai_api_version = self.cfg_private['openai_azure']['api_version'],
506
  openai_api_key = self.cfg_private['openai_azure']['openai_api_key'],
507
  azure_endpoint = self.cfg_private['openai_azure']['openai_api_base'],
 
508
  openai_organization = self.cfg_private['openai_azure']['openai_organization'],
 
509
  )
510
+ self.has_key_azure_openai = True
511
+
512
+
513
+ ### Mistral
514
+ if self.has_key_mistral:
515
+ if self.is_hf:
516
+ pass # Already set
517
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  os.environ['MISTRAL_API_KEY'] = self.cfg_private['mistral']['mistral_key']
519
 
 
 
 
520
 
521
+ ### HERE
522
+ if self.has_key_here:
523
+ if self.is_hf:
524
+ pass # Already set
525
+ else:
526
+ os.environ['HERE_APP_ID'] = self.cfg_private['here']['app_id']
527
+ os.environ['HERE_API_KEY'] = self.cfg_private['here']['api_key']
528
 
 
 
 
 
529
 
530
+ ### OpenCage
531
+ if self.has_key_open_cage_geocode:
532
+ if self.is_hf:
533
+ pass # Already set
534
+ else:
535
+ os.environ['OPENCAGE_API_KEY'] = self.cfg_private['open_cage_geocode']['api_key']
536
+
537
 
538
+
539
  def clean_catalog_number(self, data, filename_without_extension):
540
  #Cleans up the catalog number in data if it's a dict
541
 
vouchervision/utils_geolocate_HERE.py CHANGED
@@ -150,28 +150,28 @@ def validate_coordinates_here(record, replace_if_success_geo=False):
150
  query_reverse = "42,-83" # cannot have any spaces
151
  # params = {
152
  # 'q': query_loc,
153
- # 'apiKey': os.environ['here_api_key'],
154
  # }'''
155
 
156
 
157
  params_rev = {
158
  'at': query_reverse,
159
- 'apiKey': os.environ['here_api_key'],
160
  'lang': 'en',
161
  }
162
  params_reverse_verbatim = {
163
  'at': query_reverse_verbatim,
164
- 'apiKey': os.environ['here_api_key'],
165
  'lang': 'en',
166
  }
167
  params_forward = {
168
  'q': query_forward,
169
- 'apiKey': os.environ['here_api_key'],
170
  'lang': 'en',
171
  }
172
  params_forward_locality = {
173
  'q': query_forward_locality,
174
- 'apiKey': os.environ['here_api_key'],
175
  'lang': 'en',
176
  }
177
 
 
150
  query_reverse = "42,-83" # cannot have any spaces
151
  # params = {
152
  # 'q': query_loc,
153
+ # 'apiKey': os.environ['HERE_API_KEY'],
154
  # }'''
155
 
156
 
157
  params_rev = {
158
  'at': query_reverse,
159
+ 'apiKey': os.environ['HERE_API_KEY'],
160
  'lang': 'en',
161
  }
162
  params_reverse_verbatim = {
163
  'at': query_reverse_verbatim,
164
+ 'apiKey': os.environ['HERE_API_KEY'],
165
  'lang': 'en',
166
  }
167
  params_forward = {
168
  'q': query_forward,
169
+ 'apiKey': os.environ['HERE_API_KEY'],
170
  'lang': 'en',
171
  }
172
  params_forward_locality = {
173
  'q': query_forward_locality,
174
+ 'apiKey': os.environ['HERE_API_KEY'],
175
  'lang': 'en',
176
  }
177
 
vouchervision/utils_geolocate_OpenCage.py CHANGED
@@ -54,7 +54,7 @@ def validate_coordinates_opencage(record, replace_if_success_geo=False):
54
  'GEO_continent': '',
55
  }
56
 
57
- geocoder = OpenCageGeocode(os.environ['open_cage_geocode'])
58
 
59
  query_loc = ', '.join(filter(None, [record.get('municipality', '').strip(),
60
  record.get('county', '').strip(),
 
54
  'GEO_continent': '',
55
  }
56
 
57
+ geocoder = OpenCageGeocode(os.environ['OPENCAGE_API_KEY'])
58
 
59
  query_loc = ', '.join(filter(None, [record.get('municipality', '').strip(),
60
  record.get('county', '').strip(),