vincentclaes committed on
Commit
412c90a
1 Parent(s): 211c5c8

order of model

Browse files
Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import boto3
4
  import traceback
5
  import re
 
6
 
7
  import gradio as gr
8
  from PIL import Image, ImageDraw
@@ -20,6 +21,9 @@ ssl._create_default_https_context = ssl._create_unverified_context
20
 
21
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
22
 
 
 
 
23
  # Init models
24
 
25
  layoutlm_pipeline = pipeline(
@@ -52,6 +56,7 @@ def image_to_byte_array(image: Image) -> bytes:
52
 
53
 
54
  def run_textract(question, document):
 
55
  image_as_byte_base64 = image_to_byte_array(image=document.b)
56
  response = boto3.client("textract").analyze_document(
57
  Document={
@@ -71,6 +76,7 @@ def run_textract(question, document):
71
  ]
72
  },
73
  )
 
74
  for element in response["Blocks"]:
75
  if element["BlockType"] == "QUERY_RESULT":
76
  return {
@@ -83,7 +89,9 @@ def run_textract(question, document):
83
 
84
 
85
  def run_layoutlm(question, document):
 
86
  result = layoutlm_pipeline(document.context["image"][0][0], question)[0]
 
87
  # [{'score': 0.9999411106109619, 'answer': 'LETTER OF CREDIT', 'start': 106, 'end': 108}]
88
  return {
89
  "score": result["score"],
@@ -94,6 +102,7 @@ def run_layoutlm(question, document):
94
 
95
 
96
  def run_lilt(question, document):
 
97
  # use this model + tokenizer
98
  processed_document = document.context["image"][0][1]
99
  words = [x[0] for x in processed_document]
@@ -107,6 +116,7 @@ def run_lilt(question, document):
107
  return_tensors="pt",
108
  )
109
  outputs = lilt_model(**encoding)
 
110
 
111
  answer_start_index = outputs.start_logits.argmax()
112
  answer_end_index = outputs.end_logits.argmax()
@@ -125,6 +135,7 @@ def run_lilt(question, document):
125
 
126
 
127
  def run_donut(question, document):
 
128
  # prepare encoder inputs
129
  pixel_values = donut_processor(
130
  document.context["image"][0][0], return_tensors="pt"
@@ -150,6 +161,7 @@ def run_donut(question, document):
150
  bad_words_ids=[[donut_processor.tokenizer.unk_token_id]],
151
  return_dict_in_generate=True,
152
  )
 
153
  sequence = donut_processor.batch_decode(outputs.sequences)[0]
154
  sequence = sequence.replace(donut_processor.tokenizer.eos_token, "").replace(
155
  donut_processor.tokenizer.pad_token, ""
@@ -242,10 +254,13 @@ MODELS = {
242
  def process_question(question, document, model=list(MODELS.keys())[0]):
243
  if not question or document is None:
244
  return None, None, None
 
245
  prediction = MODELS[model](question=question, document=document)
 
246
  pages = [x.copy().convert("RGB") for x in document.preview]
247
  text_value = prediction["answer"]
248
  if "word_ids" in prediction:
 
249
  image = pages[prediction["page"]]
250
  draw = ImageDraw.Draw(image, "RGBA")
251
  word_boxes = lift_word_boxes(document, prediction["page"])
@@ -410,7 +425,7 @@ examples = [
410
 
411
  with gr.Blocks(css=CSS) as demo:
412
  gr.Markdown("# Document Question Answer Comparator Engine")
413
- gr.Markdown("__Compare performance of different document layout models.__")
414
 
415
  document = gr.Variable()
416
  example_question = gr.Textbox(visible=False)
 
3
  import boto3
4
  import traceback
5
  import re
6
+ import logging
7
 
8
  import gradio as gr
9
  from PIL import Image, ImageDraw
 
21
 
22
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
23
 
24
+ logging.basicConfig(level=logging.DEBUG)
25
+ logger = logging.getLogger(__name__)
26
+
27
  # Init models
28
 
29
  layoutlm_pipeline = pipeline(
 
56
 
57
 
58
  def run_textract(question, document):
59
+ logger.info(f"Running Textract model.")
60
  image_as_byte_base64 = image_to_byte_array(image=document.b)
61
  response = boto3.client("textract").analyze_document(
62
  Document={
 
76
  ]
77
  },
78
  )
79
+ logger.info(f"Output of Textract model {response}.")
80
  for element in response["Blocks"]:
81
  if element["BlockType"] == "QUERY_RESULT":
82
  return {
 
89
 
90
 
91
  def run_layoutlm(question, document):
92
+ logger.info(f"Running layoutlm model.")
93
  result = layoutlm_pipeline(document.context["image"][0][0], question)[0]
94
+ logger.info(f"Output of layoutlm model {result}.")
95
  # [{'score': 0.9999411106109619, 'answer': 'LETTER OF CREDIT', 'start': 106, 'end': 108}]
96
  return {
97
  "score": result["score"],
 
102
 
103
 
104
  def run_lilt(question, document):
105
+ logger.info(f"Running lilt model.")
106
  # use this model + tokenizer
107
  processed_document = document.context["image"][0][1]
108
  words = [x[0] for x in processed_document]
 
116
  return_tensors="pt",
117
  )
118
  outputs = lilt_model(**encoding)
119
+ logger.info(f"Output for lilt model {outputs}.")
120
 
121
  answer_start_index = outputs.start_logits.argmax()
122
  answer_end_index = outputs.end_logits.argmax()
 
135
 
136
 
137
  def run_donut(question, document):
138
+ logger.info(f"Running donut model.")
139
  # prepare encoder inputs
140
  pixel_values = donut_processor(
141
  document.context["image"][0][0], return_tensors="pt"
 
161
  bad_words_ids=[[donut_processor.tokenizer.unk_token_id]],
162
  return_dict_in_generate=True,
163
  )
164
+ logger.info(f"Output for donut {outputs}")
165
  sequence = donut_processor.batch_decode(outputs.sequences)[0]
166
  sequence = sequence.replace(donut_processor.tokenizer.eos_token, "").replace(
167
  donut_processor.tokenizer.pad_token, ""
 
254
  def process_question(question, document, model=list(MODELS.keys())[0]):
255
  if not question or document is None:
256
  return None, None, None
257
+ logger.info(f"Running for model {model}")
258
  prediction = MODELS[model](question=question, document=document)
259
+ logger.info(f"Got prediction {prediction}")
260
  pages = [x.copy().convert("RGB") for x in document.preview]
261
  text_value = prediction["answer"]
262
  if "word_ids" in prediction:
263
+ logger.info(f"Setting bounding boxes.")
264
  image = pages[prediction["page"]]
265
  draw = ImageDraw.Draw(image, "RGBA")
266
  word_boxes = lift_word_boxes(document, prediction["page"])
 
425
 
426
  with gr.Blocks(css=CSS) as demo:
427
  gr.Markdown("# Document Question Answer Comparator Engine")
428
+ gr.Markdown("This space compares some of the latest models that can be used commercially.")
429
 
430
  document = gr.Variable()
431
  example_question = gr.Textbox(visible=False)