rishabh062 committed on
Commit
97f9f1f
•
1 Parent(s): 6fc74ef

Removed image name error

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import csv
3
- import json
4
  import re
5
 
6
  import torch
@@ -38,35 +37,38 @@ def process_document(image):
38
  sequence = processor.batch_decode(outputs.sequences)[0]
39
  sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
40
  sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
 
41
  with open('output.csv', 'a', newline='') as file:
42
  writer = csv.writer(file)
43
- writer.writerow([image_filename, sequence])
44
 
45
- return {"json": processor.token2json(sequence)}
46
 
47
- def download_csv(output_csv):
48
- with open(output_csv) as f:
49
- response = f.read()
50
- return response, {"Content-Type": "text/csv"}
51
 
52
  description = "To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
53
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
54
 
55
- output_csv = "output.csv"
 
 
 
 
 
56
 
57
- demo = gr.Interface(
58
- fn=process_document,
59
- inputs="image",
60
- outputs="json",
61
  title="Donut 🍩 for Document Parsing and Converting the Image to Database",
62
  description=description,
63
  article=article,
 
64
  enable_queue=True,
65
- examples=[["example.png"], ["example_2.png"], ["example_3.png"]],
66
  cache_examples=False,
67
  allow_download=True,
68
- download_name="output.csv",
69
- download=download_csv
70
  )
71
 
72
- demo.launch()
 
1
  import gradio as gr
2
  import csv
 
3
  import re
4
 
5
  import torch
 
37
  sequence = processor.batch_decode(outputs.sequences)[0]
38
  sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
39
  sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
40
+
41
  with open('output.csv', 'a', newline='') as file:
42
  writer = csv.writer(file)
43
+ writer.writerow(["image", sequence])
44
 
45
+ return processor.token2json(sequence)
46
 
47
+ def download_csv():
48
+ return "output.csv"
 
 
49
 
50
  description = "To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
51
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
52
 
53
+ inputs = gr.inputs.Image()
54
+ outputs = gr.outputs.Json()
55
+
56
+ examples = [["example.png"], ["example_2.png"], ["example_3.png"]]
57
+
58
+ download_button = gr.outputs.Button("Download CSV")
59
 
60
+ interface = gr.Interface(
61
+ process_document,
62
+ inputs=inputs,
63
+ outputs=outputs,
64
  title="Donut 🍩 for Document Parsing and Converting the Image to Database",
65
  description=description,
66
  article=article,
67
+ examples=examples,
68
  enable_queue=True,
 
69
  cache_examples=False,
70
  allow_download=True,
71
+ download_button=download_button
 
72
  )
73
 
74
+ interface.launch()