Spaces:

rizgiak
/

table-to-csv-pipeline

Running

App Files Files Community

rizgiak committed on Jan 19

Commit

10889be

•

1 Parent(s): 1a53410

Add Japanese language support, switch to PP-OCRv4, and fix several padding-related bugs

Browse files

Files changed (2) hide show

app.py +48 -13
test_pdf2img.py +16 -0

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import string
 import random
 from collections import Counter
 from itertools import count, tee
 import cv2
 import matplotlib.pyplot as plt
@@ -14,7 +15,7 @@ from PIL import Image
 from transformers import DetrImageProcessor, TableTransformerForObjectDetection
 from paddleocr import PaddleOCR
-ocr = PaddleOCR(use_angle_cls=True, lang="en",use_gpu=False)
 st.set_option('deprecation.showPyplotGlobalUse', False)
 st.set_page_config(layout='wide')
@@ -28,6 +29,10 @@ table_detection_model = TableTransformerForObjectDetection.from_pretrained(
 table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
     "microsoft/table-transformer-structure-recognition")
 def PIL_to_cv(pil_img):
     return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
@@ -201,6 +206,32 @@ class TableExtractionPipeline():
         result.paste(pil_img, (left, top))
         return result
     def plot_results_detection(self, c1, model, pil_img, prob, boxes,
                                delta_xmin, delta_ymin, delta_xmax, delta_ymax):
         '''
@@ -213,7 +244,7 @@ class TableExtractionPipeline():
         for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
             cl = p.argmax()
-            xmin, ymin, xmax, ymax = xmin - delta_xmin, ymin - delta_ymin, xmax + delta_xmax, ymax + delta_ymax
             ax.add_patch(
                 plt.Rectangle((xmin, ymin),
                               xmax - xmin,
@@ -238,8 +269,7 @@ class TableExtractionPipeline():
         cropped_img_list = []
         for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
-            xmin, ymin, xmax, ymax = xmin - delta_xmin, ymin - delta_ymin, xmax + delta_xmax, ymax + delta_ymax
             cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
             cropped_img_list.append(cropped_img)
@@ -412,7 +442,8 @@ class TableExtractionPipeline():
     @st.cache
     def convert_df(self, df):
-        return df.to_csv().encode('utf-8')
     def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
                          max_rows: int):
@@ -456,15 +487,15 @@ class TableExtractionPipeline():
         csv = self.convert_df(df)
         try:
-            numkey = df.iloc[0, 0]
-        except:
             numkey = str(0)
-        c3.download_button("Download table",
-                           csv,
-                           "file.csv",
-                           "text/csv",
-                           key='download-csv-' + numkey)
         return df
@@ -548,7 +579,11 @@ class TableExtractionPipeline():
 if __name__ == "__main__":
-    img_name = st.file_uploader("Upload an image with table(s)")
     st1, st2, st3 = st.columns((1, 1, 1))
     TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
     TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)

 import random
 from collections import Counter
 from itertools import count, tee
+import base64
 import cv2
 import matplotlib.pyplot as plt
 from transformers import DetrImageProcessor, TableTransformerForObjectDetection
 from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=False, ocr_version='PP-OCRv4')
 st.set_option('deprecation.showPyplotGlobalUse', False)
 st.set_page_config(layout='wide')
 table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
     "microsoft/table-transformer-structure-recognition")
+def reload_ocr(vlang):
+    """Rebuild the module-level PaddleOCR engine for the given language.
+
+    Args:
+        vlang: Language code forwarded to PaddleOCR as ``lang``.
+
+    The freshly built engine replaces the global ``ocr`` object. Angle
+    classification is enabled, GPU use is disabled and the PP-OCRv4 model
+    version is requested, matching the engine created at import time.
+    """
+    global ocr
+    engine_options = {
+        'use_angle_cls': True,
+        'lang': vlang,
+        'use_gpu': False,
+        'ocr_version': 'PP-OCRv4',
+    }
+    ocr = PaddleOCR(**engine_options)
 def PIL_to_cv(pil_img):
     return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
         result.paste(pil_img, (left, top))
         return result
+    @staticmethod
+    def dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img):
+        """Grow a bounding box by its padding and clamp it to the image.
+
+        Each side is pushed outwards by the matching ``delta_*`` value plus
+        5% of the box width (x sides) or box height (y sides). The result
+        is then limited to ``[0, width]`` horizontally and ``[0, height]``
+        vertically, where width and height come from ``pil_img.size``.
+
+        Returns:
+            tuple: ``(xmin, ymin, xmax, ymax)`` of the padded, clamped box.
+        """
+        # Proportional margin: 5% of the box extent along each axis.
+        pad_x = (xmax - xmin) * 0.05
+        pad_y = (ymax - ymin) * 0.05
+        img_width, img_height = pil_img.size
+
+        # Lower edges may not go below 0; upper edges may not leave the image.
+        left = max(xmin - (delta_xmin + pad_x), 0)
+        top = max(ymin - (delta_ymin + pad_y), 0)
+        right = min(xmax + (delta_xmax + pad_x), img_width)
+        bottom = min(ymax + (delta_ymax + pad_y), img_height)
+        return left, top, right, bottom
     def plot_results_detection(self, c1, model, pil_img, prob, boxes,
                                delta_xmin, delta_ymin, delta_xmax, delta_ymax):
         '''
         for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
             cl = p.argmax()
+            xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
             ax.add_patch(
                 plt.Rectangle((xmin, ymin),
                               xmax - xmin,
         cropped_img_list = []
         for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
+            xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
             cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
             cropped_img_list.append(cropped_img)
     @st.cache
     def convert_df(self, df):
+        """Serialize ``df`` to CSV bytes carrying a UTF-8 byte-order mark.
+
+        Args:
+            df: DataFrame to export; its index is not written.
+
+        Returns:
+            bytes: The CSV text encoded with ``utf-8-sig`` (BOM + UTF-8),
+            so spreadsheet tools such as Excel detect the encoding.
+        """
+        # to_csv() returns a plain str when no path is given, so passing
+        # encoding='utf-8-sig' there never adds a BOM. The BOM has to be
+        # produced when the text is turned into bytes.
+        return df.to_csv(index=False).encode('utf-8-sig')
     def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
                          max_rows: int):
         csv = self.convert_df(df)
         try:
+            numkey = str(df.iloc[0, 0])
+        except IndexError:
             numkey = str(0)
+        # Create a download link with filename and extension
+        filename = f"table_{numkey}.csv"  # Adjust the filename as needed
+        b64_csv = base64.b64encode(csv).decode()  # Encode CSV data to base64
+        href = f'<a href="data:file/csv;base64,{b64_csv}" download="{filename}">Download {filename}</a>'
+        c3.markdown(href, unsafe_allow_html=True)
         return df
 if __name__ == "__main__":
+    st_up, st_lang = st.columns((1, 1))
+    img_name = st_up.file_uploader("Upload an image with table(s)")
+    lang = st_lang.selectbox('Language', ('en', 'japan'))
+    reload_ocr(lang)
     st1, st2, st3 = st.columns((1, 1, 1))
     TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
     TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)

test_pdf2img.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+from pdf2image import convert_from_path
+# Set the PDF file path
+pdf_path = 'test.pdf'
+# Page range handed to pdf2image (first_page/last_page); currently page 14 only
+first = 14
+last = 14
+# Poppler location: honour the POPPLER_PATH environment variable when set,
+# otherwise fall back to the original Windows install directory if it exists.
+# None leaves it to pdf2image to locate the Poppler binaries itself.
+_default_poppler = r"C:\poppler-23.07.0\Library\bin"
+poppler_path = os.environ.get("POPPLER_PATH") or (
+    _default_poppler if os.path.isdir(_default_poppler) else None)
+# Render the selected page(s) of the PDF at 300 dpi
+images = convert_from_path(pdf_path, dpi=300, first_page=first, last_page=last, poppler_path=poppler_path)
+# Save each page as "<pdf path without extension>p<page number>.jpg"
+image_path = os.path.splitext(pdf_path)[0]
+for page_number, image in enumerate(images, start=first):
+    image.save(f"{image_path}p{page_number}.jpg", 'JPEG')