Tuana committed on
Commit
5f91d5b
1 Parent(s): 2a7e605

bug fix for writing documents

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -23,18 +23,14 @@ def start_haystack():
23
 
24
  def pdf_to_document_store(pdf_files):
25
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
26
- documents = []
27
  for pdf in pdf_files:
28
  with open("temp-path.pdf", 'wb') as temp_file:
29
  base64_pdf = base64.b64encode(pdf.read()).decode('utf-8')
30
  temp_file.write(base64.b64decode(base64_pdf))
31
  doc = converter.convert(file_path="temp-path.pdf", meta=None)[0]
32
- st.write(doc)
33
- preprocessed_doc=preprocessor.process([doc])
34
- st.write(preprocessed_doc)
35
- documents.append(preprocessed_doc)
36
  temp_file.close()
37
- document_store.write_documents(documents)
38
  st.write('Document count: ', document_store.get_document_count())
39
 
40
 
 
23
 
24
  def pdf_to_document_store(pdf_files):
25
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
 
26
  for pdf in pdf_files:
27
  with open("temp-path.pdf", 'wb') as temp_file:
28
  base64_pdf = base64.b64encode(pdf.read()).decode('utf-8')
29
  temp_file.write(base64.b64decode(base64_pdf))
30
  doc = converter.convert(file_path="temp-path.pdf", meta=None)[0]
31
+ preprocessed_docs=preprocessor.process([doc])
32
+ document_store.write_documents(preprocessed_docs)
 
 
33
  temp_file.close()
 
34
  st.write('Document count: ', document_store.get_document_count())
35
 
36