|
from AssistantService import GPTAssistant |
|
from openai.error import AuthenticationError |
|
import streamlit as st |
|
import configparser |
|
import os |
|
|
|
# Read the optional ``config.ini`` from the working directory.
# ``ConfigParser.read`` silently skips files that do not exist, so a missing
# file simply leaves ``config`` without any options.
config = configparser.ConfigParser()
config.read('config.ini')

# Bind ``assistant_api_key`` unconditionally: the name is read later in the
# script regardless of what the file contained. The DEFAULT section always
# exists on a ConfigParser, so the lookup itself cannot fail, and a missing
# ``API-KEY`` option yields the empty string.
assistant_api_key = config['DEFAULT'].get('API-KEY', '')
|
|
|
# Configure LangChain tracing through environment variables, using the
# credentials stored in Streamlit secrets.
#
# Tracing is treated as optional: when the secrets cannot be read the
# variables are left untouched instead of crashing the app before any UI is
# rendered.
# NOTE(review): Streamlit is expected to raise KeyError for a missing entry
# and a FileNotFoundError (or subclass) when no secrets file exists --
# confirm against the deployed Streamlit version.
try:
    langchain_api_key = st.secrets["LANGCHAIN_API_KEY"]
    langchain_project = st.secrets["LANGCHAIN_PROJECT"]
except (KeyError, FileNotFoundError):
    langchain_api_key = None
    langchain_project = None

# Only switch tracing on once both credentials are known to be present.
if langchain_api_key and langchain_project:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
    os.environ["LANGCHAIN_PROJECT"] = langchain_project
|
|
|
# Page header: title followed by the introductory paragraphs.
st.title("Web Scraping Assistant")
for intro_text in (
    "This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.",
    "Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)",
):
    st.write(intro_text)

# No key came from the configuration: ask for one in the page instead.
if assistant_api_key == '':
    assistant_api_key = st.text_input("Paste your API key here:")

# ``gpt_assistant`` is created only once a non-empty key is available.
# Until then the name stays unbound; the later sections of the script detect
# that and ask the user to complete the API key field.
if assistant_api_key:
    gpt_assistant = GPTAssistant(assistant_api_key)
|
|
|
# Step 1: the user pastes an HTML fragment and asks for its data format.
html_content = st.text_input("Paste your piece of HTML here:")

extract_button = st.button("Extract data format")
if html_content and extract_button:
    # ``gpt_assistant`` is bound only when an API key has been provided.
    # Look the name up explicitly rather than wrapping the whole call in
    # ``except NameError``: that would also report a NameError raised
    # inside the assistant itself as a missing API key.
    assistant = globals().get('gpt_assistant')
    if assistant is None:
        st.write("Complete the API key field")
    else:
        try:
            output = assistant.chain_response_format(html_content)
        except AuthenticationError:
            st.write("Invalid API key")
        else:
            # Persist the result so it survives Streamlit reruns.
            st.session_state['output_format'] = output
|
|
|
# Step 2: once a data format is stored in the session, show it and offer to
# generate the extraction code for it.
if 'output_format' in st.session_state:
    st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        # ``gpt_assistant`` is bound only when an API key has been provided.
        # An explicit lookup avoids using ``except NameError`` as control
        # flow, which would hide genuine NameErrors raised by the assistant.
        assistant = globals().get('gpt_assistant')
        if assistant is None:
            st.write("Complete the API key field")
        else:
            try:
                python_code = assistant.chain_code_generator(
                    st.session_state['output_format'], html_content
                )
            except AuthenticationError:
                st.write("Invalid API key")
            else:
                st.session_state['code_generated'] = python_code
                # Executable variant: the generated source followed by a call
                # that stores the extraction result in ``result``.
                st.session_state['code_generated_exec'] = (
                    python_code + "\nresult = extract_info(html_data)"
                )
|
|
|
|
|
# Step 3: show the generated function and let the user try it on a full page.
if 'code_generated' in st.session_state:
    st.write("Here is your python function:")
    st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")
    if full_content and test_code:
        # SECURITY NOTE(review): this executes model-generated source with
        # the full privileges of the app process. It runs in its own
        # namespace so it cannot overwrite the script's globals, but that is
        # NOT a sandbox -- consider running it in an isolated process.
        exec_namespace = {'html_data': full_content, 'result': None}
        try:
            exec(st.session_state['code_generated_exec'], exec_namespace)
        except Exception as exc:
            # Generated code can fail in arbitrary ways; report the failure
            # in the page instead of aborting the script run.
            st.write("error extracting data")
            st.exception(exc)
        else:
            result = exec_namespace.get('result')
            if result:
                st.write("data extracted successfully")
                st.table(result)
            else:
                st.write("error extracting data")
|
|
|
def _set_example_visible(visible):
    """Record in the session state whether the example snippet is shown."""
    st.session_state['show_example'] = visible


# Usage instructions, collapsed by default inside an expander.
with st.expander(label="How to use this app"):
    st.write("1. Paste the html code of your target element in the first text box and press \"Enter\"")

    # Visibility of the example is kept in the session state and toggled via
    # button callbacks. A button's return value is a plain bool that is True
    # only for the run triggered by the click, so it can neither keep the
    # example open across reruns nor be "disabled" by setting an attribute.
    st.button("Show example", on_click=_set_example_visible, args=(True,))
    if st.session_state.get('show_example', False):
        st.text_area("Example", value='<li><div class="product"> <h3 class="title">Product 1</h3> <p class="description">This is the description of the product 1</p> <span class="price">10.00</span> </div></li>')
        st.button("Close example", on_click=_set_example_visible, args=(False,))

    st.write("2. Click on the button 'Extract data format'")

    st.write("3. Click on the button 'Generate the code'")

    st.write("4. Paste the complete html code in the last text box to test the auto generated code")

    st.write("5. Copy the code and include it in your own projects")
|
|