mdj1412
complete
e2deeb1
raw
history blame contribute delete
No virus
8.42 kB
import os
import pandas as pd
from flask import Flask, jsonify, request, render_template
import spacy
from spacy import displacy
# Module that applies the model (inference)
from modules.inference import Tk_instruct
# Stocks Data
from dataset_creation.nasdaq_data import get_list, get_data
# Create the Flask application object.
# __name__ passes the name of the currently executing module to Flask.
app = Flask(__name__)
# Ticker table served by stocks(); its `ticker` column also keys the news index.
# Pandas DataFrame : ticker, name, sector, industry, diff, open, close, date
demo_dic = get_list()
##### Home #####
@app.route('/')
def home_page():
    """Render the landing page, passing the site description to the template."""
    return render_template(
        'index.html',
        # Text handed to index.html as the `embed` template variable.
        embed='This website analyzes stock market news and provides answers to questions related to news articles.',
    )
##### Data fetch #####
# Show Ticker's Table
@app.route('/stocks', methods=['GET', 'POST'])
def stocks():
    """Return the ticker table as JSON under the ``result`` key."""
    # The table is converted with to_dict() and serialized in one step.
    return jsonify(result=demo_dic.to_dict())
################################################################################################
# Index of the news titles available on disk, keyed by ticker:
#   {ticker1: {date1: [title1, title2, ...], date2: [...]}, ticker2: {...}, ...}
# A ticker that has no folder under ./news keeps an empty list as its value.
# The comprehension gives every ticker its own list; dict.fromkeys(keys, [])
# would make all tickers share one single mutable list object.
ticker_dic = {ticker: [] for ticker in demo_dic.ticker}

news_root = './news'
if not os.path.exists(news_root):
    raise NotImplementedError("Not exists News Data")  # fail fast: nothing to index

# Expected layout: ./news/<ticker>/<date>/<title files>
for ticker_name in os.listdir(news_root):
    # Every entry under ./news must correspond to a known ticker.
    if ticker_name not in ticker_dic:
        raise NotImplementedError("Not exists Ticker")
    ticker_path = os.path.join(news_root, ticker_name)
    titles_by_date = {}
    for date_name in os.listdir(ticker_path):
        titles = os.listdir(os.path.join(ticker_path, date_name))
        # A date folder may contain no news; such dates are left out.
        if titles:
            titles_by_date[date_name] = titles
    ticker_dic[ticker_name] = titles_by_date
# Show Ticker's Data
@app.route('/chart', methods=['GET', 'POST'])
def chart():
    """Return price data and news titles for one ticker as JSON.

    Query parameters:
        ticker: symbol whose data is requested.

    Returns:
        JSON with ``chart_data`` (the first result of get_data() converted
        with to_dict(), its index formatted as YYYY-MM-DD strings) and
        ``news_articles`` (date folder name -> list of title file names,
        built in descending date order). When ``ticker`` is missing or is
        not a single plain name, a JSON error with HTTP status 400 is
        returned instead.
    """
    print("Start /chart ")

    # Value sent by the JavaScript front end.
    ticker = request.args.get('ticker')
    # ticker becomes a path component below, so it must be one plain name
    # (no separators, no '.'/'..') to stay inside ./news.
    if not ticker or ticker in ('.', '..') or os.path.basename(ticker) != ticker:
        return jsonify(error="Invalid or missing 'ticker' parameter"), 400

    chart_data = get_data(tickers=[ticker], numOfDay=60)[0]
    # Replace the index entries with plain date strings before serializing.
    chart_data.index = [k.strftime("%Y-%m-%d") for k in chart_data.index]
    chart_data = chart_data.to_dict()

    # Collect the news titles per date for this ticker.
    news_dir = os.path.join('./news', ticker)
    article_news_dict = {}
    # Not every ticker has a news folder; treat that as "no news".
    if os.path.isdir(news_dir):
        for date_name in os.listdir(news_dir):
            titles = os.listdir(os.path.join(news_dir, date_name))
            if titles:  # skip dates without any news
                article_news_dict[date_name] = titles

    # Most recent date first.
    news_articles = dict(sorted(article_news_dict.items(), reverse=True))

    return jsonify(chart_data=chart_data, news_articles=news_articles)
################################################################################################
# 1. Basic URL
# 2. URL with a query string
#    : request.args.get('name') returns the value of a parameter from /user?name=value&name=value&...
# 3. clean URL
# Show Ticker's Title and News's Title
# Entity labels that are left out of the NER result.
_SKIPPED_ENT_LABELS = frozenset(
    {'DATE', 'TIME', 'CARDINAL', 'MONEY', 'PERCENT', 'ORDINAL', 'PRODUCT'}
)

# spaCy pipeline, loaded on first use and reused by later requests.
_NER_PIPELINE = None


def _get_ner_pipeline():
    """Return the shared spaCy pipeline, loading it on the first call."""
    global _NER_PIPELINE
    if _NER_PIPELINE is None:
        _NER_PIPELINE = spacy.load("en_core_web_sm")
    return _NER_PIPELINE


def _restore_ampersands(title, positions):
    """Re-insert '&' into *title* at the comma-separated indexes in *positions*.

    The indexes are applied in the given order, each one against the title
    as it stands after the previous insertions. An empty or missing
    *positions* leaves the title unchanged.
    """
    if not positions:
        return title
    for idx in [int(pos) for pos in positions.split(',')]:
        title = title[:idx] + '&' + title[idx:]
    return title


@app.route('/info_and_newsNER', methods=['GET', 'POST'])
def news_info_ner():
    """Return the source URL and named entities of one news article as JSON.

    Query parameters:
        ticker, date, title: identify the article (./news/<ticker>/<date>/<title>.txt).
        andSymbolInTitle: optional comma-separated indexes at which '&' must
            be re-inserted into ``title``.

    Returns:
        JSON with ``ticker``, ``date``, ``title``, ``url`` and ``ents``.
        ``ents`` holds parallel lists ``text``, ``start_char``, ``end_char``
        and ``label_`` for the kept entities, plus ``news`` with the full
        article text. A JSON error with HTTP status 400 is returned when a
        required parameter is missing or the article path leaves ./news.

    Raises:
        NotImplementedError: the URL table file is missing, or it does not
            contain exactly one URL for the requested article.
    """
    # Values sent by the JavaScript front end.
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    andSymbolInTitle = request.args.get('andSymbolInTitle')
    print(ticker, date, title, andSymbolInTitle)

    if not (ticker and date and title):
        return jsonify(error="Missing 'ticker', 'date' or 'title' parameter"), 400

    # The title arrives without its '&' characters; put them back.
    title = _restore_ampersands(title, andSymbolInTitle)

    # The request values form a file path, so make sure it stays under ./news.
    news_root = os.path.abspath('./news')
    news_path = os.path.abspath(os.path.join(news_root, ticker, date, title + '.txt'))
    if not news_path.startswith(news_root + os.sep):
        return jsonify(error="Invalid news path"), 400

    # Look up the URL recorded for this ticker / date / title.
    url_dir = "dataset_creation/save_news_url.tsv"
    if not os.path.exists(url_dir):
        raise NotImplementedError("Not exists {} directory".format(url_dir))
    df = pd.read_csv(url_dir, sep='\t', index_col=0)
    filt = (df['ticker'] == ticker) & (df['date'] == date) & (df['title'] == title)
    url = list(df.loc[filt, 'url'].values)
    if len(url) != 1:
        raise NotImplementedError("There exists many URL or empty")
    url = url[0]

    # Read the article text; the context manager closes the file.
    with open(news_path, 'r') as f:
        news_data = f.read()

    # Named-entity recognition over the article.
    doc = _get_ner_pipeline()(news_data)

    print("=====================================================================")
    ents = {'text': [], 'start_char': [], 'end_char': [], 'label_': []}
    for ent in doc.ents:
        # Drop the entity kinds that are not shown.
        if ent.label_ in _SKIPPED_ENT_LABELS:
            continue
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
        ents['text'].append(ent.text)
        ents['start_char'].append(ent.start_char)
        ents['end_char'].append(ent.end_char)
        ents['label_'].append(ent.label_)
    print("=====================================================================")

    ents['news'] = news_data
    print("ents : ", ents)

    return jsonify(ticker=ticker, date=date, title=title, url=url, ents=ents)
@app.route('/newsQuestions', methods=['GET', 'POST'])
def newsQuestions():
    """Answer a question about one news article with the Tk_instruct model.

    Query parameters:
        ticker, date, title: identify the article (./news/<ticker>/<date>/<title>.txt).
        andSymbolInTitle: optional comma-separated indexes at which '&' must
            be re-inserted into ``title``.
        questions: passed unchanged to Tk_instruct together with the text.

    Returns:
        JSON ``{"result": {"answer": ...}}`` with the value returned by
        Tk_instruct. A JSON error with HTTP status 400 is returned when a
        required parameter is missing or the article path leaves ./news.
    """
    # Values sent by the JavaScript front end.
    ticker = request.args.get('ticker')
    date = request.args.get('date')
    title = request.args.get('title')
    andSymbolInTitle = request.args.get('andSymbolInTitle')
    questions = request.args.get('questions')

    if not (ticker and date and title):
        return jsonify(error="Missing 'ticker', 'date' or 'title' parameter"), 400

    # The title arrives without its '&' characters; re-insert them at the
    # given indexes, in order. A missing or empty parameter means none.
    if andSymbolInTitle:
        for idx in [int(pos) for pos in andSymbolInTitle.split(',')]:
            title = title[:idx] + '&' + title[idx:]

    # The request values form a file path, so make sure it stays under ./news.
    news_root = os.path.abspath('./news')
    news_path = os.path.abspath(os.path.join(news_root, ticker, date, title + '.txt'))
    if not news_path.startswith(news_root + os.sep):
        return jsonify(error="Invalid news path"), 400

    # Read the article text; the context manager closes the file even on error.
    with open(news_path, 'r') as f:
        text = f.read()

    # Run the model on the article and the question.
    answer = Tk_instruct(text, questions)

    return jsonify(result={'answer': answer})
# Terminal : Flask      : restart the server in the terminal after editing
# Elements : HTML       : restart the server in the terminal after editing
# Console  : javascript : refresh the site
# Sources  : File       : refresh the site
# CSS                   : refresh the site
if __name__ == "__main__":
    # Run the app.
    # host : '0.0.0.0' accepts connections from any IP (default: localhost = 127.0.0.1)
    # port : HTTP port opened for connections (default: 5000); passed as an int
    app.run(host='0.0.0.0', port=7860)  # http://0.0.0.0:7860
    # For local debugging use instead: app.run(debug=True)