The full script below crawls the KRX listed-company table and Naver Finance daily quotes for Samsung Electronics (005930), saves the OHLCV data to `samsungOHLC.csv`, and fits a TF1-style linear regression that predicts the closing price:

```python
import pandas as pd
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from pandas.io.parsers import read_csv
import numpy as np


class Trader:
    def __init__(self):
        self.code_df = pd.DataFrame({'name': [], 'code': []})

    def krx_crawl(self):
        # Download the full listed-company table from KRX.
        self.code_df = pd.read_html(
            'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
            header=0)[0]
        # Zero-pad stock codes to 6 digits (pass the bound method, do not call it).
        self.code_df.종목코드 = self.code_df.종목코드.map('{:06d}'.format)
        self.code_df = self.code_df[['회사명', '종목코드']]
        self.code_df = self.code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

    def code_df_head(self):
        if self.code_df.empty:
            self.krx_crawl()  # lazily fetch the KRX listing on first use
        print(self.code_df.head())

    def get_url(self, item_name, code_df):
        code = code_df.query("name=='{}'".format(item_name))['code'].to_string(index=False)
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
        print('Request URL = {}'.format(url))
        return url

    def test(self, code):
        # item_name = '삼성전자'
        # url = self.get_url(item_name, self.code_df)
        df = pd.DataFrame()
        for page in range(1, 21):
            pg_url = 'https://finance.naver.com/item/sise_day.nhn?code={code}&page={page}'.format(
                code=code, page=page)
            # DataFrame.append() was removed in pandas 2.0; concat is the stable API.
            df = pd.concat([df, pd.read_html(pg_url, header=0)[0]], ignore_index=True)
        df.dropna(inplace=True)  # drop rows with NaN values
        return df

    def rename_item_name(self, param):
        df = param.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff',
                                   '시가': 'open', '고가': 'high', '저가': 'low',
                                   '거래량': 'volume'})
        df[['close', 'diff', 'open', 'high', 'low', 'volume']] = \
            df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int)
        df['date'] = pd.to_datetime(df['date'])
        df = df.sort_values(by=['date'], ascending=True)
        return df

    def crawling(self):
        import time
        from datetime import datetime, timedelta
        import re
        import requests
        from bs4 import BeautifulSoup

        ts = time.time()

        # Stock code and lookup period
        symbol = '005930'
        startTime = (datetime.today() - timedelta(days=1)).strftime('%Y%m%d')
        count = str(1000)

        # Build the chart-data URL
        url = ('https://fchart.stock.naver.com/sise.nhn?symbol={}&timeframe=day'
               '&startTime={}&count={}&requestType=2').format(symbol, startTime, count)

        # Crawl & preprocess
        r = requests.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')
        tr = soup.find_all('item')
        cols = ['일자', '시가', '고가', '저가', '종가', '거래량']
        rows = []  # avoid shadowing the built-in `list`
        for i in range(len(tr)):
            rows.append(re.search(r'"(.*)"', str(tr[i])).group(1).split('|'))
        df = pd.DataFrame(rows, columns=cols)
        df['일자'] = pd.to_datetime(df['일자'].str[:4] + '-' + df['일자'].str[4:6] + '-' + df['일자'].str[6:])
        df.set_index(df['일자'], inplace=True)
        df = df.drop(columns='일자')
        print('Elapsed time:', round(time.time() - ts, 1), 's')
        df.to_csv('samsungOHLC.csv')

    def model(self):
        data = read_csv('samsungOHLC.csv', sep=',')
        data = data.iloc[:, 1:6]  # 시가, 고가, 저가, 종가, 거래량
        print(data.head())
        xy = np.array(data, dtype=np.float32)
        # Features: open, high, low, volume (4 columns, matching the [None, 4]
        # placeholder below); target: close (column 3).
        x_data = xy[:, [0, 1, 2, 4]]
        y_data = xy[:, [3]]
        print("type:{}".format(type(xy)))
        print("shape: {}, dimension: {}, dtype:{}".format(xy.shape, xy.ndim, xy.dtype))
        print("Array's Data:\n", xy)

        X = tf.placeholder(tf.float32, shape=[None, 4])
        Y = tf.placeholder(tf.float32, shape=[None, 1])
        W = tf.Variable(tf.random_normal([4, 1]), name='weight')
        b = tf.Variable(tf.random_normal([1]), name='bias')
        hypothesis = tf.matmul(X, W) + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.000005)
        train = optimizer.minimize(cost)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for step in range(100000):
                cost_, hypo_, _ = sess.run([cost, hypothesis, train], {X: x_data, Y: y_data})
                if step % 500 == 0:
                    print(f' step: {step}, cost: {cost_} ')
                    print(f' price: {hypo_}')
            saver = tf.train.Saver()
            saver.save(sess, 'trader.ckpt')


if __name__ == '__main__':
    def print_menu():
        print('0. EXIT\n'
              '1. Show KRX code list head\n'
              '2. Show raw daily quotes\n'
              '3. Show preprocessed quotes\n'
              '4. Crawl quotes and save CSV\n'
              '5. Train model\n')
        return input('CHOOSE ONE \n')

    m = Trader()
    while 1:
        menu = print_menu()
        print('MENU %s \n' % menu)
        if menu == '0':
            break
        elif menu == '1':
            m.code_df_head()
        elif menu == '2':
            print(m.test('005930'))
        elif menu == '3':
            print(m.rename_item_name(m.test('005930')))
        elif menu == '4':
            m.crawling()
        elif menu == '5':
            m.model()
```
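As a follow-up, here is a minimal sketch of how the checkpoint written by `Trader.model()` could be restored for a one-off prediction. It assumes the same graph shape (4 input features, variables named `weight` and `bias`), and the sample input row is made up purely for illustration:

```python
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Rebuild the same graph that Trader.model() trained: variable names and
# shapes must match the checkpoint for Saver.restore() to succeed.
X = tf.placeholder(tf.float32, shape=[None, 4])
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
hypothesis = tf.matmul(X, W) + b

with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, 'trader.ckpt')  # checkpoint saved by Trader.model()
    # Hypothetical row (open, high, low, volume); values are illustrative only.
    sample = np.array([[50800, 51200, 50300, 15000000]], dtype=np.float32)
    print('predicted close:', sess.run(hypothesis, {X: sample}))
```

Note that `tf.train.Saver` stores variables by name, so restoring only works when the graph is rebuilt with identical variable names and shapes.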