티스토리 뷰

5. 파이썬

83536 코스피 Trader.py

패스트코드블로그 2020. 6. 2. 22:07
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import pandas as pd
import plotly.offline as offline
import plotly.graph_objs as go
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from pandas.io.parsers import read_csv
import numpy as np
class Trader:
    def __init__(self):
        self.code_df = pd.DataFrame({'name':[], 'code':[]})
 
    def krx_crawl(self):
        self.code_df = \
        pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
        self.code_df.종목코드 = self.code_df.종목코드.map('{:06d}'.format())
        self.code_df = self.code_df[['회사명''종목코드']]
        self.code_df = self.code_df.rename(columns={'회사명''name''종목코드''code'})
 
    def code_df_head(self):
        print(self.code_df.head())
 
    def get_url(self, item_name, code_df):
        code = code_df.query("name=='{}'".format(item_name))['code'].to_string(index=False)
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code='005930')  # 'code'로 대체
        print('요청 URL = {}'.format(url))
        return url
 
    def test(self, code):
        # item_name = '삼성전자'
        # url = self.get_url(item_name, self.code_df)
        df = pd.DataFrame()
        for page in range(121):
            pg_url = 'https://finance.naver.com/item/sise_day.nhn?code={code}&page={page}'.format(code=code, page=page)
            df = df.append(pd.read_html(pg_url, header=0)[0], ignore_index=True)
        df.dropna(inplace=True)  # na 결측값 있는 행 제거
        return df
 
    def rename_item_name(self, param):
        df = param.rename(columns={'날짜''date''종가''close''전일비''diff',
                                   '시가''open''고가''high''저가''low''거래량''volumn'})
        df[['close''diff''open''high''low''volumn']] = \
            df[['close''diff''open''high''low''volumn']].astype(int)
        df['date'= pd.to_datetime(df['date'])
        df = df.sort_values(by=['date'], ascending=True)
        return df
 
    def crawling(self):
        import time
        from datetime import datetime, timedelta
        import re
        import pandas as pd
        ts = time.time()
        """ 라이브러리 호출 """
        import urllib
        from bs4 import BeautifulSoup
        import requests
        """ 회사코드 및 조회기간 설정 """
        symbol = '005930'
        startTime = (datetime.today() - timedelta(days=1)).strftime('%Y%m%d')
        count = str(1000)
        """ url 설정 """
        url = 'https://fchart.stock.naver.com/sise.nhn?symbol={}&timeframe=day&startTime={}&count={}&requestType=2'.format(
            symbol, startTime, count)
        """ 크롤링 & 전처리 """
        r = requests.get(url)
        html = r.content
        soup = BeautifulSoup(html, 'html.parser')
        tr = soup.find_all('item')
        cols = ['일자''시가''고가''저가''종가''거래량']
        list = []
        for i in range(0len(soup.find_all('item'))):
            list.append(re.search(r'"(.*)"'str(tr[i])).group(1).split('|'))
        df = pd.DataFrame(list, columns=cols)
        df['일자'= pd.to_datetime(df['일자'].str[:4+ '-' + df['일자'].str[4:6+ '-' + df['일자'].str[6:])
        df.set_index(df['일자'], inplace=True)
        df = df.drop(columns='일자')
        print('작동소요시간 :', round(time.time() - ts, 1), '초')
        df.to_csv('samsungOHLC.csv')
 
    def model(self):
        tf.global_variables_initializer()
        data = read_csv('samsungOHLC.csv', sep=',')
        data = data.iloc[:,1:6]
        print(data.head())
        xy = np.array(data, dtype=np.float32)
        x_data = xy[:, 1:-1]
        y_data = xy[:, [3]]
        print("type:{}".format(type(xy)))
        print("shape: {}, dimension: {}, dtype:{}".format(xy.shape, xy.ndim, xy.dtype))
        print("Array's Data:\n", xy)
        X = tf.placeholder(tf.float32, shape=[None, 4])
        Y = tf.placeholder(tf.float32, shape=[None, 1])
        W = tf.Variable(tf.random_normal([41]), name='weight')
        b = tf.Variable(tf.random_normal([1]), name='bias')
        hypothesis = tf.matmul(X, W) + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
        optmizer = tf.train.GradientDescentOptimizer(learning_rate=0.000005)
        train = optmizer.minimize(cost)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for step in range(100000):
                cost_, hypo_, _ = sess.run([cost, hypothesis, train], {X: x_data, Y: y_data})
                if step % 500 == 0:
                    print(f' step: {step}, cost: {cost_} ')
                    print(f' price: {hypo_}')
            saver = tf.train.Saver()
            saver.save(sess, 'trader.ckpt')
 
 
 
 
 
if __name__ == '__main__':
 
    def print_menu():
        print('0. EXIT\n'
              '1. 종목헤드\n'
              '2. 종목컬럼명 보기\n'
              '3. 전처리결과 보기\n'
              '4. 종목 클로링 및 csv 저장\n'
              '5. 모델 생성 \n')
        return input('CHOOSE ONE \n')
 
 
    m = Trader()
    while 1:
        menu = print_menu()
        print('MENU %s \n' % menu)
        if menu == '0':
            break
        elif menu == '1':
            m.code_df_head()
        elif menu == '2':
            print(m.test('005930'))
        elif menu == '3':
            print(m.rename_item_name(m.test('005930')))
        elif menu == '4':
            m.crawling()
        elif menu == '5':
            m.model()
cs

'5. 파이썬' 카테고리의 다른 글

83536 fashion_checker.py  (0) 2020.06.02
83536 linear_regression.py  (0) 2020.06.02
[Telaviv] 뉴스 데이터 수집  (0) 2020.05.30
HanBert 네이버 긍정부정 댓글 판단  (10) 2020.05.30
[Telaviv] 코랩(colab) 사용서  (0) 2020.05.29
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2025/01   »
1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
글 보관함