공부집

집 값 학습 본문

개인 소스코드/AI

집 값 학습

ikemen_hk 2018. 12. 18. 18:25

import pandas as pd

import numpy as np

import re

import tensorflow as tf


def regexp(reg, val):
    """Search the string form of ``val`` for the pattern ``reg``.

    Args:
        reg: Regular-expression pattern to look for.
        val: Any value; it is converted with ``str()`` before searching.

    Returns:
        The match object for the first occurrence, or ``None`` when the
        pattern does not occur anywhere in ``str(val)``.
    """
    return re.search(reg, str(val))


# Load the CSV files for Tokyo's 23 wards and stack them into one frame.
# Each entry is the stem of a '<name>.csv' file read from the working
# directory; the spellings (e.g. 'summida', 'sinagawa') are the file names the
# script has always used, so they must not be "corrected" here.
WARD_NAMES = [
    'chiyoda', 'chuo', 'minato', 'shinjuku', 'bunkyo',
    'taito', 'summida', 'koto', 'sinagawa', 'meguro',
    'ota', 'setagaya', 'sibuya', 'nakano', 'suginami',
    'toshima', 'kita', 'arakawa', 'itabashi',
    'nerima', 'adachi', 'katsushika', 'edogawa',
]

# Every file is tab-separated UTF-8. ignore_index=True renumbers the rows of
# the combined frame so the per-file indices do not collide.
df = pd.concat(
    [
        pd.read_csv('{}.csv'.format(ward), sep='\t', encoding='utf-8')
        for ward in WARD_NAMES
    ],
    axis=0,
    ignore_index=True,
)


# Remove unneeded columns: 'Unnamed: 0' and the building name ('マンション名').
df.drop(['Unnamed: 0', 'マンション名'], axis=1, inplace=True)

# Strip the thousands separator and the unit suffixes so only the numeric text
# remains. regex=False treats every pattern as a literal string regardless of
# the pandas version's default for that parameter.
df['管理費'] = df['管理費'].str.replace(',', '', regex=False)
for column in ('礼', '敷'):
    for unit in ('ヶ月', '円', '万'):
        df[column] = df[column].str.replace(unit, '', regex=False)

# Patterns used to normalize the floor ('階') column. They are raw strings so
# that '\d' reaches the regex engine instead of being parsed as an (invalid)
# string escape, and they are compiled once instead of once per row.
_FLOOR_LETTER = re.compile(r"[A-D]+")
_FLOOR_ENCLOSED_DIGIT = re.compile(r"[^\d](\d)[^\d]")
_FLOOR_DIGIT_DASH = re.compile(r"(\d)-")


def _normalize_floor(value):
    """Convert one raw '階' cell into an integer.

    The rules are applied in order, each one seeing the result of the
    previous one:

    1. A value containing any of the letters A-D becomes 100.
    2. A single digit with a non-digit on both sides becomes digit * 100.
    3. A digit immediately followed by '-' becomes digit * 100.
    4. Any value that is still a string is parsed with ``int()``.

    Args:
        value: The cell content, either a string or an already numeric value.

    Returns:
        The normalized value (an ``int`` for string input; numeric input that
        matches no rule is returned unchanged).

    Raises:
        ValueError: If a remaining string is not a valid integer literal.
    """
    if _FLOOR_LETTER.search(str(value)) is not None:
        value = 100
    for pattern in (_FLOOR_ENCLOSED_DIGIT, _FLOOR_DIGIT_DASH):
        found = pattern.search(str(value))
        if found is not None:
            value = int(found.group(1)) * 100
    return int(value) if isinstance(value, str) else value


# Drop the 'F' suffix, use 100 for missing entries, then normalize each cell.
df['階'] = df['階'].str.replace('F', '', regex=False)
df['階'] = df['階'].fillna(100)
df['階'] = [_normalize_floor(cell) for cell in df['階']]

# Truncate to a multiple of 100: values below 100 become 0, 100-199 become
# 100, and so on.
df['階'] = (df['階'] / 100).astype(int) * 100

#df['賃料'] = df['賃料'] * 10000

#df['敷'] = df['敷'].astype(float) * df['賃料']

#df['礼'] = df['礼'].astype(float) * df['賃料']


# Second revision: these columns are left out of the feature set entirely.
df.drop(['礼', '敷', '徒歩', '管理費', '立地'], axis=1, inplace=True)


#df = (df - df.mean()) / (df.max() - df.min())


# One-hot encode the address ('住所') and room-layout ('間取り') columns.
dummies_address = pd.get_dummies(df['住所'])
dummies_sub = pd.get_dummies(df['間取り'])

# Replace the two source columns with their indicator columns: the remaining
# columns come first, then the address dummies, then the layout dummies.
df = pd.concat(
    [df.drop(['住所', '間取り'], axis=1), dummies_address, dummies_sub],
    axis=1,
)


# Target: the rent column ('賃料') reshaped into an (n_rows, 1) column vector.
Y = np.array(df['賃料']).reshape(-1, 1)

# Normalization: every remaining column is mean-centred and divided by its
# value range (max - min).
df.drop(['賃料'], axis=1, inplace=True)
value_range = df.max() - df.min()
# A column whose values are all identical has a range of 0, and 0/0 would put
# NaN into the features (and from there into the loss). Dividing by 1 instead
# leaves such a column as all zeros after centring.
value_range = value_range.where(value_range != 0, 1)
df = (df - df.mean()) / value_range
data = np.array(df)
X = data


#--여기까지 데이터 정제 작업--


# Input placeholder: one row per sample. The width is taken from the feature
# matrix so the graph keeps matching the data when the number of one-hot
# columns changes (it was previously hard-coded to 984).
x = tf.placeholder(tf.float32, [None, X.shape[1]])

# Target placeholder: one value per sample.
y = tf.placeholder(tf.float32, [None, 1])

# Two hidden ReLU layers of 10 units each, followed by a single linear output.
L1 = tf.layers.dense(x, units=10, activation=tf.nn.relu)
L2 = tf.layers.dense(L1, units=10, activation=tf.nn.relu)
L3 = tf.layers.dense(L2, units=1, activation=None)

# Loss: mean over the batch of half the squared prediction error.
loss = tf.reduce_mean(0.5 * tf.square(L3 - y))

# One Adam update step with learning rate 0.025.
train = tf.train.AdamOptimizer(0.025).minimize(loss)


# Run training inside a context manager so the session's resources are
# released even if a step raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Full-batch training: every step feeds the complete X / Y arrays.
    for step in range(50000):
        loss_, _ = sess.run([loss, train], feed_dict={x: X, y: Y})

    # Loss of the final training step.
    print("loss값 : {}".format(loss_))

    # Predict for the first five rows of the normalized feature frame.
    test = df[0:5]
    Y_ = sess.run(L3, feed_dict={x: test})

# NOTE(review): presumably rent is stored in units of 10,000 yen, so this
# prints the predictions in yen — confirm against the CSV data.
print(Y_ * 10000)


'개인 소스코드 > AI' 카테고리의 다른 글

CNN 연습 / 이상한 것  (0) 2018.12.14
Comments