House-price training: main code
import pandas as pd
import numpy as np
import re
import tensorflow as tf
# Helper: search `val` (coerced to str) for the pattern `reg`; returns a match object or None.
def regexp(reg, val):
    p = re.compile(reg)
    m = p.search(str(val))
    return m
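For reference, the helper simply wraps re.compile/re.search and hands back the match object (or None). A quick illustrative check, not part of the original post:

print(regexp(r"(\d)F", "3F").group(1))   # -> '3'
print(regexp(r"[A-D]+", 5))              # -> None (non-strings are coerced with str())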
# Load the CSV files for Tokyo's 23 wards
df_chiyoda = pd.read_csv('chiyoda.csv', sep='\t', encoding='utf-8')
df_chuo = pd.read_csv('chuo.csv', sep='\t', encoding='utf-8')
df_minato = pd.read_csv('minato.csv', sep='\t', encoding='utf-8')
df_shinjuku = pd.read_csv('shinjuku.csv', sep='\t', encoding='utf-8')
df_bunkyo = pd.read_csv('bunkyo.csv', sep='\t', encoding='utf-8')
df_taito = pd.read_csv('taito.csv', sep='\t', encoding='utf-8')
df_summida = pd.read_csv('summida.csv', sep='\t', encoding='utf-8')
df_koto = pd.read_csv('koto.csv', sep='\t', encoding='utf-8')
df_sinagawa = pd.read_csv('sinagawa.csv', sep='\t', encoding='utf-8')
df_meguro = pd.read_csv('meguro.csv', sep='\t', encoding='utf-8')
df_ota = pd.read_csv('ota.csv', sep='\t', encoding='utf-8')
df_setagaya = pd.read_csv('setagaya.csv', sep='\t', encoding='utf-8')
df_sibuya = pd.read_csv('sibuya.csv', sep='\t', encoding='utf-8')
df_nakano = pd.read_csv('nakano.csv', sep='\t', encoding='utf-8')
df_suginami = pd.read_csv('suginami.csv', sep='\t', encoding='utf-8')
df_toshima = pd.read_csv('toshima.csv', sep='\t', encoding='utf-8')
df_kita = pd.read_csv('kita.csv', sep='\t', encoding='utf-8')
df_arakawa = pd.read_csv('arakawa.csv', sep='\t', encoding='utf-8')
df_itabashi = pd.read_csv('itabashi.csv', sep='\t', encoding='utf-8')
df_nerima = pd.read_csv('nerima.csv', sep='\t', encoding='utf-8')
df_adachi = pd.read_csv('adachi.csv', sep='\t', encoding='utf-8')
df_katsushika = pd.read_csv('katsushika.csv', sep='\t', encoding='utf-8')
df_edogawa = pd.read_csv('edogawa.csv', sep='\t', encoding='utf-8')
df = pd.concat([
df_chiyoda, df_chuo, df_minato, df_shinjuku, df_bunkyo,
df_taito, df_summida, df_koto, df_sinagawa, df_meguro,
df_ota, df_setagaya, df_sibuya, df_nakano, df_suginami,
df_toshima, df_kita, df_arakawa, df_itabashi,
df_nerima, df_adachi, df_katsushika, df_edogawa
], axis=0, ignore_index=True)
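The 23 read_csv calls and the concat above could also be written as a loop over the ward names. A sketch of that equivalent form, keeping the same (occasionally nonstandard) file names used by the post:

wards = ['chiyoda', 'chuo', 'minato', 'shinjuku', 'bunkyo', 'taito', 'summida',
         'koto', 'sinagawa', 'meguro', 'ota', 'setagaya', 'sibuya', 'nakano',
         'suginami', 'toshima', 'kita', 'arakawa', 'itabashi', 'nerima',
         'adachi', 'katsushika', 'edogawa']
# Read each ward's TSV and stack them into one frame, same result as the code above.
df = pd.concat([pd.read_csv(w + '.csv', sep='\t', encoding='utf-8') for w in wards],
               axis=0, ignore_index=True)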
# Drop unneeded columns
df.drop(['Unnamed: 0'], axis=1, inplace=True)
df.drop(['マンション名'], axis=1, inplace=True)
df['管理費'] = df['管理費'].str.replace(u',', u'')
df['礼'] = df['礼'].str.replace(u'ヶ月', u'')
df['礼'] = df['礼'].str.replace(u'円', u'')
df['礼'] = df['礼'].str.replace(u'万', u'')
df['敷'] = df['敷'].str.replace(u'ヶ月', u'')
df['敷'] = df['敷'].str.replace(u'万', u'')
df['敷'] = df['敷'].str.replace(u'円', u'')
df['階'] = df['階'].str.replace(u'F', u'')
df['階'] = df['階'].fillna(100)
df['階'] = [x if regexp("[A-D]+", x) is None else 100 for x in df['階']]
df['階'] = [x if regexp("[^\d](\d)[^\d]", x) is None else int(regexp("[^\d](\d)[^\d]", x).group(1))*100 for x in df['階']]
df['階'] = [x if regexp("(\d)-", x) is None else int(regexp("(\d)-", x).group(1))*100 for x in df['階']]
df['階'] = [int(x) if type(x) is str else x for x in df['階']]
df['階'] = (df['階'] / 100).astype(int)*100
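To sanity-check the floor (階) cleanup above: after the last step every entry is an integer multiple of 100, so a quick inspection (not in the original post) shows what the column actually ended up containing.

# Inspect the cleaned 階 column: expect only integer multiples of 100.
print(df['階'].value_counts().head())
print(df['階'].dtype)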
#df['賃料'] = df['賃料'] * 10000
#df['敷'] = df['敷'].astype(float) * df['賃料']
#df['礼'] = df['礼'].astype(float) * df['賃料']
# Second-pass cleanup ---------------
df.drop(['礼'], axis=1, inplace=True)
df.drop(['敷'], axis=1, inplace=True)
df.drop(['徒歩'], axis=1, inplace=True)
df.drop(['管理費'], axis=1, inplace=True)
df.drop(['立地'], axis=1, inplace=True)
#-------------
#df = (df - df.mean()) / (df.max() - df.min())
# One-hot encode categorical columns (dummy variables)
dummies_address = pd.get_dummies(df['住所'])
dummies_sub = pd.get_dummies(df['間取り'])
df = pd.concat([df, dummies_address], axis=1)
df = pd.concat([df, dummies_sub], axis=1)
df.drop(['住所'], inplace=True, axis=1)
df.drop(['間取り'], inplace=True, axis=1)
Y = np.array(df['賃料']).reshape(-1,1)
# Normalization
df.drop(['賃料'], axis=1, inplace=True)
df = (df - df.mean()) / (df.max() - df.min())
data = np.array(df)
X = data
# -- End of data preparation --
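The placeholder below hardcodes 984 as the input width; that number presumably matches the column count of X after the one-hot encoding, so it could also be read from the array instead (a small robustness tweak, not in the original post):

n_features = X.shape[1]   # 984 for the author's data; changes if the dummy columns change
# tf.placeholder(tf.float32, [None, n_features]) could then replace the hardcoded shape.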
# TF 1.x graph: a small fully connected regression network.
x = tf.placeholder(tf.float32, [None, 984])   # input width must match X's column count (984 here)
y = tf.placeholder(tf.float32, [None, 1])     # target: rent (賃料)
L1 = tf.layers.dense(x, units=10, activation=tf.nn.relu)
L2 = tf.layers.dense(L1, units=10, activation=tf.nn.relu)
L3 = tf.layers.dense(L2, units=1, activation=None)      # linear output for regression
loss = tf.reduce_mean(0.5 * tf.square(L3 - y))          # (half) mean squared error
train = tf.train.AdamOptimizer(0.025).minimize(loss)    # Adam with learning rate 0.025
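The post stops at defining the graph. A minimal TF 1.x training loop for it might look like the following; this is a sketch only — the epoch count and the full-batch feed are assumptions, and there is no train/test split, mirroring the code above.

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(1000):                  # epoch count is an arbitrary choice
        _, loss_val = sess.run([train, loss], feed_dict={x: X, y: Y})
        if epoch % 100 == 0:
            print('epoch', epoch, 'loss', loss_val)
    # Predictions on the (training) data after fitting:
    pred = sess.run(L3, feed_dict={x: X})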