财政收入影响因素分析及预测模型
数据集链接: https://pan.baidu.com/s/1_-8F0DAyHyZSS2M7u-2NYQ 提取码: ex6y
原始数据概括性度量
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Descriptive statistics of the raw data.
# Raw string keeps the Windows backslashes literal. The file is read from the
# "input" folder, matching the other data1.csv reads in this file (the
# original path was missing that separator).
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# Per-column minimum, maximum, mean and standard deviation, in that order.
r = [data.min(), data.max(), data.mean(), data.std()]
# Transpose so each data column becomes a row and each statistic a column.
r = pd.DataFrame(r, index=['Min', 'Max', 'Mean', 'STD']).T
np.round(r, 2)  # round to two decimals
原始数据求解pearson相关系数
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Raw string: the path contains backslashes that are not valid escape
# sequences; the resulting string value is unchanged.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# Pearson correlation matrix of all columns, rounded to two decimals.
np.round(data.corr(method='pearson'), 2)
地方财政收入Adaptive-Lasso变量选择模型
#-*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import AdaptiveLasso#导入AdaptiveLasso算法,要在较新的Scikit-Learn才有。
# Raw string: the path contains backslashes that are not valid escape
# sequences; the resulting string value is unchanged.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# NOTE(review): AdaptiveLasso is imported from sklearn.linear_model above;
# confirm the installed scikit-learn actually provides this estimator.
model = AdaptiveLasso(gamma=1)
# First 13 columns (by position) are the predictors, column 'y' the target.
model.fit(data.iloc[:, 0:13], data['y'])
model.coef_  # coefficient fitted for each feature
地方财政收入灰色预测
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
# Raw strings: the paths contain backslashes that are not valid escape
# sequences; the resulting string values are unchanged.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
outputfile = r'D:\下载\data\output\data1_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
history = range(1994, 2014)  # year labels assigned to the input rows
data.index = history
data.loc[2014] = None  # empty rows to receive the two forecast years
data.loc[2015] = None
l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']
for i in l:
    # `.values` instead of `.as_matrix()`, which newer pandas no longer has.
    f = GM11(data.loc[history, i].values.astype(float))[0]
    # Write with a single .loc call; chained `data[i][2014] = ...` may
    # assign to a temporary copy instead of the frame.
    data.loc[2014, i] = f(len(data) - 1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(2)  # keep two decimals
data[l + ['y']].to_excel(outputfile)  # save the result
地方财政收入神经网络预测模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data1_GM11.xls'  # grey-forecast output, read as input here
outputfile = 'D:/下载/data/output/revenue.xls'  # where the network's predictions are saved
modelfile = 'D:/下载/data/output/1-net.model'  # where the trained weights are saved
# Use the sheet's first column as the index so the year-based .loc selection
# below can find its rows (presumably the year labels written by the
# grey-forecast step — confirm against that file).
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']  # feature columns
data_train = data.loc[range(1994, 2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardise each column
# `.values` instead of `.as_matrix()`, which newer pandas no longer has.
x_train = data_train[feature].values  # features
y_train = data_train['y'].values  # target

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# NOTE(review): Dense(n_in, n_out) given positionally and fit(nb_epoch=...)
# look like an early Keras signature — confirm against the installed Keras.
model = Sequential()  # build the model
model.add(Dense(6, 12))
model.add(Activation('relu'))  # ReLU; the original author notes it greatly improves accuracy
model.add(Dense(12, 1))
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, nb_epoch = 10000, batch_size = 16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the trained weights

# Predict for every row, then undo the standardisation of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = model.predict(x) * data_std['y'] + data_mean['y']
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual vs. predicted values
p = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*'])
plt.show()
增值税Adaptive-Lasso变量选择模型
#-*- coding: utf-8 -*-
import pandas as pd
# The original literal opened with two quote characters, which is a syntax error.
inputfile = 'D:/下载/data/input/data2.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# The original author notes AdaptiveLasso needs a fairly recent scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso
model = AdaptiveLasso(gamma=1)
# First 6 columns (by position) are the predictors, column 'y' the target.
model.fit(data.iloc[:, 0:6], data['y'])
model.coef_  # coefficient fitted for each feature
增值税灰色预测
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
# The original literals opened with two quote characters, which is a syntax error.
inputfile = 'D:/下载/data/input/data2.csv'  # input data file
outputfile = 'D:/下载/data/output/data2_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
history = range(1999, 2014)  # year labels assigned to the input rows
data.index = history
data.loc[2014] = None  # empty rows to receive the two forecast years
data.loc[2015] = None
l = ['x1', 'x3', 'x5']
for i in l:
    # `.values` instead of `.as_matrix()`, which newer pandas no longer has.
    f = GM11(data.loc[history, i].values.astype(float))[0]
    # Write with a single .loc call; chained `data[i][2014] = ...` may
    # assign to a temporary copy instead of the frame.
    data.loc[2014, i] = f(len(data) - 1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(6)  # keep six decimals
data[l + ['y']].to_excel(outputfile)  # save the result
增值税神经网络预测模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data2_GM11.xls'  # grey-forecast output, read as input here
outputfile = 'D:/下载/data/output/VAT.xls'  # where the network's predictions are saved
modelfile = 'D:/下载/data/output/2-net.model'  # where the trained weights are saved
# Use the sheet's first column as the index so the year-based .loc selection
# below can find its rows (presumably the year labels written by the
# grey-forecast step — confirm against that file).
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x3', 'x5']  # feature columns
data_train = data.loc[range(1999, 2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardise each column
# `.values` instead of `.as_matrix()`, which newer pandas no longer has.
x_train = data_train[feature].values  # features
y_train = data_train['y'].values  # target

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# NOTE(review): Dense(n_in, n_out) given positionally and fit(nb_epoch=...)
# look like an early Keras signature — confirm against the installed Keras.
model = Sequential()  # build the model
model.add(Dense(3, 6))
model.add(Activation('relu'))  # ReLU; the original author notes it greatly improves accuracy
model.add(Dense(6, 1))
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, nb_epoch = 10000, batch_size = 16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the trained weights

# Predict for every row, then undo the standardisation of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = model.predict(x) * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round(2)
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual vs. predicted values
p = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*'])
plt.show()
营业税Adaptive-Lasso变量选择模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data3.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data

# The original author notes AdaptiveLasso needs a fairly recent scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso

predictors = data.iloc[:, 0:10]  # first 10 columns, selected by position
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(predictors, target)
model.coef_  # coefficient fitted for each feature
营业税灰色预测
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
inputfile = 'D:/下载/data/input/data3.csv'  # input data file
outputfile = 'D:/下载/data/output/data3_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
history = range(1999, 2014)  # year labels assigned to the input rows
data.index = history
data.loc[2014] = None  # empty rows to receive the two forecast years
data.loc[2015] = None
l = ['x3', 'x4', 'x6', 'x8']
for i in l:
    # `.values` instead of `.as_matrix()`, which newer pandas no longer has.
    f = GM11(data.loc[history, i].values.astype(float))[0]
    # Write with a single .loc call; chained `data[i][2014] = ...` may
    # assign to a temporary copy instead of the frame.
    data.loc[2014, i] = f(len(data) - 1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round()  # round to whole numbers
data[l + ['y']].to_excel(outputfile)  # save the result
营业税神经网络预测模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data3_GM11.xls'  # grey-forecast output, read as input here
outputfile = 'D:/下载/data/output/sales_tax.xls'  # where the network's predictions are saved
modelfile = 'D:/下载/data/output/3-net.model'  # where the trained weights are saved
# Use the sheet's first column as the index so the year-based .loc selection
# below can find its rows (presumably the year labels written by the
# grey-forecast step — confirm against that file).
data = pd.read_excel(inputfile, index_col=0)
feature = ['x3', 'x4', 'x6', 'x8']  # feature columns
data_train = data.loc[range(1999, 2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardise each column
# `.values` instead of `.as_matrix()`, which newer pandas no longer has.
x_train = data_train[feature].values  # features
y_train = data_train['y'].values  # target

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# NOTE(review): Dense(n_in, n_out) given positionally and fit(nb_epoch=...)
# look like an early Keras signature — confirm against the installed Keras.
model = Sequential()  # build the model
model.add(Dense(4, 8))
model.add(Activation('relu'))  # ReLU; the original author notes it greatly improves accuracy
model.add(Dense(8, 1))
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, nb_epoch = 10000, batch_size = 16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the trained weights

# Predict for every row, then undo the standardisation of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = model.predict(x) * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round(2)
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual vs. predicted values
p = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*'])
plt.show()
企业所得税Adaptive-Lasso变量选择模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data4.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data

# The original author notes AdaptiveLasso needs a fairly recent scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso

predictors = data.iloc[:, 0:10]  # first 10 columns, selected by position
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(predictors, target)
model.coef_  # coefficient fitted for each feature
企业所得税灰色预测
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
inputfile = 'D:/下载/data/input/data4.csv'  # input data file
outputfile = 'D:/下载/data/output/data4_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
history = range(2002, 2014)  # year labels assigned to the input rows
data.index = history
data.loc[2014] = None  # empty rows to receive the two forecast years
data.loc[2015] = None
l = ['x1', 'x2', 'x3', 'x4', 'x6', 'x7', 'x9', 'x10']
for i in l:
    # `.values` instead of `.as_matrix()`, which newer pandas no longer has.
    f = GM11(data.loc[history, i].values.astype(float))[0]
    # Write with a single .loc call; chained `data[i][2014] = ...` may
    # assign to a temporary copy instead of the frame.
    data.loc[2014, i] = f(len(data) - 1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(2)  # keep two decimals
data[l + ['y']].to_excel(outputfile)  # save the result
企业所得税神经网络预测模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data4_GM11.xls'  # grey-forecast output, read as input here
outputfile = 'D:/下载/data/output/enterprise_income.xls'  # where the network's predictions are saved
modelfile = 'D:/下载/data/output/4-net.model'  # where the trained weights are saved
# Use the sheet's first column as the index so the year-based .loc selection
# below can find its rows (presumably the year labels written by the
# grey-forecast step — confirm against that file).
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x2', 'x3', 'x4', 'x6', 'x7', 'x9', 'x10']  # feature columns
data_train = data.loc[range(2002, 2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardise each column
# `.values` instead of `.as_matrix()`, which newer pandas no longer has.
x_train = data_train[feature].values  # features
y_train = data_train['y'].values  # target

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# NOTE(review): Dense(n_in, n_out) given positionally and fit(nb_epoch=...)
# look like an early Keras signature — confirm against the installed Keras.
model = Sequential()  # build the model
model.add(Dense(8, 6))
model.add(Activation('relu'))  # ReLU; the original author notes it greatly improves accuracy
model.add(Dense(6, 1))
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, nb_epoch = 5000, batch_size = 16)  # train for 5000 epochs
model.save_weights(modelfile)  # save the trained weights

# Predict for every row, then undo the standardisation of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = model.predict(x) * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round()
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual vs. predicted values
p = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*'])
plt.show()
个人所得税Adaptive-Lasso变量选择模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data5.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data

# The original author notes AdaptiveLasso needs a fairly recent scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso

predictors = data.iloc[:, 0:7]  # first 7 columns, selected by position
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(predictors, target)
model.coef_  # coefficient fitted for each feature
个人所得税灰色预测
#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
inputfile = 'D:/下载/data/input/data5.csv'  # input data file
outputfile = 'D:/下载/data/output/data5_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
history = range(2000, 2014)  # year labels assigned to the input rows
data.index = history
data.loc[2014] = None  # empty rows to receive the two forecast years
data.loc[2015] = None
l = ['x1', 'x4', 'x5', 'x7']
for i in l:
    # `.values` instead of `.as_matrix()`, which newer pandas no longer has.
    f = GM11(data.loc[history, i].values.astype(float))[0]
    # Write with a single .loc call; chained `data[i][2014] = ...` may
    # assign to a temporary copy instead of the frame.
    data.loc[2014, i] = f(len(data) - 1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round()  # round to whole numbers
data[l + ['y']].to_excel(outputfile)  # save the result
个人所得税神经网络预测模型
#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data5_GM11.xls'  # grey-forecast output, read as input here
outputfile = 'D:/下载/data/output/personal_Income.xls'  # where the network's predictions are saved
modelfile = 'D:/下载/data/output/5-net.model'  # where the trained weights are saved
# Use the sheet's first column as the index so the year-based .loc selection
# below can find its rows (presumably the year labels written by the
# grey-forecast step — confirm against that file).
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x4', 'x5', 'x7']  # feature columns
data_train = data.loc[range(2000, 2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardise each column
# `.values` instead of `.as_matrix()`, which newer pandas no longer has.
x_train = data_train[feature].values  # features
y_train = data_train['y'].values  # target

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# NOTE(review): Dense(n_in, n_out) given positionally and fit(nb_epoch=...)
# look like an early Keras signature — confirm against the installed Keras.
model = Sequential()  # build the model
model.add(Dense(4, 8))
model.add(Activation('relu'))  # ReLU; the original author notes it greatly improves accuracy
model.add(Dense(8, 1))
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, nb_epoch = 15000, batch_size = 16)  # train for 15000 epochs
model.save_weights(modelfile)  # save the trained weights

# Predict for every row, then undo the standardisation of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = model.predict(x) * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round()
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual vs. predicted values
p = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*'])
plt.show()
政府性基金收入灰色预测
#-*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
import pandas as pd
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to the series ``x0``.

    Parameters
    ----------
    x0 : array-like of numbers, at least 3 values
        Observations in time order. The input is converted to a flat float
        NumPy array, so lists and pandas Series work as well as arrays.

    Returns
    -------
    tuple
        ``(f, a, b, x0[0], C, P)`` where ``f(k)`` gives the restored value
        for the 1-based period ``k`` (``k > len(x0)`` extrapolates), ``a``
        and ``b`` are the fitted model parameters, ``x0[0]`` is the first
        observation, ``C`` is the variance ratio and ``P`` the small-residual
        probability.

    Raises
    ------
    ValueError
        If fewer than three observations are supplied.
    """
    # Work on a plain float array: a pandas Series would be index-aligned in
    # the neighbour-mean step below, and a list has no cumsum().
    x0 = np.asarray(x0, dtype=float).ravel()
    if len(x0) < 3:
        raise ValueError('GM11 needs at least 3 observations, got %d' % len(x0))
    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape((-1, 1))  # means of adjacent accumulated values
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((-1, 1))
    # Least-squares estimate of the two parameters via the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored value for period ``k`` (scalar or array)."""
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only defined from k = 2 onwards; the
        # first restored value is the first observation itself.
        if np.ndim(k) == 0:
            return first if k == 1 else restored
        return np.where(np.asarray(k) == 1, first, restored)

    fitted = np.array([f(i) for i in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
# Seven observations, labelled 2007-2013 when the frame is built below.
x0 = np.array([3152063, 2213050, 4050122, 5265142 ,5556619, 4772843, 9463330])
f, a, b, x00, C, P = GM11(x0)

# Periods 8 and 9 follow the seven observations, i.e. 2014 and 2015.
forecast_2014 = f(8)
forecast_2015 = f(9)
print(u'2014年、2015年的预测结果分别为:\n%0.2f万元和%0.2f万元' %(forecast_2014, forecast_2015))
print(u'后验差比值为:%0.4f' %C)

# Actual values per year, with two empty rows for the forecast years.
p = pd.DataFrame(x0, columns = ['y'], index = range(2007, 2014))
for year in (2014, 2015):
    p.loc[year] = None
# Restored values for periods 1..9, rounded to two decimals.
restored = [f(k) for k in range(1, 10)]
p['y_pred'] = pd.Series(restored, index = p.index).round(2)
p.index = pd.to_datetime(p.index, format='%Y')

import matplotlib.pylab as plt
p.plot(style=['b-o','r-*'], xticks = p.index)
plt.show()