财政收入影响因素分析及预测模型

数据集链接: https://pan.baidu.com/s/1_-8F0DAyHyZSS2M7u-2NYQ 提取码: ex6y
原始数据概括性度量

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Descriptive statistics of the raw data.
# Raw string keeps the Windows backslashes literal; the file lives in the
# "input" folder like in every other section of this document.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# Minimum, maximum, mean and standard deviation of every column, in that order.
r = [data.min(), data.max(), data.mean(), data.std()]
# Transpose so each data column becomes a row and each statistic a column.
r = pd.DataFrame(r, index=['Min', 'Max', 'Mean', 'STD']).T
np.round(r, 2)  # keep two decimals (displayed only in an interactive session)

原始数据求解pearson相关系数

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# Pearson correlation matrix of the raw data.
# Raw string: the backslashes are path separators, not escape sequences.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data
# Pairwise Pearson correlations, rounded to two decimals
# (displayed only in an interactive session).
np.round(data.corr(method='pearson'), 2)

地方财政收入Adaptive-Lasso变量选择模型

#-*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import AdaptiveLasso#导入AdaptiveLasso算法,要在较新的Scikit-Learn才有。

# Raw string: the backslashes are path separators, not escape sequences.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data

# NOTE(review): AdaptiveLasso is imported above from sklearn.linear_model; it
# does not appear to be part of released scikit-learn -- confirm the installed
# build actually provides it.
model = AdaptiveLasso(gamma=1)
# The first 13 columns are the candidate features, column 'y' is the target.
model.fit(data.iloc[:, 0:13], data['y'])
model.coef_  # coefficient of each feature (displayed only interactively)

地方财政收入灰色预测

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P

# Raw strings: the backslashes are path separators, not escape sequences.
inputfile = r'D:\下载\data\input\data1.csv'  # input data file
# NOTE(review): writing .xls relies on an Excel writer that recent pandas
# releases may no longer ship -- confirm against the target pandas version.
outputfile = r'D:\下载\data\output\data1_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
data.index = range(1994, 2014)  # label the rows with their years, 1994-2013

# Append empty rows for the two years to be forecast.
data.loc[2014] = None
data.loc[2015] = None
l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']
for i in l:
    # Fit on the observed years only. `.values` replaces DataFrame.as_matrix(),
    # which no longer exists in current pandas.
    f = GM11(data.loc[1994:2013, i].values.astype(float))[0]
    # Write through .loc: chained `data[i][2014] = ...` may assign to a copy.
    data.loc[2014, i] = f(len(data)-1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(2)  # keep two decimals

data[l+['y']].to_excel(outputfile)  # save the result

地方财政收入神经网络预测模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data1_GM11.xls'  # file written by the grey forecast step
outputfile = 'D:/下载/data/output/revenue.xls'  # where the network predictions are saved
modelfile = 'D:/下载/data/output/1-net.model'  # where the model weights are saved
# The first spreadsheet column holds the year labels written as the index;
# restore it as the index so the year-based .loc lookup below works.
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']  # feature columns

data_train = data.loc[range(1994,2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std  # z-score standardisation
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
x_train = data_train[feature].values  # feature matrix
y_train = data_train['y'].values  # target vector

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()  # build the model
# Dense takes the number of units first; the input width is a keyword.
model.add(Dense(12, input_dim=6))  # 6 inputs -> 12 hidden units
model.add(Activation('relu'))  # relu activation; the author reports it improves accuracy markedly
model.add(Dense(1))  # 12 hidden units -> 1 output
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, epochs=10000, batch_size=16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the model weights

# Predict for every year and undo the standardisation of the target.
x = ((data[feature] - data_mean[feature])/data_std[feature]).values
# predict() returns an (n, 1) array; flatten it before assigning it as a column.
data[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual against predicted values
p = data[['y','y_pred']].plot(subplots=True, style=['b-o','r-*'])
plt.show()

增值税Adaptive-Lasso变量选择模型

#-*- coding: utf-8 -*-
import pandas as pd
# The original literal opened with two quotes, which is a syntax error.
inputfile = 'D:/下载/data/input/data2.csv'  # input data file
data = pd.read_csv(inputfile)  # load the data

# Import AdaptiveLasso; the original note says it needs a newer scikit-learn.
# NOTE(review): it does not appear to be part of released scikit-learn --
# confirm the installed build actually provides it.
from sklearn.linear_model import AdaptiveLasso
model = AdaptiveLasso(gamma=1)
# The first 6 columns are the candidate features, column 'y' is the target.
model.fit(data.iloc[:, 0:6], data['y'])
model.coef_  # coefficient of each feature (displayed only interactively)

增值税灰色预测

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P

# The original literals opened with two quotes, which is a syntax error.
inputfile = 'D:/下载/data/input/data2.csv'  # input data file
# NOTE(review): writing .xls relies on an Excel writer that recent pandas
# releases may no longer ship -- confirm against the target pandas version.
outputfile = 'D:/下载/data/output/data2_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
data.index = range(1999, 2014)  # label the rows with their years, 1999-2013

# Append empty rows for the two years to be forecast.
data.loc[2014] = None
data.loc[2015] = None
l = ['x1', 'x3', 'x5']
for i in l:
    # Fit on the observed years only. `.values` replaces DataFrame.as_matrix(),
    # which no longer exists in current pandas.
    f = GM11(data.loc[1999:2013, i].values.astype(float))[0]
    # Write through .loc: chained `data[i][2014] = ...` may assign to a copy.
    data.loc[2014, i] = f(len(data)-1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(6)  # keep six decimals

data[l+['y']].to_excel(outputfile)  # save the result

增值税神经网络预测模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data2_GM11.xls'  # file written by the grey forecast step
outputfile = 'D:/下载/data/output/VAT.xls'  # where the network predictions are saved
modelfile = 'D:/下载/data/output/2-net.model'  # where the model weights are saved
# The first spreadsheet column holds the year labels written as the index;
# restore it as the index so the year-based .loc lookup below works.
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x3', 'x5']  # feature columns

data_train = data.loc[range(1999,2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std  # z-score standardisation
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
x_train = data_train[feature].values  # feature matrix
y_train = data_train['y'].values  # target vector

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()  # build the model
# Dense takes the number of units first; the input width is a keyword.
model.add(Dense(6, input_dim=3))  # 3 inputs -> 6 hidden units
model.add(Activation('relu'))  # relu activation; the author reports it improves accuracy markedly
model.add(Dense(1))  # 6 hidden units -> 1 output
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, epochs=10000, batch_size=16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the model weights

# Predict for every year and undo the standardisation of the target.
x = ((data[feature] - data_mean[feature])/data_std[feature]).values
# predict() returns an (n, 1) array; flatten it before assigning it as a column.
data[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round(2)
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual against predicted values
p = data[['y','y_pred']].plot(subplots=True, style=['b-o','r-*'])
plt.show()

营业税Adaptive-Lasso变量选择模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data3.csv'  # CSV with the candidate features and the target
data = pd.read_csv(inputfile)

# AdaptiveLasso comes from sklearn.linear_model; the original note says it is
# only available in newer scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso
features = data.iloc[:, :10]  # first ten columns are the candidate predictors
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(features, target)
model.coef_  # fitted coefficient of each feature

营业税灰色预测

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P

inputfile = 'D:/下载/data/input/data3.csv'  # input data file
# NOTE(review): writing .xls relies on an Excel writer that recent pandas
# releases may no longer ship -- confirm against the target pandas version.
outputfile = 'D:/下载/data/output/data3_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
data.index = range(1999, 2014)  # label the rows with their years, 1999-2013

# Append empty rows for the two years to be forecast.
data.loc[2014] = None
data.loc[2015] = None
l = ['x3', 'x4', 'x6', 'x8']
for i in l:
    # Fit on the observed years only. `.values` replaces DataFrame.as_matrix(),
    # which no longer exists in current pandas.
    f = GM11(data.loc[1999:2013, i].values.astype(float))[0]
    # Write through .loc: chained `data[i][2014] = ...` may assign to a copy.
    data.loc[2014, i] = f(len(data)-1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round()  # round to whole numbers

data[l+['y']].to_excel(outputfile)  # save the result

营业税神经网络预测模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data3_GM11.xls'  # file written by the grey forecast step
outputfile = 'D:/下载/data/output/sales_tax.xls'  # where the network predictions are saved
modelfile = 'D:/下载/data/output/3-net.model'  # where the model weights are saved
# The first spreadsheet column holds the year labels written as the index;
# restore it as the index so the year-based .loc lookup below works.
data = pd.read_excel(inputfile, index_col=0)
feature = ['x3', 'x4', 'x6', 'x8']  # feature columns

data_train = data.loc[range(1999,2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std  # z-score standardisation
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
x_train = data_train[feature].values  # feature matrix
y_train = data_train['y'].values  # target vector

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()  # build the model
# Dense takes the number of units first; the input width is a keyword.
model.add(Dense(8, input_dim=4))  # 4 inputs -> 8 hidden units
model.add(Activation('relu'))  # relu activation; the author reports it improves accuracy markedly
model.add(Dense(1))  # 8 hidden units -> 1 output
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, epochs=10000, batch_size=16)  # train for 10000 epochs
model.save_weights(modelfile)  # save the model weights

# Predict for every year and undo the standardisation of the target.
x = ((data[feature] - data_mean[feature])/data_std[feature]).values
# predict() returns an (n, 1) array; flatten it before assigning it as a column.
data[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round(2)
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual against predicted values
p = data[['y','y_pred']].plot(subplots=True, style=['b-o','r-*'])
plt.show()

企业所得税Adaptive-Lasso变量选择模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data4.csv'  # CSV with the candidate features and the target
data = pd.read_csv(inputfile)

# AdaptiveLasso comes from sklearn.linear_model; the original note says it is
# only available in newer scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso
features = data.iloc[:, :10]  # first ten columns are the candidate predictors
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(features, target)
model.coef_  # fitted coefficient of each feature

企业所得税灰色预测

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P

inputfile = 'D:/下载/data/input/data4.csv'  # input data file
# NOTE(review): writing .xls relies on an Excel writer that recent pandas
# releases may no longer ship -- confirm against the target pandas version.
outputfile = 'D:/下载/data/output/data4_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
data.index = range(2002, 2014)  # label the rows with their years, 2002-2013

# Append empty rows for the two years to be forecast.
data.loc[2014] = None
data.loc[2015] = None
l = ['x1', 'x2', 'x3', 'x4', 'x6', 'x7', 'x9', 'x10']
for i in l:
    # Fit on the observed years only. `.values` replaces DataFrame.as_matrix(),
    # which no longer exists in current pandas.
    f = GM11(data.loc[2002:2013, i].values.astype(float))[0]
    # Write through .loc: chained `data[i][2014] = ...` may assign to a copy.
    data.loc[2014, i] = f(len(data)-1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round(2)  # keep two decimals

data[l+['y']].to_excel(outputfile)  # save the result

企业所得税神经网络预测模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data4_GM11.xls'  # file written by the grey forecast step
outputfile = 'D:/下载/data/output/enterprise_income.xls'  # where the network predictions are saved
modelfile = 'D:/下载/data/output/4-net.model'  # where the model weights are saved
# The first spreadsheet column holds the year labels written as the index;
# restore it as the index so the year-based .loc lookup below works.
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x2', 'x3', 'x4', 'x6', 'x7', 'x9', 'x10']  # feature columns

data_train = data.loc[range(2002,2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std  # z-score standardisation
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
x_train = data_train[feature].values  # feature matrix
y_train = data_train['y'].values  # target vector

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()  # build the model
# Dense takes the number of units first; the input width is a keyword.
model.add(Dense(6, input_dim=8))  # 8 inputs -> 6 hidden units
model.add(Activation('relu'))  # relu activation; the author reports it improves accuracy markedly
model.add(Dense(1))  # 6 hidden units -> 1 output
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, epochs=5000, batch_size=16)  # train for 5000 epochs
model.save_weights(modelfile)  # save the model weights

# Predict for every year and undo the standardisation of the target.
x = ((data[feature] - data_mean[feature])/data_std[feature]).values
# predict() returns an (n, 1) array; flatten it before assigning it as a column.
data[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round()
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual against predicted values
p = data[['y','y_pred']].plot(subplots=True, style=['b-o','r-*'])
plt.show()

个人所得税Adaptive-Lasso变量选择模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/input/data5.csv'  # CSV with the candidate features and the target
data = pd.read_csv(inputfile)

# AdaptiveLasso comes from sklearn.linear_model; the original note says it is
# only available in newer scikit-learn.
# NOTE(review): confirm the installed scikit-learn actually provides it.
from sklearn.linear_model import AdaptiveLasso
features = data.iloc[:, :7]  # first seven columns are the candidate predictors
target = data['y']
model = AdaptiveLasso(gamma=1)
model.fit(features, target)
model.coef_  # fitted coefficient of each feature

个人所得税灰色预测

#-*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
  
inputfile = 'D:/下载/data/input/data5.csv'  # input data file
# NOTE(review): writing .xls relies on an Excel writer that recent pandas
# releases may no longer ship -- confirm against the target pandas version.
outputfile = 'D:/下载/data/output/data5_GM11.xls'  # where the grey forecasts are saved
data = pd.read_csv(inputfile)  # load the data
data.index = range(2000, 2014)  # label the rows with their years, 2000-2013

# Append empty rows for the two years to be forecast.
data.loc[2014] = None
data.loc[2015] = None
l = ['x1', 'x4', 'x5', 'x7']
for i in l:
    # Fit on the observed years only. `.values` replaces DataFrame.as_matrix(),
    # which no longer exists in current pandas.
    f = GM11(data.loc[2000:2013, i].values.astype(float))[0]
    # Write through .loc: chained `data[i][2014] = ...` may assign to a copy.
    data.loc[2014, i] = f(len(data)-1)  # forecast for 2014
    data.loc[2015, i] = f(len(data))  # forecast for 2015
    data[i] = data[i].round()  # round to whole numbers

data[l+['y']].to_excel(outputfile)  # save the result

个人所得税神经网络预测模型

#-*- coding: utf-8 -*-
import pandas as pd
inputfile = 'D:/下载/data/output/data5_GM11.xls'  # file written by the grey forecast step
outputfile = 'D:/下载/data/output/personal_Income.xls'  # where the network predictions are saved
modelfile = 'D:/下载/data/output/5-net.model'  # where the model weights are saved
# The first spreadsheet column holds the year labels written as the index;
# restore it as the index so the year-based .loc lookup below works.
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x4', 'x5', 'x7']  # feature columns

data_train = data.loc[range(2000,2014)].copy()  # model on the years before 2014
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std  # z-score standardisation
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
x_train = data_train[feature].values  # feature matrix
y_train = data_train['y'].values  # target vector

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()  # build the model
# Dense takes the number of units first; the input width is a keyword.
model.add(Dense(8, input_dim=4))  # 4 inputs -> 8 hidden units
model.add(Activation('relu'))  # relu activation; the author reports it improves accuracy markedly
model.add(Dense(1))  # 8 hidden units -> 1 output
model.compile(loss='mean_squared_error', optimizer='adam')  # compile the model
model.fit(x_train, y_train, epochs=15000, batch_size=16)  # train for 15000 epochs
model.save_weights(modelfile)  # save the model weights

# Predict for every year and undo the standardisation of the target.
x = ((data[feature] - data_mean[feature])/data_std[feature]).values
# predict() returns an (n, 1) array; flatten it before assigning it as a column.
data[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
data[u'y_pred'] = data[u'y_pred'].round()
data.to_excel(outputfile)

import matplotlib.pyplot as plt  # plot actual against predicted values
p = data[['y','y_pred']].plot(subplots=True, style=['b-o','r-*'])
plt.show()

政府性基金收入灰色预测

#-*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
import pandas as pd

def GM11(x0):
    """Fit a GM(1,1) grey model to a sequence and return its forecaster.

    Args:
        x0: The original data sequence as any 1-D array-like (list, ndarray
            or pandas Series); it is converted with ``np.asarray``.

    Returns:
        A tuple ``(f, a, b, x0[0], C, P)``:
        ``f(k)`` is the restored (fitted or forecast) value at 1-based step
        ``k`` and accepts a scalar or an array; ``a`` and ``b`` are the
        least-squares model parameters; ``x0[0]`` is the first observation;
        ``C`` is the standard deviation of the absolute residuals divided by
        the standard deviation of ``x0``; ``P`` is the share of absolute
        residuals lying within ``0.6745 * std(x0)`` of their mean.
    """
    x0 = np.asarray(x0)  # lets callers pass lists or Series, not only ndarrays
    x1 = x0.cumsum()  # 1-AGO (accumulated) sequence
    z1 = (x1[:-1] + x1[1:]) / 2.0  # mean of each pair of adjacent accumulated values
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Solve B @ [a, b]^T ~= Yn through the normal equations.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)

    def f(k):
        """Return the restored value(s) of the model at 1-based step ``k``."""
        k = np.asarray(k)
        restored = (x0[0] - b/a)*np.exp(-a*(k - 1)) - (x0[0] - b/a)*np.exp(-a*(k - 2))
        # The difference formula only holds from the second step on; the
        # first restored value is the first observation itself.
        return np.where(k == 1, x0[0], restored)[()]

    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))  # absolute fit residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
  
# Observed series for the years 2007-2013.
x0 = np.array([3152063, 2213050, 4050122, 5265142, 5556619, 4772843, 9463330])
f, a, b, x00, C, P = GM11(x0)

# Steps 8 and 9 of the fitted model correspond to 2014 and 2015.
forecast_2014, forecast_2015 = f(8), f(9)
print(u'2014年、2015年的预测结果分别为:\n%0.2f万元和%0.2f万元' %(forecast_2014, forecast_2015))
print(u'后验差比值为:%0.4f' %C)

# Observations indexed by year, with empty rows added for the forecast years.
p = pd.DataFrame(x0, columns=['y'], index=range(2007, 2014))
for year in (2014, 2015):
    p.loc[year] = None
# Model values for steps 1..9, i.e. 2007 through 2015, rounded to two decimals.
p['y_pred'] = list(map(f, range(1, 10)))
p['y_pred'] = p['y_pred'].round(2)
p.index = pd.to_datetime(p.index, format='%Y')

import matplotlib.pylab as plt
# Observed values as a blue line with circles, model values as a red line with stars.
p.plot(style=['b-o','r-*'], xticks=p.index)
plt.show()
全部评论

相关推荐

11-14 16:13
已编辑
重庆科技大学 测试工程师
Amazarashi66:不进帖子我都知道🐮❤️网什么含金量
点赞 评论 收藏
分享
评论
点赞
收藏
分享
牛客网
牛客企业服务