biubiu~ here is a batch of China population data for you.
The script below scrapes the 2019 population and 2018 GDP of each Chinese province from the World Population site.
(The data is not guaranteed to be completely accurate.)
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By


def get_page(url):
    """Open the site, click through to the population page, and return the table rows."""
    driver = webdriver.Chrome()
    driver.get(url)
    # Click the menu entry that leads to the China population page.
    xpath = '//*[@id="menu-item-9802"]/a'
    ActionChains(driver).click(driver.find_element(By.XPATH, xpath)).perform()
    time.sleep(1)
    # Grab the inner HTML of the container that holds the data table.
    xpath1 = '//*[@id="copy"]/div[1]'
    table1 = driver.find_element(By.XPATH, xpath1).get_attribute('innerHTML')
    driver.quit()
    soup = BeautifulSoup(table1, 'html.parser')
    tb = soup.find_all('tr')
    return tb


def find_message(tb):
    """Collect rank, region, 2019 population and 2018 GDP from each table row."""
    for row in tb:
        cols = [col.text for col in row.find_all('td')]
        # Skip the header row and any row that lacks all four columns.
        if len(cols) < 4 or cols[0] == '排名':
            continue
        index.append(cols[0])
        address.append(cols[1])
        number_2019.append(cols[2])
        GDP_18.append(cols[3])


def save_message():
    """Write the collected columns to a UTF-8 CSV that Excel can open directly."""
    data = {'排名': index, '地区': address, '2019年人口': number_2019, '2018年GDP': GDP_18}
    dataframe = pd.DataFrame(data)
    dataframe.to_csv('Pnumber.csv', index=False, sep=',', encoding='utf-8-sig')


def main():
    tb = get_page(url)
    find_message(tb)
    save_message()


url = 'http://www.chamiji.com'
index = []
address = []
number_2019 = []
GDP_18 = []
main()
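
Once the script finishes, a quick way to sanity-check the result is to load Pnumber.csv back with pandas. This is a minimal sketch, assuming the script above ran in the same directory and wrote the file with the column names used in save_message:

import pandas as pd

# Read the CSV back with the same encoding it was written with.
df = pd.read_csv('Pnumber.csv', encoding='utf-8-sig')

# Show the first few rows and confirm all four columns were captured.
print(df.head())
print(df.shape)               # expect (number of provinces, 4)
print(df.columns.tolist())

Note that the scraped values are stored as text, so figures that contain thousands separators or units would still need to be cleaned and converted to numbers before doing any arithmetic on them.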