1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
import csv
import os
import time

import lxml
import numpy as np
import pandas as pd
import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Locators and file names used by login().  The XPaths are tied to the page
# layout at the time the script was written and will break if Zhihu changes
# its markup.
_SIGNUP_URL = 'https://www.zhihu.com/signup?next=%2F'
_SOCIAL_LOGIN_BUTTON_XPATH = '//*[@id="root"]/div/main/div/div/div/div[3]/span[2]/button[2]'
_NAV_TAB_XPATH = '//*[@id="root"]/div/main/div/div/div[1]/div/div[1]/nav/a[3]'
_TITLE_XPATH = '//h2'
_HEAT_XPATH = '/html/body/div[1]/div/main/div/div/div[1]/div/div[2]/div/div/div[2]/section/div[2]/div'
_LINK_XPATH = '//*[@id="TopstoryContent"]/div/div/div[2]/section/div[2]/a'

_EVENTS_CSV = '1.csv'
_LINKS_CSV = '2.csv'
_MERGED_CSV = 'zhihu.csv'


def _sign_in(driver, user, passwd):
    """Drive the pop-up login form and return to the original window."""
    driver.get(_SIGNUP_URL)
    # NOTE(review): presumably the "log in with QQ" button - confirm on the page.
    driver.find_element(By.XPATH, _SOCIAL_LOGIN_BUTTON_XPATH).click()

    # The click opens a second window; the form lives in an iframe inside it.
    handles = driver.window_handles
    driver.switch_to.window(handles[1])
    driver.switch_to.frame('ptlogin_iframe')
    time.sleep(1)

    # NOTE(review): 'switcher_plogin' looks like the toggle to the
    # account/password form - confirm.
    driver.find_element(By.ID, 'switcher_plogin').click()
    # Use the caller's credentials rather than values baked into the source.
    driver.find_element(By.ID, 'u').send_keys(user)
    driver.find_element(By.ID, 'p').send_keys(passwd)
    driver.find_element(By.ID, 'login_button').click()

    driver.switch_to.window(handles[0])
    time.sleep(5)  # fixed wait for the post-login page


def _collect_entries(driver):
    """Open the third nav tab and return ``(rows, links)`` as plain strings.

    ``rows`` is a list of ``[title_text, heat_text]`` pairs and ``links`` a
    list of href values.  Everything is read from the DOM exactly once so the
    printed output and the CSV files are guaranteed to agree.
    """
    # NOTE(review): nav/a[3] is presumably the hot-list tab - confirm.
    driver.find_element(By.XPATH, _NAV_TAB_XPATH).click()
    time.sleep(5)  # fixed wait for the list to render

    titles = driver.find_elements(By.XPATH, _TITLE_XPATH)
    heats = driver.find_elements(By.XPATH, _HEAT_XPATH)
    # zip() stops at the shorter sequence, so unmatched elements are dropped.
    rows = [[title.text, heat.text] for title, heat in zip(titles, heats)]
    links = [
        anchor.get_attribute('href')
        for anchor in driver.find_elements(By.XPATH, _LINK_XPATH)
    ]
    return rows, links


def _write_csv(path, header, rows):
    """Write *header* and then *rows* to *path* as UTF-8 with a BOM."""
    # newline='' keeps the csv module's own "\r\n" terminator from being
    # translated a second time (which yields "\r\r\n" on Windows).
    with open(path, 'w', encoding='utf-8-sig', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        writer.writerows(rows)


def login(user, passwd):
    """Log in to Zhihu through the pop-up login form and export the scraped list.

    Side effects, all relative to the current working directory:

    * prints every title/heat pair, then every link, to stdout;
    * writes ``1.csv`` (columns ``事件``, ``热度``) and ``2.csv`` (column ``链接``);
    * writes ``zhihu.csv``, the column-wise concatenation of the two files.

    Args:
        user: Account name typed into the login form's ``u`` field.
        passwd: Password typed into the login form's ``p`` field.

    Returns:
        None.
    """
    driver = webdriver.Chrome()
    try:
        _sign_in(driver, user, passwd)
        rows, links = _collect_entries(driver)
    finally:
        # Always shut the browser down, even when a locator fails.
        driver.quit()

    for title, heat in rows:
        print(title)
        print(heat)
    for link in links:
        print(link)

    _write_csv(_EVENTS_CSV, ['事件', '热度'], rows)
    _write_csv(_LINKS_CSV, ['链接'], [[link] for link in links])

    # Both files were just produced by csv.writer, so every row has a
    # consistent field count and no bad-line handling is required (the old
    # error_bad_lines flag no longer exists in pandas 2.x).
    events = pd.read_csv(_EVENTS_CSV, encoding='utf-8-sig')
    hrefs = pd.read_csv(_LINKS_CSV, encoding='utf-8-sig')
    merged = pd.concat([events, hrefs], axis=1)
    merged.to_csv(_MERGED_CSV, index=False, encoding='utf-8-sig')
# Credentials handed to login().  The literals are placeholder text
# ("qq账号" = QQ account, "qq密码" = QQ password).
user = 'qq账号'
passwd = 'qq密码'

# Run the scraper at import/execution time and print login()'s return value.
print(login(user=user, passwd=passwd))
|