python : selenium 网页爬虫 读取列表文件


用 selenium 编写的网页爬虫:从列表文件 flist.txt 中读取基金代码。

先输入日期查询,再抓取天天基金网上的基金净值

fund3.py

# -*- coding: utf-8 -*-
import os, sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from datetime import date

class Fund(unittest.TestCase):
    """Scrape fund net-value table rows from fund.eastmoney.com.

    The fund codes are read from ``flist.txt`` (one per line).  For every
    code the history page is opened, the date range is typed into the query
    form, and the first four cells of each resulting table row are appended
    to ``<YYYYMMDD>.txt`` in the current directory, prefixed by the code.

    The class attributes below can be overridden in a subclass (or on the
    class itself) to query another range without editing ``test_fund``.
    """

    # Date range typed into the query form, in the form the page expects.
    start_date = "2017-06-29"
    end_date = "2017-07-07"
    # Seconds to pause after clicking "search" before reading the table.
    query_delay = 1

    def setUp(self):
        """Load the fund list, start Chrome and open today's output file.

        Raises:
            IOError/OSError: if ``flist.txt`` cannot be read or the output
                file cannot be created.
        """
        today = date.today().strftime("%Y%m%d")
        self.base_url = "http://fund.eastmoney.com"
        self.verificationErrors = []
        self.accept_next_alert = True

        # Read the list BEFORE launching the browser: unittest does not run
        # tearDown() when setUp() raises, so a missing list file must not
        # leave a Chrome process behind.
        self.flist = []  # fund list
        with open("flist.txt", 'r') as list_file:
            for line in list_file:
                code = line.strip()
                # Only lines that are exactly six characters long are kept.
                if len(code) == 6:
                    self.flist.append(code)

        ch_driver = os.path.abspath(r"D:\selenium\chromedriver.exe")
        # NOTE(review): this variable is set exactly as before, but it is not
        # passed to webdriver.Chrome(); presumably chromedriver is located
        # via PATH -- confirm against the installed Selenium version.
        os.environ["webdriver.chrome.driver"] = ch_driver
        self.driver = webdriver.Chrome()
        try:
            self.driver.implicitly_wait(30)
            self.fp = open(today + '.txt', 'w')
        except Exception:
            # Same reason as above: clean up ourselves, tearDown won't run.
            self.driver.quit()
            raise
        print(today + '.txt')

    def _set_field(self, element_id, value):
        """Clear the input with the given id and type ``value`` into it."""
        field = self.driver.find_element_by_id(element_id)
        field.clear()
        field.send_keys(value)

    def _write_rows(self, code):
        """Append one output line per row of the result table for ``code``.

        Each line is ``code``, then the text of the row's first four cells,
        every item followed by a single space, then a newline.
        """
        div = self.driver.find_element_by_id("jztable")
        table = div.find_elements_by_tag_name("table")
        tbody = table[0].find_elements_by_tag_name("tbody")
        for row in tbody[0].find_elements_by_tag_name('tr'):
            cells = [td.text for td in row.find_elements_by_tag_name('td')[0:4]]
            # Build the whole line first so that a failure while reading a
            # cell cannot leave a truncated line in the output file.
            self.fp.write(' '.join([code] + cells) + ' \n')

    def test_fund(self):
        """Query every fund in ``self.flist`` and dump its table rows."""
        driver = self.driver
        for f1 in self.flist:
            driver.get(self.base_url + "/f10/jjjz_%s.html" % (f1))
            self._set_field("lsjzSDate", self.start_date)
            self._set_field("lsjzEDate", self.end_date)
            driver.find_element_by_css_selector("input.search").click()
            time.sleep(self.query_delay)
            try:
                self._write_rows(f1)
            except Exception as msg:
                # Best effort: report which fund failed and carry on with
                # the remaining codes.
                print("%s: %s" % (f1, msg))

    def is_element_present(self, how, what):
        """Return True if an element located by (``how``, ``what``) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if the driver can switch to an alert."""
        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards in every
        case.
        """
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the output file, quit the browser, check recorded errors."""
        try:
            self.fp.close()
        finally:
            # Quit the browser even if closing the file fails.
            self.driver.quit()
        self.assertEqual([], self.verificationErrors)

# When executed as a script, hand control to unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()

flist.txt:每行一个基金代码(6 位数字);去掉首尾空白后长度不是 6 的行会被忽略。



智能推荐

注意!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系我们删除。



 
© 2014-2019 ITdaan.com 粤ICP备14056181号  

赞助商广告