基金爬虫

爬取天天基金网上指定基金的历史净值数据,并计算各时间区间内净值的平均值。结果可通过邮件发送到指定邮箱,配合 Linux crontab 定时运行效果更佳。

# -*- coding: utf-8 -*-

import datetime
import smtplib
from email.header import Header
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd


def get_url(url, params=None, proxies=None, timeout=10):
    """
    Fetch a page with an HTTP GET request and return its body as text.

    :param url: address to request
    :param params: optional mapping of query-string parameters
    :param proxies: optional proxy mapping, handed unchanged to ``requests.get``
    :param timeout: seconds to wait for the server before giving up; without
        it ``requests`` waits indefinitely, which would hang an unattended run
    :return: the decoded response body (``Response.text``)
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.Timeout: if the server does not answer within ``timeout``
    """
    rsp = requests.get(url, params=params, proxies=proxies, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions instead of parsing an error page.
    rsp.raise_for_status()
    return rsp.text


def get_fund_data(code, per=10, sdate='', edate='', proxies=None):
    """
    Download the net-value history table of one fund as a DataFrame.

    The response embeds an HTML table together with a ``pages:N`` counter.
    Every page from 1 to N is requested and the table rows are concatenated.

    :param code: fund code, e.g. ``"519732"``
    :param per: number of rows requested per page
    :param sdate: start date sent to the server (the script uses ``YYYY-MM-DD``)
    :param edate: end date sent to the server, same format as ``sdate``
    :param proxies: optional proxy mapping forwarded to ``get_url``
    :return: ``pandas.DataFrame`` with one column per ``<th>`` header and one
        row per table row; when there are no rows the frame is empty but
        still carries the header columns
    :raises ValueError: if the response does not contain a page counter
    """
    url = 'http://fund.eastmoney.com/f10/F10DataApi.aspx'
    params = {'type': 'lsjz', 'code': code, 'page': 1, 'per': per, 'sdate': sdate, 'edate': edate}

    html = get_url(url, params, proxies)
    soup = BeautifulSoup(html, 'html.parser')

    # Match digits only: a greedy ``.*`` depends on where the last comma of
    # the line happens to be.
    match = re.search(r'pages:\s*(\d+)', html)
    if match is None:
        raise ValueError('no page counter found in the response for fund %s' % code)
    pages = int(match.group(1))

    heads = [head.contents[0] for head in soup.findAll("th")]

    records = []
    for page in range(1, pages + 1):
        # Page 1 was already downloaded above to read the counter; reuse it.
        if page > 1:
            params['page'] = page
            soup = BeautifulSoup(get_url(url, params, proxies), 'html.parser')

        bodies = soup.findAll("tbody")
        rows = bodies[0].findAll("tr") if bodies else []
        for row in rows:
            # An empty cell becomes NaN so it can still be averaged later.
            row_records = [
                cell.contents[0] if cell.contents else np.nan
                for cell in row.findAll('td')
            ]
            # Skip rows that do not line up with the headers (presumably a
            # single spanning "no data" placeholder cell -- TODO confirm).
            if len(row_records) != len(heads):
                continue
            records.append(row_records)

    if not records:
        # Nothing to slice column-wise; hand back an empty, labelled frame.
        return pd.DataFrame(columns=heads)

    np_records = np.array(records)
    data = pd.DataFrame()
    for col, col_name in enumerate(heads):
        data[col_name] = np_records[:, col]

    return data


def send_email(msg):
    """
    Send ``msg`` as a plain-text e-mail through the configured SMTP server.

    Server, credentials, sender and receivers are hard-coded below and are
    empty placeholders that must be filled in before use. Failures are
    reported on stdout rather than raised, so a mail problem does not abort
    the caller.

    :param msg: body of the e-mail (sent as UTF-8 plain text)
    :return: None
    """
    # Third-party SMTP service
    mail_host = "smtp.exmail.qq.com"  # SMTP server
    mail_user = ""  # user name
    mail_pass = ""  # password

    sender = ''
    receivers = ['']  # recipients, e.g. a QQ mailbox or any other address

    message = MIMEText(msg, 'plain', 'utf-8')
    message['From'] = Header("基金每日均值", 'utf-8')
    message['To'] = Header("", 'utf-8')

    subject = '基金每日均值'
    message['Subject'] = Header(subject, 'utf-8')

    smtp_client = None
    try:
        smtp_client = smtplib.SMTP()
        smtp_client.connect(mail_host, 25)  # 25 is the plain SMTP port
        smtp_client.login(mail_user, mail_pass)
        smtp_client.sendmail(sender, receivers, message.as_string())
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("邮件发送成功")
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print("Error: 无法发送邮件:" + str(e))
    finally:
        # Always release the connection, whether or not sending succeeded.
        if smtp_client is not None:
            try:
                smtp_client.quit()
            except (smtplib.SMTPException, EnvironmentError):
                # quit() fails when the connection was never established or
                # has already dropped; just release the socket.
                smtp_client.close()

if __name__ == "__main__":
    # Build one text report with the mean net values of every fund over a
    # set of look-back windows, print it, then mail it.
    report = ""

    # Read the clock once so that every window is anchored to the same day,
    # even if the run happens to cross midnight.
    today = datetime.datetime.now()
    date_format = "%Y-%m-%d"
    now = today.strftime(date_format)

    # (look-back in days, label of the unit-value line, label of the
    # accumulated-value line), in report order. Labels are kept exactly as
    # they appear in the report, including the differing separator on the
    # first line.
    # NOTE(review): the "季" window is 30 * 4 days -- confirm that four
    # months rather than three is intended.
    windows = [
        (2, "2-单位净值: ", "2-累计净值:"),
        (3, "3-单位净值:", "3-累计净值:"),
        (5, "周-单位净值:", "周-累计净值:"),
        (30, "月-单位净值:", "月-累计净值:"),
        (30 * 4, "季-单位净值:", "季-累计净值:"),
        (30 * 6, "半年-单位净值:", "半年-累计净值:"),
        (365, "年-单位净值:", "年-累计净值:"),
        (365 * 2, "2年-单位净值:", "2年-累计净值:"),
        (365 * 3, "3年-单位净值:", "3年-累计净值:"),
    ]
    # Column names as they appear in the downloaded table headers.
    unit_column = u"单位净值"
    total_column = u"累计净值"

    fund_list = ["519732"]
    for fund in fund_list:
        pieces = ["\n        ", fund, ":"]
        for days, unit_label, total_label in windows:
            sdate = (today - datetime.timedelta(days=days)).strftime(date_format)
            data = get_fund_data(fund, per=49, sdate=sdate, edate=now)
            for label, column in ((unit_label, unit_column), (total_label, total_column)):
                # Cells arrive as text, so convert before averaging.
                mean_value = round(data[column].astype('float').mean(), 4)
                pieces.append("\n          " + label + str(mean_value))
        pieces.append("\n        ")
        report += "".join(pieces)
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(report)
    send_email(report)


  目录