如何使python与本地mysql服务器并行工作?
我正在尝试并行地向 MySQL 执行大量插入操作。数据库服务器的各项设置看起来都已经能够支持大量的连接和/或线程。完整代码和完整错误消息附在下面。 用谷歌搜索最后那条错误消息没有找到任何相关信息: OperationalError: (2006, 'MySQL server has gone away') 2.7.10:/usr/bin/python。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。 /Users/darianhickman/Documents/Quantstart/src/dev/price_retrieval.py in load_ticker_history(t=(17L, 'AKAM')) 141 cur.executemany(final_str, daily_data) 142 143 def load_ticker_history(t): 144 print( "Adding data for %s" % (t[1]) ) 145 yf_data = get_daily_historic_data_yahoo(t[1], (2015,01,01)) --> 146 insert_daily_data_into_db('1', t[0], yf_data) t = (17L, 'AKAM') yf_data = [(datetime.datetime(2015,9,11,0,0),u'74.32',u'75.32',u'73.940002',u'74.669998',u'1368400',u'74.669998'), (datetime.datetime(2015,9,10,0,0),u'73.279999',u'75.029999', u'73.190002',u'74.550003',u'1408900',u'74.550003'), (datetime.datetime(2015,9,9,0,0),u'74.879997',u'75.25', u'73.129997',u'73.349998',u'1273300',u'73.349998'), (datetime.datetime(2015,9,8,0,0),u'73.75',u'74.349998', u'73.059998',u'74.279999',u'1413600',u'74.279999'), (datetime.datetime(2015,9,4,0,0),u'71.709999',u'73.169998', u'71.709999',u'72.580002',u'1607100',u'72.580002'), (datetime.datetime(2015,9,3,0,0),u'71.860001',u'74.709999', u'71.540001',u'73.260002',u'2378600',u'73.260002'), (datetime.datetime(2015,9,2,0,0),u'71.349998',u'71.669998', u'69.669998',u'71.269997',u'1671900',u'71.269997'), (datetime.datetime(2015,9,1,0,0),u'69.75',u'71.43', u'69.669998',u'70.43',u'2401800',u'70.43'), (datetime.datetime(2015,8,31,0,0),u'71.660004',u'71.809998', u'70.870003',u'71.309998',u'1764400',u'71.309998'), (datetime.datetime(2015,8,28,0,0),u'70.589996',u'71.970001', u'70.199997',u'71.900002',u'2371800',u'71.900002'), (datetime.datetime(2015,8,27,0,0),u'69.089996',u'70.889999', u'68.529999',u'70.50',u'2081800',u'70.50'), (datetime.datetime(2015,8,26,0,0),u'67.07',u'68.300003', u'65.330002',u'68.169998',u'2238400',u'68.169998'), (datetime.datetime(2015,8,25,0,0),u'68.23998',u'68.989998', u'65.589996',u'65.629997',u'3112100',u'65.629997'), (datetime.datetime(2015,8,24,0,0),u'65.010002',u'68.220001', 
u'63.139999',u'66.519997',u'3779300',u'66.519997'), (datetime.datetime(2015,8,21,0,0),u'69.190002',u'70.00', u'67.910004',u'68.040001',u'2298300',u'68.040001'), (datetime.datetime(2015,8,20,0,0),u'72.300003',u'72.57', u'69.769997',u'69.779999',u'2096000',u'69.779999'), (datetime.datetime(2015,8,19,0,0),u'73.779999',u'73.900002', u'72.290001',u'73.059998',u'881700',u'73.059998'), (datetime.datetime(2015,8,18,0,0),u'73.959999',u'74.190002', u'73.230003',u'73.849998',u'1220500',u'73.849998'), (datetime.datetime(2015,8,17,0,0),u'73.010002',u'73.599998', u'72.32',u'73.57',u'1025600',u'73.57',(datetime.datetime(2015,8, 14,0,0),u'73.379997',u'73.839996',u'72.730003',u'73.370003', u'1328400',u'73.370003'),…] 147 148 149如果name==“main”: 150#这会忽略有关数据截断的警告 。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。 /Users/darianhickman/Documents/Quantstart/src/dev/price_retrieval.py 在数据库中插入每日数据(数据供应商id='1',符号id=17L, 每日数据=[('1',17L,datetime.datetime(2015,9,11,0,0), datetime.datetime(2015,9,12,2,42,58,361925), datetime.datetime(2015,9,12,2,42,58,361925),u'74.32',u'75.32', u'73.940002',u'74.669998',u'1368400',u'74.669998'),('1',17L, datetime.datetime(2015,9,10,0,0),datetime.datetime(2015,9,12, datetime.datetime(2015,9,12,2,42,58,361925), u'73.279999',u'75.029999',u'73.190002',u'74.550003',u'1408900', u'74.550003'),('1',17L,datetime.datetime(2015,9,9,0,0), datetime.datetime(2015,9,12,2,42,58,361925), datetime.datetime(2015,9,12,2,42,58,361925),u'74.879997', u'75.25',u'73.129997',u'73.349998',u'1273300',u'73.349998'),('1', 17L,datetime.datetime(2015,9,8,0,0),datetime.datetime(2015,9, 12,2,42,58361925),日期时间。日期时间(2015,9,12,2,42,58, 361925),u'73.75',u'74.349998',u'73.059998',u'74.279999', u'1413600',u'74.279999'),('1',17L,datetime.datetime(2015,9,4,0, datetime.datetime(2015,9,12,2,42,58361925), datetime.datetime(2015,9,12,2,42,58,361925),u'71.709999', u'73.169998',u'71.709999',u'72.580002',u'1607100',u'72.580002', ('1',17L,datetime.datetime(2015,9,3,0,0), 
datetime.datetime(2015,9,12,2,42,58,361925), datetime.datetime(2015,9,12,2,42,58,361925),u'71.860001', u'74.709999',u'71.540001',u'73.260002',u'2378600',u'73.260002', ('1',17L,datetime.datetime(2015,9,2,0,0), datetime.datetime(2015,9,12,2,42,58,361925), datetime.datetime(2015,9,12,2,42,58,361925),u'71.349998', u'71.669998',u'69.669998',u'71.269997',u'1671900',u'71.269997'), ('1',17L,datetime.datetime(2015,9,1,0,0), datetime.datetime(2015,9,12,2,42,58,361925), datetime.datetime(2015,9,12,2,42,58,361925),u'69.75',u'71.43', u'69.669998',u'70.43',u'2401800',u'70.43',('1',17L, datetime.datetime(2015,8,31,0,0),datetime.datetime(2015,9,12, datetime.datetime(2015,9,12,2,42,58,361925), u'71.660004',u'71.809998',u'70.870003',u'71.309998',u'1764400', u'71.309998',('1',17L,datetime.datetime(2015,8,28,0,0), 日期时间。日期时间(2015年9月1日)如何使python与本地mysql服务器并行工作?,python,mysql,parallel-processing,Python,Mysql,Parallel Processing,我正在尝试将大量插入并行化到mysql。所有的数据库服务器设置看起来都准备好了大量的连接和/或线程。下面添加了完整代码和完整错误消息。 谷歌搜索最终错误消息没有返回任何相关信息: 操作错误:(2006年,“MySQL服务器消失了”) 2.7.10:/usr/bin/python。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。 /Users/darianhickman/Documents/Quan
Code:
> #!/usr/bin/python
# -*- coding: utf-8 -*-
# price_retrieval.py
from __future__ import print_function
import datetime
import warnings
import MySQLdb as mdb
import requests
from joblib import Parallel, delayed
import multiprocessing
# Obtain a database connection to the MySQL instance.
# The settings below are hard-coded, and `con` is opened once, at the moment
# this module is imported/executed. Every function in this file that refers
# to `con` therefore works on this one connection object.
# NOTE(review): a MySQLdb connection presumably should not be shared between
# worker processes -- confirm before running the loader with n_jobs > 1.
db_host = 'localhost'
db_user = 'sec_user'
db_pass = 'password'
db_name = 'securities_master'
con = mdb.connect(db_host, db_user, db_pass, db_name)
def obtain_list_of_db_tickers():
    """
    Fetch the (id, ticker) pair of every row in the symbol table.

    The query runs on the module-level connection ``con``. The result is a
    list of two-element tuples, in the order the rows were returned.
    """
    with con:
        cursor = con.cursor()
        cursor.execute("SELECT id, ticker FROM symbol")
        rows = cursor.fetchall()
    pairs = []
    for row in rows:
        # Keep only the first two columns: the symbol id and its ticker.
        pairs.append((row[0], row[1]))
    return pairs
def is_db_current():
    """
    Report whether the stored daily prices look up to date.

    The query returns the latest ``price_date`` per symbol for at most five
    symbols (it carries ``limit 5``). Each of those dates is compared with
    the last weekday strictly before today: yesterday on Tuesday-Friday, and
    the preceding Friday on Saturday, Sunday and Monday. Real calendar dates
    are compared, so old data that merely falls on a matching weekday is not
    mistaken for current data.

    Market holidays are not taken into account, and nothing here handles
    same-day data.

    Returns:
        True when every sampled symbol has a price on or after the expected
        day. False when any sampled symbol is older or has no date, or when
        the query returns no rows at all (an empty price table is never
        current).
    """
    sql = (
        "select max(price_date), symbol_id, ticker "
        "from securities_master.daily_price left join securities_master.symbol "
        "on securities_master.daily_price.symbol_id = securities_master.symbol.id "
        "group by symbol_id limit 5"
    )
    with con:
        cur = con.cursor()
        cur.execute(sql)
        rows = cur.fetchall()

    # Nothing stored yet: report "not current" so the loader actually runs.
    if not rows:
        return False

    today = datetime.date.today()
    # weekday(): Monday = 0 ... Sunday = 6. Step back over the weekend so the
    # expected day is always a weekday (Mon -> Fri, Sun -> Fri, Sat -> Fri).
    days_back = {0: 3, 6: 2}.get(today.weekday(), 1)
    expected = today - datetime.timedelta(days=days_back)

    for row in rows:
        latest = row[0]
        if latest is None:
            return False
        # The driver may hand back datetime or date; compare dates only.
        if isinstance(latest, datetime.datetime):
            latest = latest.date()
        if latest < expected:
            return False
    # Made it through every check.
    return True
# datetime.utcnow().dayofweek
def get_daily_historic_data_yahoo(
    ticker, start_date=(2000, 1, 1), end_date=None
):
    """
    Obtain daily price rows from Yahoo Finance as a list of tuples.

    ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc.
    start_date: Start date in (YYYY, M, D) format
    end_date: End date in (YYYY, M, D) format; None (the default) means
        today's date, resolved on every call rather than once when the
        function is defined.

    Returns a list of 7-tuples: a datetime parsed from the first CSV column
    followed by the next six columns as the strings received. On any
    download or parse failure a message is printed and the rows collected so
    far (possibly none) are returned; no exception is propagated.
    """
    if end_date is None:
        end_date = datetime.date.today().timetuple()[0:3]

    # Construct the Yahoo URL with the correct integer query parameters
    # for start and end dates. Note that the month parameters are zero-based!
    ticker_tup = (
        ticker, start_date[1] - 1, start_date[2],
        start_date[0], end_date[1] - 1, end_date[2],
        end_date[0]
    )
    yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
    yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
    yahoo_url = yahoo_url % ticker_tup

    # Bound before the try block so the final return is always valid, even
    # when the request itself raises.
    prices = []
    try:
        # Drop the CSV header line and the empty string after the last "\n".
        yf_data = requests.get(yahoo_url).text.split("\n")[1:-1]
        for y in yf_data:
            p = y.strip().split(',')
            prices.append(
                (datetime.datetime.strptime(p[0], '%Y-%m-%d'),
                 p[1], p[2], p[3], p[4], p[5], p[6])
            )
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print("Could not download Yahoo data: %s" % e)
    return prices
def get_daily_historid_data_google():
    """Stub: no retrieval is implemented; always returns a new empty list."""
    return []
def insert_daily_data_into_db(
    data_vendor_id, symbol_id, daily_data
):
    """
    Add a list of daily price tuples to the daily_price table.

    data_vendor_id: value stored in the data_vendor_id column of every row
    symbol_id: value stored in the symbol_id column of every row
    daily_data: iterable of 7-element tuples; element 0 is the price date
        and elements 1-6 fill open_price, high_price, low_price,
        close_price, volume and adj_close_price, in that order

    Each call opens, commits and closes its own connection instead of
    writing through the module-level one. A single connection shared by
    several worker processes is a documented cause of
    "MySQL server has gone away" (error 2006), so this keeps the function
    usable from parallel workers.

    Empty input returns immediately without contacting the database.
    Driver errors are propagated to the caller.
    """
    # One timestamp for both created_date and last_updated_date.
    now = datetime.datetime.utcnow()

    # Amend the data to include the vendor ID and symbol ID
    rows = [
        (data_vendor_id, symbol_id, d[0], now, now,
         d[1], d[2], d[3], d[4], d[5], d[6])
        for d in daily_data
    ]
    if not rows:
        return

    # Create the insert strings
    column_str = (
        "data_vendor_id, symbol_id, price_date, created_date, "
        "last_updated_date, open_price, high_price, low_price, "
        "close_price, volume, adj_close_price"
    )
    insert_str = ", ".join(["%s"] * 11)
    final_str = "INSERT IGNORE INTO daily_price (%s) VALUES (%s)" % \
        (column_str, insert_str)

    # Dedicated connection for this call. It is closed in all cases, and
    # closing without a commit discards the uncommitted rows.
    worker_con = mdb.connect(db_host, db_user, db_pass, db_name)
    try:
        cur = worker_con.cursor()
        cur.executemany(final_str, rows)
        worker_con.commit()
    finally:
        worker_con.close()
def load_ticker_history(t):
    """
    Download and store the daily price history of one symbol.

    t: sequence whose element 0 is the symbol id and element 1 the ticker,
        i.e. one entry of the list built by obtain_list_of_db_tickers()

    History is requested from 1 January 2015 onwards and stored under
    data vendor id '1'.
    """
    symbol_id, ticker = t[0], t[1]
    print("Adding data for %s" % ticker)
    # Plain decimal literals: the earlier (2015,01,01) form is a SyntaxError
    # on Python 3 and has the same value on Python 2.
    yf_data = get_daily_historic_data_yahoo(ticker, (2015, 1, 1))
    insert_daily_data_into_db('1', symbol_id, yf_data)
if __name__ == "__main__":
    # Data-truncation warnings (Yahoo precision stored into Decimal(19,4)
    # columns) are expected, so all warnings are silenced.
    warnings.filterwarnings('ignore')

    # Collect the (id, ticker) pairs to process and report the core count.
    tickers = obtain_list_of_db_tickers()
    lentickers = len(tickers)
    num_cores = multiprocessing.cpu_count()
    print("numCores = " + str(num_cores))

    if not is_db_current():
        # One delayed call per ticker. n_jobs is pinned to 1; the original
        # author noted that the reason was unclear.
        jobs = (delayed(load_ticker_history)(t) for t in tickers)
        Parallel(n_jobs=1)(jobs)
    else:
        print("db current: True")
    print("Successfully added Yahoo Finance pricing data to DB.")
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
OperationalError Fri Sep 11 22:42:58 2015 PID: 39551 Python
# Connection arguments passed through to connect().
dbconfig = {"database": "test", "user": "joe"}

# Supplying pool_name / pool_size asks connect() for a pooled connection
# (per the Connector/Python pooling docs the pool is created implicitly).
cnx = mysql.connector.connect(pool_name="mypool", pool_size=3, **dbconfig)
# Connection arguments passed through to the pool constructor.
dbconfig = {"database": "test", "user": "joe"}

# Build the pool explicitly, then request two connections from it.
cnxpool = mysql.connector.pooling.MySQLConnectionPool(
    pool_name="mypool",
    pool_size=3,
    **dbconfig
)
cnx1 = cnxpool.get_connection()
cnx2 = cnxpool.get_connection()