好不容易搜到的方法,先直接上代码:
import io
import urllib
import pandas as pd
from sqlalchemy import create_engine
import getfile_encoding as gcode
import datetime
#可将csv文件快速导入pg数据库,实测79万条9秒
def getFileChar(filename):
f=open(filename,'rb')
data=f.read(200)
f.close()
#print(chardet.detect(data))
#print(chardet.detect(data).get('encoding'))
return chardet.detect(data).get('encoding')
def write_to_table(df, table_name, if_exists='fail'):#append
pwd='qais@123'
conn_str="postgresql://qais:%s@localhost/db_qais"%urllib.parse.quote_plus(pwd)#?charset=utf8
db_engine = create_engine(conn_str,encoding='utf-8',echo=False)#
string_data_io = io.StringIO()
df.to_csv(string_data_io, sep='|', index=False)
pd_sql_engine = pd.io.sql.pandasSQL_builder(db_engine)
table = pd.io.sql.SQLTable(table_name, pd_sql_engine, frame=df,
index=False, if_exists=if_exists,schema = 'public')#goods_code
table.create()
string_data_io.seek(0)
#string_data_io.readline() # remove header
with db_engine.connect() as connection:
with connection.connection.cursor() as cursor:
copy_cmd = "COPY public.%s FROM STDIN HEADER DELIMITER '|' CSV" %table_name#goods_code
cursor.copy_expert(copy_cmd, string_data_io)
connection.connection.commit()
#版权声明:本文为CSDN博主「仙人掌_lz」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上出处链接及本声明。
#链接:https://blog.csdn.net/qq_36603091/java/article/details/79587971
if __name__=='__main__':
filename='D:/vs/tmp/数据2020-218new.csv'# test2020-218new.csv
ecding=gcode.getFileChar(filename)
df=pd.read_csv(filename,encoding=ecding)
print(datetime.datetime.now())
write_to_table(df,'testinfo','append')
print(datetime.datetime.now())
如采用如下方式,速度极慢,还特别耗内存:
filename='D:/vs/tmp/test2020-218new.csv'
ecding=gcode.getFileChar(filename)
#engine=create_engine(**pg_config)
#df=pd.read_csv('D:/vs/tmp/test2020-218new.csv',encoding='latin-1')
#df=pd.read_csv('D:/vs/tmp/test2020-218new.csv',encoding='utf-8')
df=pd.read_csv(filename,encoding=ecding)
#print(df)
print(datetime.datetime.now())
pwd='qais@123'
#pwd_new=parse.quote_plus(pwd)
conn_str="postgresql://qais:%s@localhost/db_qais"%urllib.parse.quote_plus(pwd)#?charset=utf8
engine=create_engine(conn_str,encoding='utf-8',echo=False)#
#此方式太慢、占内存,用上面方法中的函数实现非常快
pd.io.sql.to_sql(df,'custominfo',engine,if_exists='append',index=False,index_label=None)#,method='multi',index="False"
engine.dispose()
print(datetime.datetime.now())