当前位置：搜云库技术团队 > Java汇总 > 正文

自动识别csv文件编码并极速导入Postgresql

2020-08-12 分类：Java汇总 阅读(567) 评论(0)

IDEA2023.1.3破解,IDEA破解,IDEA 2023.1破解,最新IDEA激活码

好不容易搜到的方法，先直接上代码：

import io
import urllib
import pandas as pd
from sqlalchemy import create_engine
import getfile_encoding as gcode
import datetime
#可将csv文件快速导入pg数据库，实测79万条9秒
def getFileChar(filename):
    """Guess the text encoding of a file from its first 200 bytes.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of the result of
        ``chardet.detect`` for the sampled bytes.
    """
    # NOTE(review): ``chardet`` is not imported in the visible part of this
    # file -- confirm it is imported at module level wherever this function
    # actually lives (the script below calls it via ``getfile_encoding``).
    # The context manager closes the handle even if the read raises.
    with open(filename, 'rb') as f:
        data = f.read(200)
    return chardet.detect(data).get('encoding')
def write_to_table(df, table_name, if_exists='fail'):
    """Create a table in the ``public`` schema from *df* and bulk-load it via COPY.

    The frame is serialised to an in-memory, ``|``-delimited CSV, the table
    is created through pandas' SQL helpers, and the CSV is then streamed to
    the server with ``COPY ... FROM STDIN`` on the raw DBAPI cursor.

    Args:
        df: DataFrame whose rows are loaded; its index is not written.
        table_name: Name of the target table in the ``public`` schema.
        if_exists: Forwarded to ``pandas.io.sql.SQLTable`` (the caller in
            this file passes ``'append'``).

    Raises:
        Whatever pandas, SQLAlchemy or the database driver raise; the engine
        is disposed in every case.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote_plus

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration. quote_plus escapes the '@' so the URL stays parseable.
    pwd = 'qais@123'
    conn_str = "postgresql://qais:%s@localhost/db_qais" % quote_plus(pwd)
    # The legacy ``encoding`` keyword is omitted: it is deprecated in
    # SQLAlchemy 1.4 and rejected by 2.0, and is not needed on Python 3.
    db_engine = create_engine(conn_str, echo=False)
    try:
        string_data_io = io.StringIO()
        df.to_csv(string_data_io, sep='|', index=False)
        string_data_io.seek(0)  # rewind so COPY reads from the start

        # NOTE(review): pandasSQL_builder / SQLTable are pandas internals and
        # may change between pandas releases -- verify on upgrade.
        pd_sql_engine = pd.io.sql.pandasSQL_builder(db_engine)
        table = pd.io.sql.SQLTable(table_name, pd_sql_engine, frame=df,
                                   index=False, if_exists=if_exists,
                                   schema='public')
        table.create()

        # Quote the identifier (doubling embedded quotes) so mixed-case or
        # unusual names work and cannot inject SQL into the COPY statement.
        quoted_name = '"%s"' % str(table_name).replace('"', '""')
        # HEADER makes the server skip the column-name line written by to_csv.
        copy_cmd = ("COPY public.%s FROM STDIN HEADER DELIMITER '|' CSV"
                    % quoted_name)
        with db_engine.connect() as connection:
            # copy_expert is a psycopg2 cursor method, reached through the
            # raw DBAPI connection -- assumes the psycopg2 driver is in use.
            with connection.connection.cursor() as cursor:
                cursor.copy_expert(copy_cmd, string_data_io)
            connection.connection.commit()
    finally:
        # Release pooled connections even when the load fails.
        db_engine.dispose()
#版权声明：本文为CSDN博主「仙人掌_lz」的原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上出处链接及本声明。
#链接：https://blog.csdn.net/qq_36603091/java/article/details/79587971
if __name__=='__main__':
    # Detect the CSV's encoding, load it, and print a timestamp before and
    # after the bulk import so the elapsed time can be read off the output.
    csv_path = 'D:/vs/tmp/数据2020-218new.csv'
    detected_encoding = gcode.getFileChar(csv_path)
    frame = pd.read_csv(csv_path, encoding=detected_encoding)
    started_at = datetime.datetime.now()
    print(started_at)
    write_to_table(frame, 'testinfo', if_exists='append')
    finished_at = datetime.datetime.now()
    print(finished_at)

如果采用下面这种方式，速度极慢，而且特别耗内存：

# Slow alternative, kept for comparison: the original author notes that
# loading through to_sql is very slow and memory-hungry compared with the
# COPY-based write_to_table approach.
# Import the submodule explicitly: a bare ``import urllib`` does not
# guarantee that ``urllib.parse`` has been loaded.
from urllib.parse import quote_plus

filename = 'D:/vs/tmp/test2020-218new.csv'
ecding = gcode.getFileChar(filename)
df = pd.read_csv(filename, encoding=ecding)
print(datetime.datetime.now())
# NOTE(review): credentials are hard-coded; quote_plus escapes the '@'.
pwd = 'qais@123'
conn_str = "postgresql://qais:%s@localhost/db_qais" % quote_plus(pwd)
# The legacy ``encoding`` keyword is omitted: it is deprecated in
# SQLAlchemy 1.4 and rejected by 2.0.
engine = create_engine(conn_str, echo=False)
try:
    df.to_sql('custominfo', engine, if_exists='append', index=False)
finally:
    # Release pooled connections even if the insert fails.
    engine.dispose()
print(datetime.datetime.now())

文章永久链接：https://tech.souyunku.com/?p=42179

Warning: A non-numeric value encountered in /data/wangzhan/tech.souyunku.com.wp/wp-content/themes/dux/functions-theme.php on line 1154
赞(73) 打赏

未经允许不得转载：搜云库技术团队 » 自动识别csv文件编码并极速导入Postgresql

IDEA2023.1.3破解,IDEA破解,IDEA 2023.1破解,最新IDEA激活码

IDEA2023.1.3破解,IDEA破解,IDEA 2023.1破解,最新IDEA激活码

相关推荐

评论抢沙发

大前端WP主题更专业更方便

联系我们联系我们

QQ咨询
QQ咨询
回顶
回顶部

Fatal error: Uncaught Exception: Cache directory not writable. Comet Cache needs this directory please: `/data/wangzhan/tech.souyunku.com.wp/wp-content/cache/comet-cache/cache/https/tech-souyunku-com/index.q`. Set permissions to `755` or higher; `777` might be needed in some cases. in /data/wangzhan/tech.souyunku.com.wp/wp-content/plugins/comet-cache/src/includes/traits/Ac/ObUtils.php:367 Stack trace: #0 [internal function]: WebSharks\CometCache\Classes\AdvancedCache->outputBufferCallbackHandler() #1 /data/wangzhan/tech.souyunku.com.wp/wp-includes/functions.php(5109): ob_end_flush() #2 /data/wangzhan/tech.souyunku.com.wp/wp-includes/class-wp-hook.php(303): wp_ob_end_flush_all() #3 /data/wangzhan/tech.souyunku.com.wp/wp-includes/class-wp-hook.php(327): WP_Hook->apply_filters() #4 /data/wangzhan/tech.souyunku.com.wp/wp-includes/plugin.php(470): WP_Hook->do_action() #5 /data/wangzhan/tech.souyunku.com.wp/wp-includes/load.php(1097): do_action() #6 [internal function]: shutdown_action_hook() #7 {main} thrown in /data/wangzhan/tech.souyunku.com.wp/wp-content/plugins/comet-cache/src/includes/traits/Ac/ObUtils.php on line 367