专注于 JetBrains IDEA 全家桶,永久激活,教程
持续更新 PyCharm,IDEA,WebStorm,PhpStorm,DataGrip,RubyMine,CLion,AppCode 永久激活教程

elasticsearch python 简单实践

1、创建索引

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch

es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"


def main():
    """Create the ``index_name`` index with an explicit mapping and dump it.

    The mapping declares four fields on ``doc_type``: ``name`` and ``gender``
    as ``text``, ``age`` as ``integer`` and ``phone`` as ``keyword``.  Once
    the index exists, a ``match_all`` search is run against it and the raw
    response is pretty-printed.
    """
    es = Elasticsearch(es_hosts)
    index_body = {
        "mappings": {
            doc_type: {
                "properties": {
                    "name": {"type": "text"},
                    "gender": {"type": "text"},
                    "age": {"type": "integer"},
                    "phone": {"type": "keyword"},
                }
            }
        }
    }
    # Creating an index that already exists is rejected by the server, so
    # only create it when it is missing; this keeps the script re-runnable.
    if not es.indices.exists(index=index_name):
        es.indices.create(index=index_name, body=index_body)
    res = es.search(index=index_name, body={"query": {"match_all": {}}})
    pprint(res)


if __name__ == '__main__':
    main()

查询创建的索引

{
    "log": {
        "aliases": {},
        "mappings": {
            "20170103": {
                "properties": {
                    "age": {
                        "type": "integer"
                    },
                    "gender": {
                        "type": "text"
                    },
                    "name": {
                        "type": "text"
                    },
                    "phone": {
                        "type": "keyword"
                    }
                }
            }
        },
        "settings": {
            "index": {
                "creation_date": "1512980895137",
                "number_of_shards": "5",
                "number_of_replicas": "1",
                "uuid": "TOrOEfoHQiSKX8oqlZ6URw",
                "version": {
                    "created": "5050099"
                },
                "provided_name": "log"
            }
        }
    }
}

你也可以先创建索引,然后创建type再设置mapping

def main():
    """Create the ``students`` index if needed, then map the ``yinianji`` type.

    Demonstrates the two-step variant: the index is created first without a
    mapping, and the mapping for ``yinianji`` (a single ``name`` text field)
    is added afterwards with ``put_mapping``.  Finally a ``match_all`` search
    on ``students`` is pretty-printed.
    """
    es = Elasticsearch(es_hosts)
    # put_mapping targets an existing index, so create it first when absent.
    if not es.indices.exists(index="students"):
        es.indices.create(index="students")
    es.indices.put_mapping(
        index="students",
        doc_type="yinianji",
        body={"properties": {"name": {"type": "text"}}},
    )
    # Query the index that was just mapped, not the unrelated ``index_name``.
    res = es.search(index="students", body={"query": {"match_all": {}}})
    pprint(res)


if __name__ == '__main__':
    main()

2、插入数据

  • index (单条插入)
#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
#        "filename": '1233445',
#        "url": '/root',
#        "status": 0,
#        "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
#        #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
#        "total": 100,
#        "county": '111111 111112',
#        "gender": 0,
#        "agelow": 12,
#        "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)

  • bulk (批量插入)
# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Ten bulk actions, ids 1..10, all targeting students/yinianji with the same
# one-field document.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": student_id,
        "_source": {"name": 'weishihao'},
    }
    for student_id in range(1, 11)
]


def main():
    """Bulk-index ``body`` and pretty-print a ``match_all`` search on students."""
    client = Elasticsearch(es_hosts)
    helpers.bulk(client, body)
    everything = {"query": {"match_all": {}}}
    response = client.search(index='students', body=everything)
    pprint(response)


if __name__ == '__main__':
    main()

3、修改mapping结构

在 Elasticsearch 中,已有 field 的 mapping 不能修改,更改 mapping 结构只能新增 field。所以可以用 put_mapping 追加新的 field:

# Fields to append to the mapping of index_name/doc_type.
# NOTE(review): put_mapping can only add fields - confirm none of these names
# already exists in the target index with a different type.
new_fields = {
    "county": {"type": "text"},
    "total": {"type": "integer"},
    "gender": {"type": "integer"},
    "agelow": {"type": "integer"},
    "agehigh": {"type": "integer"},
}
es.indices.put_mapping(index=index_name, doc_type=doc_type, body={"properties": new_fields})

4、查询数据

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Same ten bulk actions as in the insert example (ids 1..10); they are built
# here but not sent, because the bulk call in main() is left out.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": student_id,
        "_source": {"name": 'weishihao'},
    }
    for student_id in range(1, 11)
]


def main():
    """Pretty-print a ``match_all`` search restricted to students/yinianji."""
    client = Elasticsearch(es_hosts)
    # The bulk insert is not repeated here; this example only reads.
    match_everything = {"query": {"match_all": {}}}
    response = client.search(index='students', doc_type='yinianji', body=match_everything)
    pprint(response)


if __name__ == '__main__':
    main()

返回值中的 total 值会给出匹配到的总数据量,但是 hits 中返回的文档默认只有 10 条。那么,我们如何查询所有的数据呢?可以使用 helpers.scan:

 es = Elasticsearch(es_hosts)
 # helpers.scan iterates over every hit of the match_all query on the
 # 'quanguorenkou' index, keeping the scroll context alive for 10 minutes.
 scanResp = helpers.scan(es, {"query": {"match_all": {}}}, index='quanguorenkou', scroll="10m")
 for hit in scanResp:
     print(hit)

这样我们就可以查询所有的数据了。但是如果数据量比较大,而我们仅仅需要其中一部分数据的话,可以指定查询条件,比如前缀查询:

 # Scan the 'students' index for documents whose "studentid" starts with "330",
 # with a 100-minute scroll timeout.
 # NOTE(review): size=40000 is presumably the per-batch hit count; Elasticsearch
 # limits scroll batches to index.max_result_window - confirm the index allows it.
 scanResp = helpers.scan(es, {"query": {"prefix": {"studentid": {"value": "330"}}}}, index= 'students', scroll= "100m", size=40000)

上述语句实现的功能是查询studentid字段以330开头的所有文档。

5、 修改数据

# Run an update script on document '1233445' that sets its "status" field to 1.
es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})

6、删除索引

# Drop the whole index. The other snippets keep the index name in
# `index_name`; a bare `index` variable is never defined.
es.indices.delete(index=index_name)

7、一个完整的例子

# -.- coding:utf-8 -.-
from __future__ import print_function
from elasticsearch import Elasticsearch, helpers
from pprint import pprint
import sys
import os
sys.path.append(os.path.abspath(os.path.pardir))

from multiprocessing import current_process, Pool
from collections import deque
import time
import re
# Connection settings for the complete example.
es_hosts = ["192.168.31.13"]
# (username, password) pair passed as http_auth when the client is built.
# NOTE(review): credentials are hard-coded; presumably placeholders - confirm
# before reusing this outside a test cluster.
es_auth = ("elastic", "changeme")
# Index and document type the example works on.
index_name = 'exportdata'
doc_type = 'output'
# Module-level client shared by the calls that follow.
es = Elasticsearch(es_hosts, http_auth = es_auth)
# Start from a clean slate: drop any previous copy of the index, then recreate
# it with the full mapping. ignore=404 keeps the first run (when the index
# does not exist yet) from aborting with a not-found error.
es.indices.delete(index=index_name, ignore=404)
es.indices.create(
    index=index_name,
    body={
        "mappings": {
            doc_type: {
                "properties": {
                    "filename": {"type": "text"},
                    "url": {"type": "text"},
                    "status": {"type": "integer"},
                    # Dates are stored as "yyyy-MM-dd HH:mm:ss" strings.
                    "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "county": {"type": "text"},
                    "total": {"type": "integer"},
                    "gender": {"type": "integer"},
                    "agelow": {"type": "integer"},
                    "agehigh": {"type": "integer"},
                }
            }
        }
    },
)
#es.indices.put_mapping(
#                index=index_name,
#                doc_type=doc_type,
#                body={
#                    "properties": {
#                         "county": {"type": "text"},
#                         "total" : {"type": "integer"},
#                         "gender": {"type": "integer"},
#                         "agelow": {"type": "integer"},
#                         "agehigh": {"type": "integer"}
#                    }
#               }
#            )
#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
#        "filename": '1233445',
#        "url": '/root',
#        "status": 0,
#        "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
#        #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
#        "total": 100,
#        "county": '111111 111112',
#        "gender": 0,
#        "agelow": 12,
#        "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
#es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})

文章永久链接:https://tech.souyunku.com/41084

未经允许不得转载:搜云库技术团队 » elasticsearch python 简单实践

JetBrains 全家桶,激活、破解、教程

提供 JetBrains 全家桶激活码、注册码、破解补丁下载及详细激活教程,支持 IntelliJ IDEA、PyCharm、WebStorm 等工具的永久激活。无论是破解教程,还是最新激活码,均可免费获得,帮助开发者解决常见激活问题,确保轻松破解并快速使用 JetBrains 软件。获取免费的破解补丁和激活码,快速解决激活难题,全面覆盖 2024/2025 版本!

联系我们联系我们