天涯论坛(tianya_news)

旧索引:tianya_news

新索引:tianya_news

字段含义 字段名称 数据类型
帖子ID id 字符串
帖子标题 title 字符串
帖子链接 url 字符串
帖子内容 content 字符串
帖子作者 host 字符串
发帖时间 published_at 日期
帖子点击数 clicks_num 数值
帖子回复数 replays_num 数值
爬取时间 crawled_at 日期

注意:插入数据时,帖子ID(id)字段的值会覆盖 Elasticsearch 的元数据 _id,即 _id 取帖子ID的值;_source 中与 _id 重复的ID字段(原 questionId)被移除,如下所示。

部分数据:

{
  "_index": "tianya_news",
  "_type": "tianya_news",
  "_id": "19550593",
  "_score": 1,
  "_source": {
    "title": "蔡学镛在各种动态语言中为什么比较偏爱 REBOL?"
  }
}

创建索引 tianya_news(注意:索引模板只在索引创建时生效,因此应先执行下文的 mapping 模板命令,再创建索引;否则已创建的索引不会应用该模板)

# Create the tianya_news index with a bodiless PUT request.
curl --request PUT 'http://127.0.0.1:9222/tianya_news'

创建 mapping 模板(若集群启用了 X-Pack 安全认证,执行命令时需要通过 -u 提供用户名和密码)

# Install the index template that is applied to every index named tianya_news*.
# Only match_mapping_type values that JSON type detection can produce
# (string, long, double, date) are kept. The former byte/short/integer/geo_point
# dynamic templates used invalid values: Elasticsearch 5.x ignores them with a
# deprecation warning and 6.x rejects the request, so they were removed.
# Content-Type is sent explicitly because Elasticsearch 6.0+ requires it for bodies.
# NOTE(review): "template", "_default_" and "_all" are 5.x-era syntax - confirm the
# target cluster version before reusing this on 6.x or later.
curl -u 用户名:密码 -XPUT http://127.0.0.1:9222/_template/tianya_news -H 'Content-Type: application/json' -d '
{
    "template": "tianya_news*",
    "settings": {
        "refresh_interval": "60s",
        "number_of_replicas": "1",
        "number_of_shards": "15"
    },
    "mappings": {
        "_default_": {
            "_all": {
                "enabled": true
            },
            "_source": {
                "enabled": true
            },
            "dynamic_templates": [
                {
                    "message_field": {
                        "match": "message",
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "text"
                        }
                    }
                },
                {
                    "string_fields": {
                        "match": "*",
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "keyword"
                        }
                    }
                },
                {
                    "double_fields": {
                        "match": "*",
                        "match_mapping_type": "double",
                        "mapping": {
                            "type": "double"
                        }
                    }
                },
                {
                    "long_fields": {
                        "match": "*",
                        "match_mapping_type": "long",
                        "mapping": {
                            "type": "long"
                        }
                    }
                },
                {
                    "date_fields": {
                        "match": "*",
                        "match_mapping_type": "date",
                        "mapping": {
                            "type": "date"
                        }
                    }
                }
            ],
            "properties": {
                "crawled_at": {
                    "format": "strict_date_optional_time||epoch_millis",
                    "type": "date"
                },
                "published_at": {
                    "format": "strict_date_optional_time||epoch_millis",
                    "type": "date"
                },
                "id": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "title": {
                    "type": "text",
                    "analyzer": "ik_max_word",
                    "search_analyzer": "ik_max_word"
                },
                "url": {
                    "type": "keyword"
                },
                "content": {
                    "type": "text",
                    "analyzer": "ik_max_word",
                    "search_analyzer": "ik_max_word"
                },
                "host": {
                    "type": "keyword",
                    "ignore_above": 256
                },
                "clicks_num": {
                    "type": "integer"
                },
                "replays_num": {
                    "type": "integer"
                }
            }
        }
    }
}'

results matching ""

    No results matching ""