fields

编辑

在不同的场景下,以不同的方式索引同一个字段通常很有用。这就是多字段的目的。例如,一个 string 字段可以被映射为用于全文搜索的 text 字段,以及用于排序或聚合的 keyword 字段。

resp = client.indices.create(
    index="my-index-000001",
    mappings={
        "properties": {
            "city": {
                "type": "text",
                "fields": {
                    "raw": {
                        "type": "keyword"
                    }
                }
            }
        }
    },
)
print(resp)

resp1 = client.index(
    index="my-index-000001",
    id="1",
    document={
        "city": "New York"
    },
)
print(resp1)

resp2 = client.index(
    index="my-index-000001",
    id="2",
    document={
        "city": "York"
    },
)
print(resp2)

resp3 = client.search(
    index="my-index-000001",
    query={
        "match": {
            "city": "york"
        }
    },
    sort={
        "city.raw": "asc"
    },
    aggs={
        "Cities": {
            "terms": {
                "field": "city.raw"
            }
        }
    },
)
print(resp3)
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    mappings: {
      properties: {
        city: {
          type: 'text',
          fields: {
            raw: {
              type: 'keyword'
            }
          }
        }
      }
    }
  }
)
puts response

response = client.index(
  index: 'my-index-000001',
  id: 1,
  body: {
    city: 'New York'
  }
)
puts response

response = client.index(
  index: 'my-index-000001',
  id: 2,
  body: {
    city: 'York'
  }
)
puts response

response = client.search(
  index: 'my-index-000001',
  body: {
    query: {
      match: {
        city: 'york'
      }
    },
    sort: {
      'city.raw' => 'asc'
    },
    aggregations: {
      "Cities": {
        terms: {
          field: 'city.raw'
        }
      }
    }
  }
)
puts response
{
	res, err := es.Indices.Create(
		"my-index-000001",
		es.Indices.Create.WithBody(strings.NewReader(`{
	  "mappings": {
	    "properties": {
	      "city": {
	        "type": "text",
	        "fields": {
	          "raw": {
	            "type": "keyword"
	          }
	        }
	      }
	    }
	  }
	}`)),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Index(
		"my-index-000001",
		strings.NewReader(`{
	  "city": "New York"
	}`),
		es.Index.WithDocumentID("1"),
		es.Index.WithPretty(),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Index(
		"my-index-000001",
		strings.NewReader(`{
	  "city": "York"
	}`),
		es.Index.WithDocumentID("2"),
		es.Index.WithPretty(),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Search(
		es.Search.WithIndex("my-index-000001"),
		es.Search.WithBody(strings.NewReader(`{
	  "query": {
	    "match": {
	      "city": "york"
	    }
	  },
	  "sort": {
	    "city.raw": "asc"
	  },
	  "aggs": {
	    "Cities": {
	      "terms": {
	        "field": "city.raw"
	      }
	    }
	  }
	}`)),
		es.Search.WithPretty(),
	)
	fmt.Println(res, err)
}
const response = await client.indices.create({
  index: "my-index-000001",
  mappings: {
    properties: {
      city: {
        type: "text",
        fields: {
          raw: {
            type: "keyword",
          },
        },
      },
    },
  },
});
console.log(response);

const response1 = await client.index({
  index: "my-index-000001",
  id: 1,
  document: {
    city: "New York",
  },
});
console.log(response1);

const response2 = await client.index({
  index: "my-index-000001",
  id: 2,
  document: {
    city: "York",
  },
});
console.log(response2);

const response3 = await client.search({
  index: "my-index-000001",
  query: {
    match: {
      city: "york",
    },
  },
  sort: {
    "city.raw": "asc",
  },
  aggs: {
    Cities: {
      terms: {
        field: "city.raw",
      },
    },
  },
});
console.log(response3);
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "city": {
        "type": "text",
        "fields": {
          "raw": { 
            "type":  "keyword"
          }
        }
      }
    }
  }
}

PUT my-index-000001/_doc/1
{
  "city": "New York"
}

PUT my-index-000001/_doc/2
{
  "city": "York"
}

GET my-index-000001/_search
{
  "query": {
    "match": {
      "city": "york" 
    }
  },
  "sort": {
    "city.raw": "asc" 
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" 
      }
    }
  }
}

city.raw 字段是 city 字段的 keyword 版本。

city 字段可以用于全文搜索。

city.raw 字段可以用于排序和聚合。

您可以使用 更新映射 API 将多字段添加到现有字段。

如果在添加多字段时索引(或数据流)中包含文档,则这些文档将没有新多字段的值。您可以使用 按查询更新 API 填充新的多字段。

多字段映射与父字段的映射完全分离。多字段不继承其父字段的任何映射选项。多字段不会更改原始的 _source 字段。

具有多个分析器的多字段

编辑

多字段的另一个用例是以不同的方式分析同一个字段,以获得更好的相关性。例如,我们可以使用 standard 分析器来索引一个字段,该分析器将文本分解为单词,并再次使用 english 分析器,该分析器将单词词干化为它们的词根形式。

resp = client.indices.create(
    index="my-index-000001",
    mappings={
        "properties": {
            "text": {
                "type": "text",
                "fields": {
                    "english": {
                        "type": "text",
                        "analyzer": "english"
                    }
                }
            }
        }
    },
)
print(resp)

resp1 = client.index(
    index="my-index-000001",
    id="1",
    document={
        "text": "quick brown fox"
    },
)
print(resp1)

resp2 = client.index(
    index="my-index-000001",
    id="2",
    document={
        "text": "quick brown foxes"
    },
)
print(resp2)

resp3 = client.search(
    index="my-index-000001",
    query={
        "multi_match": {
            "query": "quick brown foxes",
            "fields": [
                "text",
                "text.english"
            ],
            "type": "most_fields"
        }
    },
)
print(resp3)
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    mappings: {
      properties: {
        text: {
          type: 'text',
          fields: {
            english: {
              type: 'text',
              analyzer: 'english'
            }
          }
        }
      }
    }
  }
)
puts response

response = client.index(
  index: 'my-index-000001',
  id: 1,
  body: {
    text: 'quick brown fox'
  }
)
puts response

response = client.index(
  index: 'my-index-000001',
  id: 2,
  body: {
    text: 'quick brown foxes'
  }
)
puts response

response = client.search(
  index: 'my-index-000001',
  body: {
    query: {
      multi_match: {
        query: 'quick brown foxes',
        fields: [
          'text',
          'text.english'
        ],
        type: 'most_fields'
      }
    }
  }
)
puts response
{
	res, err := es.Indices.Create(
		"my-index-000001",
		es.Indices.Create.WithBody(strings.NewReader(`{
	  "mappings": {
	    "properties": {
	      "text": {
	        "type": "text",
	        "fields": {
	          "english": {
	            "type": "text",
	            "analyzer": "english"
	          }
	        }
	      }
	    }
	  }
	}`)),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Index(
		"my-index-000001",
		strings.NewReader(`{
	  "text": "quick brown fox"
	} `),
		es.Index.WithDocumentID("1"),
		es.Index.WithPretty(),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Index(
		"my-index-000001",
		strings.NewReader(`{
	  "text": "quick brown foxes"
	} `),
		es.Index.WithDocumentID("2"),
		es.Index.WithPretty(),
	)
	fmt.Println(res, err)
}

{
	res, err := es.Search(
		es.Search.WithIndex("my-index-000001"),
		es.Search.WithBody(strings.NewReader(`{
	  "query": {
	    "multi_match": {
	      "query": "quick brown foxes",
	      "fields": [
	        "text",
	        "text.english"
	      ],
	      "type": "most_fields"
	    }
	  }
	}`)),
		es.Search.WithPretty(),
	)
	fmt.Println(res, err)
}
const response = await client.indices.create({
  index: "my-index-000001",
  mappings: {
    properties: {
      text: {
        type: "text",
        fields: {
          english: {
            type: "text",
            analyzer: "english",
          },
        },
      },
    },
  },
});
console.log(response);

const response1 = await client.index({
  index: "my-index-000001",
  id: 1,
  document: {
    text: "quick brown fox",
  },
});
console.log(response1);

const response2 = await client.index({
  index: "my-index-000001",
  id: 2,
  document: {
    text: "quick brown foxes",
  },
});
console.log(response2);

const response3 = await client.search({
  index: "my-index-000001",
  query: {
    multi_match: {
      query: "quick brown foxes",
      fields: ["text", "text.english"],
      type: "most_fields",
    },
  },
});
console.log(response3);
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "text": { 
        "type": "text",
        "fields": {
          "english": { 
            "type":     "text",
            "analyzer": "english"
          }
        }
      }
    }
  }
}

PUT my-index-000001/_doc/1
{ "text": "quick brown fox" } 

PUT my-index-000001/_doc/2
{ "text": "quick brown foxes" } 

GET my-index-000001/_search
{
  "query": {
    "multi_match": {
      "query": "quick brown foxes",
      "fields": [ 
        "text",
        "text.english"
      ],
      "type": "most_fields" 
    }
  }
}

text 字段使用 standard 分析器。

text.english 字段使用 english 分析器。

索引两个文档,一个包含 fox,另一个包含 foxes。

查询 text 和 text.english 字段并组合分数。

text 字段在第一个文档中包含术语 fox,在第二个文档中包含 foxes。text.english 字段对于两个文档都包含 fox,因为 foxes 被词干化为 fox。

查询字符串也由 text 字段的 standard 分析器分析,并由 text.english 字段的 english 分析器分析。词干化字段允许对 foxes 的查询也匹配仅包含 fox 的文档。这使我们能够匹配尽可能多的文档。通过也查询未词干化的 text 字段,我们提高了完全匹配 foxes 的文档的相关性得分。