analyzer

编辑

只有 text 字段支持 analyzer 映射参数。

analyzer 参数指定在索引或搜索 text 字段时用于文本分析的分析器。

除非被 search_analyzer 映射参数覆盖,否则此分析器将同时用于索引和搜索分析。请参阅“指定分析器”。

我们建议在生产环境中使用分析器之前进行测试。请参阅 测试分析器

analyzer 设置不能使用更新映射 API在现有字段上更新。

search_quote_analyzer

编辑

search_quote_analyzer 设置允许您为短语指定分析器,这在需要为短语查询禁用停用词时特别有用。

要为短语禁用停用词,字段需要使用以下三个分析器设置:

  1. 一个 analyzer 设置,用于索引所有词项,包括停用词
  2. 一个 search_analyzer 设置,用于非短语查询,会删除停用词
  3. 一个 search_quote_analyzer 设置,用于短语查询,不会删除停用词
# Every request in this example targets the same index.
index_name = "my-index-000001"

# Analyzer that keeps every term: standard tokenizer followed by lowercasing.
keep_all_terms_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    "filter": ["lowercase"],
}

# Same pipeline plus the english_stop filter, so stop words are removed.
drop_stop_words_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    "filter": ["lowercase", "english_stop"],
}

# Token filter definition referenced by name from the stop-word analyzer.
english_stop_filter = {"type": "stop", "stopwords": "_english_"}

index_settings = {
    "analysis": {
        "analyzer": {
            "my_analyzer": keep_all_terms_analyzer,
            "my_stop_analyzer": drop_stop_words_analyzer,
        },
        "filter": {"english_stop": english_stop_filter},
    }
}

# "title" uses my_analyzer at index time and for quoted phrases, and
# my_stop_analyzer for every other search.
title_field = {
    "type": "text",
    "analyzer": "my_analyzer",
    "search_analyzer": "my_stop_analyzer",
    "search_quote_analyzer": "my_analyzer",
}

resp = client.indices.create(
    index=index_name,
    settings=index_settings,
    mappings={"properties": {"title": title_field}},
)
print(resp)

# Index the two sample documents.
first_document = {"title": "The Quick Brown Fox"}
resp1 = client.index(index=index_name, id="1", document=first_document)
print(resp1)

second_document = {"title": "A Quick Brown Fox"}
resp2 = client.index(index=index_name, id="2", document=second_document)
print(resp2)

# The embedded double quotes are part of the query string itself.
phrase_query = '"the quick brown fox"'
resp3 = client.search(
    index=index_name,
    query={"query_string": {"query": phrase_query}},
)
print(resp3)
# Every request in this example targets the same index.
index_name = 'my-index-000001'

# Two custom analyzers: my_analyzer keeps every term, my_stop_analyzer
# additionally runs the english_stop filter.
analysis = {
  analyzer: {
    my_analyzer: {
      type: 'custom',
      tokenizer: 'standard',
      filter: ['lowercase']
    },
    my_stop_analyzer: {
      type: 'custom',
      tokenizer: 'standard',
      filter: ['lowercase', 'english_stop']
    }
  },
  filter: {
    english_stop: { type: 'stop', stopwords: '_english_' }
  }
}

# "title" uses my_analyzer at index time and for quoted phrases, and
# my_stop_analyzer for every other search.
title_field = {
  type: 'text',
  analyzer: 'my_analyzer',
  search_analyzer: 'my_stop_analyzer',
  search_quote_analyzer: 'my_analyzer'
}

create_body = {
  settings: { analysis: analysis },
  mappings: { properties: { title: title_field } }
}
response = client.indices.create(index: index_name, body: create_body)
puts response

# Index the two sample documents.
response = client.index(index: index_name, id: 1, body: { title: 'The Quick Brown Fox' })
puts response

response = client.index(index: index_name, id: 2, body: { title: 'A Quick Brown Fox' })
puts response

# The embedded double quotes are part of the query string itself.
phrase_query = '"the quick brown fox"'
search_body = { query: { query_string: { query: phrase_query } } }
response = client.search(index: index_name, body: search_body)
puts response
// Every request in this example targets the same index.
const indexName = "my-index-000001";

// Two custom analyzers: my_analyzer keeps every term, my_stop_analyzer
// additionally runs the english_stop filter.
const analysis = {
  analyzer: {
    my_analyzer: {
      type: "custom",
      tokenizer: "standard",
      filter: ["lowercase"],
    },
    my_stop_analyzer: {
      type: "custom",
      tokenizer: "standard",
      filter: ["lowercase", "english_stop"],
    },
  },
  filter: {
    english_stop: { type: "stop", stopwords: "_english_" },
  },
};

// "title" uses my_analyzer at index time and for quoted phrases, and
// my_stop_analyzer for every other search.
const titleField = {
  type: "text",
  analyzer: "my_analyzer",
  search_analyzer: "my_stop_analyzer",
  search_quote_analyzer: "my_analyzer",
};

const response = await client.indices.create({
  index: indexName,
  settings: { analysis },
  mappings: { properties: { title: titleField } },
});
console.log(response);

// Index the two sample documents.
const firstDocument = { title: "The Quick Brown Fox" };
const response1 = await client.index({
  index: indexName,
  id: 1,
  document: firstDocument,
});
console.log(response1);

const secondDocument = { title: "A Quick Brown Fox" };
const response2 = await client.index({
  index: indexName,
  id: 2,
  document: secondDocument,
});
console.log(response2);

// The embedded double quotes are part of the query string itself.
const phraseQuery = '"the quick brown fox"';
const response3 = await client.search({
  index: indexName,
  query: { query_string: { query: phraseQuery } },
});
console.log(response3);
# Create the index with two custom analyzers (my_analyzer, my_stop_analyzer)
# and map the "title" text field to use my_analyzer as analyzer and
# search_quote_analyzer, and my_stop_analyzer as search_analyzer.
PUT my-index-000001
{
   "settings":{
      "analysis":{
         "analyzer":{
            "my_analyzer":{ 
               "type":"custom",
               "tokenizer":"standard",
               "filter":[
                  "lowercase"
               ]
            },
            "my_stop_analyzer":{ 
               "type":"custom",
               "tokenizer":"standard",
               "filter":[
                  "lowercase",
                  "english_stop"
               ]
            }
         },
         "filter":{
            "english_stop":{
               "type":"stop",
               "stopwords":"_english_"
            }
         }
      }
   },
   "mappings":{
       "properties":{
          "title": {
             "type":"text",
             "analyzer":"my_analyzer", 
             "search_analyzer":"my_stop_analyzer", 
             "search_quote_analyzer":"my_analyzer" 
         }
      }
   }
}

# Index the first sample document.
PUT my-index-000001/_doc/1
{
   "title":"The Quick Brown Fox"
}

# Index the second sample document.
PUT my-index-000001/_doc/2
{
   "title":"A Quick Brown Fox"
}

# Run a query_string search whose query text is wrapped in double quotes.
GET my-index-000001/_search
{
   "query":{
      "query_string":{
         "query":"\"the quick brown fox\"" 
      }
   }
}

search_quote_analyzer 设置可以使用更新映射 API在现有字段上更新。

my_analyzer 分析器,它标记所有词项,包括停用词

my_stop_analyzer 分析器,它删除停用词

analyzer 设置指向 my_analyzer 分析器,该分析器将在索引时使用

search_analyzer 设置指向 my_stop_analyzer,并删除非短语查询的停用词

search_quote_analyzer 设置指向 my_analyzer 分析器,并确保停用词不会从短语查询中删除

由于查询被引号包裹,它被检测为短语查询,因此 search_quote_analyzer 生效并确保停用词不会从查询中删除。my_analyzer 分析器将返回以下词项 [the, quick, brown, fox],这将匹配其中一个文档。同时,词项查询将使用 my_stop_analyzer 分析器进行分析,该分析器将过滤掉停用词。因此,搜索 The quick brown fox 或 A quick brown fox 都将返回两个文档,因为两个文档都包含以下词项 [quick, brown, fox]。如果没有 search_quote_analyzer,则不可能对短语查询进行精确匹配,因为短语查询中的停用词将被删除,从而导致两个文档都匹配。