配置内置分析器

编辑

内置分析器无需任何配置即可直接使用。不过,其中一些分析器支持通过配置选项来更改其行为。例如,standard 分析器可以配置为支持停用词列表。

# Index-level analysis settings: "std_english" is the standard analyzer
# configured with the predefined English stop word list.
analysis_settings = {
    "analysis": {
        "analyzer": {
            "std_english": {"type": "standard", "stopwords": "_english_"},
        },
    },
}

# "my_text" is analyzed with the unconfigured standard analyzer, while its
# "english" sub-field is analyzed with the custom "std_english" analyzer.
field_mappings = {
    "properties": {
        "my_text": {
            "type": "text",
            "analyzer": "standard",
            "fields": {
                "english": {"type": "text", "analyzer": "std_english"},
            },
        },
    },
}

resp = client.indices.create(
    index="my-index-000001",
    settings=analysis_settings,
    mappings=field_mappings,
)
print(resp)

# Analyze the sample text the way the "my_text" field would.
resp1 = client.indices.analyze(
    index="my-index-000001", field="my_text", text="The old brown cow"
)
print(resp1)

# Analyze the same text the way the "my_text.english" sub-field would.
resp2 = client.indices.analyze(
    index="my-index-000001", field="my_text.english", text="The old brown cow"
)
print(resp2)
# Index-level analysis settings: "std_english" is the standard analyzer
# configured with the predefined English stop word list.
index_settings = {
  analysis: {
    analyzer: {
      std_english: { type: 'standard', stopwords: '_english_' }
    }
  }
}

# "my_text" is analyzed with the unconfigured standard analyzer, while its
# "english" sub-field is analyzed with the custom "std_english" analyzer.
index_mappings = {
  properties: {
    my_text: {
      type: 'text',
      analyzer: 'standard',
      fields: {
        english: { type: 'text', analyzer: 'std_english' }
      }
    }
  }
}

response = client.indices.create(
  index: 'my-index-000001',
  body: { settings: index_settings, mappings: index_mappings }
)
puts response

# Analyze the sample text the way the "my_text" field would.
response = client.indices.analyze(
  index: 'my-index-000001',
  body: { field: 'my_text', text: 'The old brown cow' }
)
puts response

# Analyze the same text the way the "my_text.english" sub-field would.
response = client.indices.analyze(
  index: 'my-index-000001',
  body: { field: 'my_text.english', text: 'The old brown cow' }
)
puts response
// Shared request values for the three calls below.
const indexName = "my-index-000001";
const sampleText = "The old brown cow";

// Index-level analysis settings: "std_english" is the standard analyzer
// configured with the predefined English stop word list.
const analysisSettings = {
  analysis: {
    analyzer: {
      std_english: { type: "standard", stopwords: "_english_" },
    },
  },
};

// "my_text" is analyzed with the unconfigured standard analyzer, while its
// "english" sub-field is analyzed with the custom "std_english" analyzer.
const fieldMappings = {
  properties: {
    my_text: {
      type: "text",
      analyzer: "standard",
      fields: {
        english: { type: "text", analyzer: "std_english" },
      },
    },
  },
};

const response = await client.indices.create({
  index: indexName,
  settings: analysisSettings,
  mappings: fieldMappings,
});
console.log(response);

// Analyze the sample text the way the "my_text" field would.
const response1 = await client.indices.analyze({
  index: indexName,
  field: "my_text",
  text: sampleText,
});
console.log(response1);

// Analyze the same text the way the "my_text.english" sub-field would.
const response2 = await client.indices.analyze({
  index: indexName,
  field: "my_text.english",
  text: sampleText,
});
console.log(response2);
# Create the index. "std_english" is the standard analyzer configured with the
# predefined English stop word list. The "my_text" field uses the unconfigured
# standard analyzer; its "english" sub-field uses "std_english".
PUT my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "std_english": { "type": "standard", "stopwords": "_english_" }
      }
    }
  },
  "mappings": {
    "properties": {
      "my_text": {
        "type": "text",
        "analyzer": "standard",
        "fields": {
          "english": { "type": "text", "analyzer": "std_english" }
        }
      }
    }
  }
}

# Analyze the sample text the way the "my_text" field would.
POST my-index-000001/_analyze
{
  "field": "my_text",
  "text": "The old brown cow"
}

# Analyze the same text the way the "my_text.english" sub-field would.
POST my-index-000001/_analyze
{
  "field": "my_text.english",
  "text": "The old brown cow"
}

我们将 std_english 分析器定义为基于 standard 分析器,并将其配置为移除预定义英语停用词列表中的停用词。

my_text 字段直接使用未经任何配置的 standard 分析器,因此不会移除任何停用词。对文本“The old brown cow”进行分析后,生成的词条为:[ the, old, brown, cow ]。

my_text.english 字段使用 std_english 分析器,因此会移除英语停用词。对文本“The old brown cow”进行分析后,生成的词条为:[ old, brown, cow ]。