词干提取覆盖令牌过滤器

编辑

通过应用自定义映射来覆盖词干提取算法，并保护这些词条不被词干提取器修改。该过滤器必须放在任何词干提取过滤器之前。

规则是 token1[, ..., tokenN] => override 形式的映射。

设置 描述

rules

要使用的映射规则列表。

rules_path

映射列表文件的路径（可以是相对于 config 位置的路径，也可以是绝对路径）。

下面是一个示例（依次给出 Python、Ruby、JavaScript 客户端以及控制台请求的等价写法）：

resp = client.indices.create(
    index="my-index-000001",
    settings={
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "custom_stems",
                        "porter_stem"
                    ]
                }
            },
            "filter": {
                "custom_stems": {
                    "type": "stemmer_override",
                    "rules_path": "analysis/stemmer_override.txt"
                }
            }
        }
    },
)
print(resp)
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    settings: {
      analysis: {
        analyzer: {
          my_analyzer: {
            tokenizer: 'standard',
            filter: [
              'lowercase',
              'custom_stems',
              'porter_stem'
            ]
          }
        },
        filter: {
          custom_stems: {
            type: 'stemmer_override',
            rules_path: 'analysis/stemmer_override.txt'
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "my-index-000001",
  settings: {
    analysis: {
      analyzer: {
        my_analyzer: {
          tokenizer: "standard",
          filter: ["lowercase", "custom_stems", "porter_stem"],
        },
      },
      filter: {
        custom_stems: {
          type: "stemmer_override",
          rules_path: "analysis/stemmer_override.txt",
        },
      },
    },
  },
});
console.log(response);
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "custom_stems", "porter_stem" ]
        }
      },
      "filter": {
        "custom_stems": {
          "type": "stemmer_override",
          "rules_path": "analysis/stemmer_override.txt"
        }
      }
    }
  }
}

其中，analysis/stemmer_override.txt 文件的内容如下所示：

running, runs => run

stemmer => stemmer

您也可以通过 rules 设置内联定义覆盖规则：

resp = client.indices.create(
    index="my-index-000001",
    settings={
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "custom_stems",
                        "porter_stem"
                    ]
                }
            },
            "filter": {
                "custom_stems": {
                    "type": "stemmer_override",
                    "rules": [
                        "running, runs => run",
                        "stemmer => stemmer"
                    ]
                }
            }
        }
    },
)
print(resp)
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    settings: {
      analysis: {
        analyzer: {
          my_analyzer: {
            tokenizer: 'standard',
            filter: [
              'lowercase',
              'custom_stems',
              'porter_stem'
            ]
          }
        },
        filter: {
          custom_stems: {
            type: 'stemmer_override',
            rules: [
              'running, runs => run',
              'stemmer => stemmer'
            ]
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "my-index-000001",
  settings: {
    analysis: {
      analyzer: {
        my_analyzer: {
          tokenizer: "standard",
          filter: ["lowercase", "custom_stems", "porter_stem"],
        },
      },
      filter: {
        custom_stems: {
          type: "stemmer_override",
          rules: ["running, runs => run", "stemmer => stemmer"],
        },
      },
    },
  },
});
console.log(response);
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "custom_stems", "porter_stem" ]
        }
      },
      "filter": {
        "custom_stems": {
          "type": "stemmer_override",
          "rules": [
            "running, runs => run",
            "stemmer => stemmer"
          ]
        }
      }
    }
  }
}