resp = client.indices.analyze(
    tokenizer="whitespace",
    filter=[
        "delimited_payload"
    ],
    text="the|0 brown|10 fox|5 is|0 quick|10",
)
print(resp)

response = client.indices.analyze(
  body: {
    tokenizer: 'whitespace',
    filter: [
      'delimited_payload'
    ],
    text: 'the|0 brown|10 fox|5 is|0 quick|10'
  }
)
puts response

const response = await client.indices.analyze({
  tokenizer: "whitespace",
  filter: ["delimited_payload"],
  text: "the|0 brown|10 fox|5 is|0 quick|10",
});
console.log(response);

GET _analyze
{
  "tokenizer": "whitespace",
  "filter": ["delimited_payload"],
  "text": "the|0 brown|10 fox|5 is|0 quick|10"
}

Copy as curl Try in Elastic

过滤器生成以下令牌：

[ the, brown, fox, is, quick ]

请注意，analyze API 不会返回存储的负载。有关返回负载的示例，请参阅“返回存储的负载”一节。

添加到分析器

编辑

以下创建索引 API 请求使用 delimited_payload 过滤器来配置新的自定义分析器。

resp = client.indices.create(
    index="delimited_payload",
    settings={
        "analysis": {
            "analyzer": {
                "whitespace_delimited_payload": {
                    "tokenizer": "whitespace",
                    "filter": [
                        "delimited_payload"
                    ]
                }
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'delimited_payload',
  body: {
    settings: {
      analysis: {
        analyzer: {
          whitespace_delimited_payload: {
            tokenizer: 'whitespace',
            filter: [
              'delimited_payload'
            ]
          }
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "delimited_payload",
  settings: {
    analysis: {
      analyzer: {
        whitespace_delimited_payload: {
          tokenizer: "whitespace",
          filter: ["delimited_payload"],
        },
      },
    },
  },
});
console.log(response);

PUT delimited_payload
{
  "settings": {
    "analysis": {
      "analyzer": {
        "whitespace_delimited_payload": {
          "tokenizer": "whitespace",
          "filter": [ "delimited_payload" ]
        }
      }
    }
  }
}

Copy as curl Try in Elastic

可配置参数

编辑

delimiter

（可选，字符串）用于分隔令牌和负载的字符。默认为 |。

encoding

（可选，字符串）存储的负载的数据类型。有效值包括：

float: （默认）浮点数
identity: 字符
int: 整数

自定义并添加到分析器

编辑

要自定义 delimited_payload 过滤器，请复制它，并以此为基础创建新的自定义令牌过滤器。您可以使用其可配置参数来修改该过滤器。

例如，以下创建索引 API 请求使用自定义 delimited_payload 过滤器来配置新的自定义分析器。自定义 delimited_payload 过滤器使用 + 分隔符分隔令牌和负载。负载编码为整数。

resp = client.indices.create(
    index="delimited_payload_example",
    settings={
        "analysis": {
            "analyzer": {
                "whitespace_plus_delimited": {
                    "tokenizer": "whitespace",
                    "filter": [
                        "plus_delimited"
                    ]
                }
            },
            "filter": {
                "plus_delimited": {
                    "type": "delimited_payload",
                    "delimiter": "+",
                    "encoding": "int"
                }
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'delimited_payload_example',
  body: {
    settings: {
      analysis: {
        analyzer: {
          whitespace_plus_delimited: {
            tokenizer: 'whitespace',
            filter: [
              'plus_delimited'
            ]
          }
        },
        filter: {
          plus_delimited: {
            type: 'delimited_payload',
            delimiter: '+',
            encoding: 'int'
          }
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "delimited_payload_example",
  settings: {
    analysis: {
      analyzer: {
        whitespace_plus_delimited: {
          tokenizer: "whitespace",
          filter: ["plus_delimited"],
        },
      },
      filter: {
        plus_delimited: {
          type: "delimited_payload",
          delimiter: "+",
          encoding: "int",
        },
      },
    },
  },
});
console.log(response);

PUT delimited_payload_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "whitespace_plus_delimited": {
          "tokenizer": "whitespace",
          "filter": [ "plus_delimited" ]
        }
      },
      "filter": {
        "plus_delimited": {
          "type": "delimited_payload",
          "delimiter": "+",
          "encoding": "int"
        }
      }
    }
  }
}

Copy as curl Try in Elastic

返回存储的负载

编辑

使用创建索引 API 创建一个索引，该索引：

包含一个存储带有负载的词项向量的字段。
使用带有 delimited_payload 过滤器的自定义索引分析器。

resp = client.indices.create(
    index="text_payloads",
    mappings={
        "properties": {
            "text": {
                "type": "text",
                "term_vector": "with_positions_payloads",
                "analyzer": "payload_delimiter"
            }
        }
    },
    settings={
        "analysis": {
            "analyzer": {
                "payload_delimiter": {
                    "tokenizer": "whitespace",
                    "filter": [
                        "delimited_payload"
                    ]
                }
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'text_payloads',
  body: {
    mappings: {
      properties: {
        text: {
          type: 'text',
          term_vector: 'with_positions_payloads',
          analyzer: 'payload_delimiter'
        }
      }
    },
    settings: {
      analysis: {
        analyzer: {
          payload_delimiter: {
            tokenizer: 'whitespace',
            filter: [
              'delimited_payload'
            ]
          }
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "text_payloads",
  mappings: {
    properties: {
      text: {
        type: "text",
        term_vector: "with_positions_payloads",
        analyzer: "payload_delimiter",
      },
    },
  },
  settings: {
    analysis: {
      analyzer: {
        payload_delimiter: {
          tokenizer: "whitespace",
          filter: ["delimited_payload"],
        },
      },
    },
  },
});
console.log(response);

PUT text_payloads
{
  "mappings": {
    "properties": {
      "text": {
        "type": "text",
        "term_vector": "with_positions_payloads",
        "analyzer": "payload_delimiter"
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "payload_delimiter": {
          "tokenizer": "whitespace",
          "filter": [ "delimited_payload" ]
        }
      }
    }
  }
}

Copy as curl Try in Elastic

向索引添加包含负载的文档。

resp = client.index(
    index="text_payloads",
    id="1",
    document={
        "text": "the|0 brown|3 fox|4 is|0 quick|10"
    },
)
print(resp)

response = client.index(
  index: 'text_payloads',
  id: 1,
  body: {
    text: 'the|0 brown|3 fox|4 is|0 quick|10'
  }
)
puts response

const response = await client.index({
  index: "text_payloads",
  id: 1,
  document: {
    text: "the|0 brown|3 fox|4 is|0 quick|10",
  },
});
console.log(response);

POST text_payloads/_doc/1
{
  "text": "the|0 brown|3 fox|4 is|0 quick|10"
}

Copy as curl Try in Elastic

使用词项向量 API 返回文档的令牌和 base64 编码的负载。

resp = client.termvectors(
    index="text_payloads",
    id="1",
    fields=[
        "text"
    ],
    payloads=True,
)
print(resp)

response = client.termvectors(
  index: 'text_payloads',
  id: 1,
  body: {
    fields: [
      'text'
    ],
    payloads: true
  }
)
puts response

const response = await client.termvectors({
  index: "text_payloads",
  id: 1,
  fields: ["text"],
  payloads: true,
});
console.log(response);

GET text_payloads/_termvectors/1
{
  "fields": [ "text" ],
  "payloads": true
}

Copy as curl Try in Elastic

API 返回以下响应：

{
  "_index": "text_payloads",
  "_id": "1",
  "_version": 1,
  "found": true,
  "took": 8,
  "term_vectors": {
    "text": {
      "field_statistics": {
        "sum_doc_freq": 5,
        "doc_count": 1,
        "sum_ttf": 5
      },
      "terms": {
        "brown": {
          "term_freq": 1,
          "tokens": [
            {
              "position": 1,
              "payload": "QEAAAA=="
            }
          ]
        },
        "fox": {
          "term_freq": 1,
          "tokens": [
            {
              "position": 2,
              "payload": "QIAAAA=="
            }
          ]
        },
        "is": {
          "term_freq": 1,
          "tokens": [
            {
              "position": 3,
              "payload": "AAAAAA=="
            }
          ]
        },
        "quick": {
          "term_freq": 1,
          "tokens": [
            {
              "position": 4,
              "payload": "QSAAAA=="
            }
          ]
        },
        "the": {
          "term_freq": 1,
          "tokens": [
            {
              "position": 0,
              "payload": "AAAAAA=="
            }
          ]
        }
      }
    }
  }
}

« 小数位令牌过滤器 | 字典分解器令牌过滤器 »

On this page

示例
添加到分析器
可配置参数
自定义并添加到分析器
返回存储的负载

Was this helpful?

Feedback

The Search AI Company

ELK Stack

Elastic Cloud

Generative AI

Search

Security

Observability

By solution

Industries

Customer spotlight

Research

Build

Learn

Connect

分隔符负载令牌过滤器

分隔符负载令牌过滤器

示例

添加到分析器

可配置参数

自定义并添加到分析器

返回存储的负载

Follow us

About us

Join us

Partners

Trust & Security

Investor relations

Excellence Awards

About us

Join us

Partners

Trust & Security

Investor relations

Excellence Awards