脚本处理器

编辑

在传入的文档上运行内联或存储的脚本。该脚本在 ingest 上下文中运行。

脚本处理器使用脚本缓存以避免为每个传入的文档重新编译脚本。为了提高性能，请确保在生产环境中使用脚本处理器之前正确调整脚本缓存的大小。

表 40. 脚本选项

名称 必需 默认 描述

lang

"painless"

脚本语言。

id

-

存储的脚本的 ID。如果未指定 source,则此参数是必需的。

source

-

内联脚本。如果未指定 id,则此参数是必需的。

params

-

包含脚本参数的对象。

description

-

处理器的描述。用于描述处理器的目的或其配置。

if

-

有条件地执行处理器。请参阅“有条件地运行处理器”。

ignore_failure

false

忽略处理器的失败。请参阅“处理管道失败”。

on_failure

-

处理处理器的失败。请参阅“处理管道失败”。

tag

-

处理器的标识符。用于调试和指标。

访问源字段

编辑

脚本处理器将每个传入文档的 JSON 源字段解析为一组映射、列表和原始类型。要使用 Painless 脚本访问这些字段，请使用映射访问运算符 ctx['my-field']。您还可以使用简写 ctx.<my-field> 语法。

脚本处理器不支持 ctx['_source']['my-field'] 或 ctx._source.<my-field> 语法。

以下处理器使用 Painless 脚本从 env 源字段中提取 tags 字段。

resp = client.ingest.simulate(
    pipeline={
        "processors": [
            {
                "script": {
                    "description": "Extract 'tags' from 'env' field",
                    "lang": "painless",
                    "source": "\n            String[] envSplit = ctx['env'].splitOnToken(params['delimiter']);\n            ArrayList tags = new ArrayList();\n            tags.add(envSplit[params['position']].trim());\n            ctx['tags'] = tags;\n          ",
                    "params": {
                        "delimiter": "-",
                        "position": 1
                    }
                }
            }
        ]
    },
    docs=[
        {
            "_source": {
                "env": "es01-prod"
            }
        }
    ],
)
print(resp)
response = client.ingest.simulate(
  body: {
    pipeline: {
      processors: [
        {
          script: {
            description: "Extract 'tags' from 'env' field",
            lang: 'painless',
            source: "\n            String[] envSplit = ctx['env'].splitOnToken(params['delimiter']);\n            ArrayList tags = new ArrayList();\n            tags.add(envSplit[params['position']].trim());\n            ctx['tags'] = tags;\n          ",
            params: {
              delimiter: '-',
              position: 1
            }
          }
        }
      ]
    },
    docs: [
      {
        _source: {
          env: 'es01-prod'
        }
      }
    ]
  }
)
puts response
const response = await client.ingest.simulate({
  pipeline: {
    processors: [
      {
        script: {
          description: "Extract 'tags' from 'env' field",
          lang: "painless",
          source:
            "\n            String[] envSplit = ctx['env'].splitOnToken(params['delimiter']);\n            ArrayList tags = new ArrayList();\n            tags.add(envSplit[params['position']].trim());\n            ctx['tags'] = tags;\n          ",
          params: {
            delimiter: "-",
            position: 1,
          },
        },
      },
    ],
  },
  docs: [
    {
      _source: {
        env: "es01-prod",
      },
    },
  ],
});
console.log(response);
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "processors": [
      {
        "script": {
          "description": "Extract 'tags' from 'env' field",
          "lang": "painless",
          "source": """
            String[] envSplit = ctx['env'].splitOnToken(params['delimiter']);
            ArrayList tags = new ArrayList();
            tags.add(envSplit[params['position']].trim());
            ctx['tags'] = tags;
          """,
          "params": {
            "delimiter": "-",
            "position": 1
          }
        }
      }
    ]
  },
  "docs": [
    {
      "_source": {
        "env": "es01-prod"
      }
    }
  ]
}

该处理器生成：

{
  "docs": [
    {
      "doc": {
        ...
        "_source": {
          "env": "es01-prod",
          "tags": [
            "prod"
          ]
        }
      }
    }
  ]
}

访问元数据字段

编辑

您还可以使用脚本处理器来访问元数据字段。以下处理器使用 Painless 脚本来设置传入文档的 _index。

resp = client.ingest.simulate(
    pipeline={
        "processors": [
            {
                "script": {
                    "description": "Set index based on `lang` field and `dataset` param",
                    "lang": "painless",
                    "source": "\n            ctx['_index'] = ctx['lang'] + '-' + params['dataset'];\n          ",
                    "params": {
                        "dataset": "catalog"
                    }
                }
            }
        ]
    },
    docs=[
        {
            "_index": "generic-index",
            "_source": {
                "lang": "fr"
            }
        }
    ],
)
print(resp)
response = client.ingest.simulate(
  body: {
    pipeline: {
      processors: [
        {
          script: {
            description: 'Set index based on `lang` field and `dataset` param',
            lang: 'painless',
            source: "\n            ctx['_index'] = ctx['lang'] + '-' + params['dataset'];\n          ",
            params: {
              dataset: 'catalog'
            }
          }
        }
      ]
    },
    docs: [
      {
        _index: 'generic-index',
        _source: {
          lang: 'fr'
        }
      }
    ]
  }
)
puts response
const response = await client.ingest.simulate({
  pipeline: {
    processors: [
      {
        script: {
          description: "Set index based on `lang` field and `dataset` param",
          lang: "painless",
          source:
            "\n            ctx['_index'] = ctx['lang'] + '-' + params['dataset'];\n          ",
          params: {
            dataset: "catalog",
          },
        },
      },
    ],
  },
  docs: [
    {
      _index: "generic-index",
      _source: {
        lang: "fr",
      },
    },
  ],
});
console.log(response);
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "processors": [
      {
        "script": {
          "description": "Set index based on `lang` field and `dataset` param",
          "lang": "painless",
          "source": """
            ctx['_index'] = ctx['lang'] + '-' + params['dataset'];
          """,
          "params": {
            "dataset": "catalog"
          }
        }
      }
    ]
  },
  "docs": [
    {
      "_index": "generic-index",
      "_source": {
        "lang": "fr"
      }
    }
  ]
}

该处理器将文档的 _index 从 generic-index 更改为 fr-catalog：

{
  "docs": [
    {
      "doc": {
        ...
        "_index": "fr-catalog",
        "_source": {
          "lang": "fr"
        }
      }
    }
  ]
}