教程:将 ILM 管理的数据流迁移到数据流生命周期

编辑

教程:将 ILM 管理的数据流迁移到数据流生命周期

编辑

在本教程中,我们将研究如何将现有数据流从索引生命周期管理 (ILM)迁移到数据流生命周期。现有的 ILM 管理的后备索引将继续由 ILM 管理,直到它们老化并被 ILM 删除;但是,新的后备索引将由数据流生命周期管理。这样,数据流会逐渐从由 ILM 管理迁移到由数据流生命周期管理。正如我们将看到的,ILM 和数据流生命周期可以共同管理一个数据流;但是,一个索引一次只能由一个系统管理。

要点

编辑

要将数据流从 ILM 迁移到数据流生命周期,我们需要执行两个步骤

  1. 更新支持数据流的索引模板,将 prefer_ilm 设置为 false,并配置数据流生命周期。
  2. 使用生命周期 API 为现有数据流配置数据流生命周期。

有关更多详细信息,请参阅迁移到数据流生命周期部分。

设置 ILM 管理的数据流

编辑

让我们首先创建一个由 ILM 管理的包含两个后备索引的数据流。我们首先创建一个 ILM 策略

resp = client.ilm.put_lifecycle(
    name="pre-dsl-ilm-policy",
    policy={
        "phases": {
            "hot": {
                "actions": {
                    "rollover": {
                        "max_primary_shard_size": "50gb"
                    }
                }
            },
            "delete": {
                "min_age": "7d",
                "actions": {
                    "delete": {}
                }
            }
        }
    },
)
print(resp)
response = client.ilm.put_lifecycle(
  policy: 'pre-dsl-ilm-policy',
  body: {
    policy: {
      phases: {
        hot: {
          actions: {
            rollover: {
              max_primary_shard_size: '50gb'
            }
          }
        },
        delete: {
          min_age: '7d',
          actions: {
            delete: {}
          }
        }
      }
    }
  }
)
puts response
const response = await client.ilm.putLifecycle({
  name: "pre-dsl-ilm-policy",
  policy: {
    phases: {
      hot: {
        actions: {
          rollover: {
            max_primary_shard_size: "50gb",
          },
        },
      },
      delete: {
        min_age: "7d",
        actions: {
          delete: {},
        },
      },
    },
  },
});
console.log(response);
PUT _ilm/policy/pre-dsl-ilm-policy
{
  "policy": {
    "phases": {
      "hot": {
        "actions": {
          "rollover": {
            "max_primary_shard_size": "50gb"
          }
        }
      },
      "delete": {
        "min_age": "7d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}

然后创建一个将支持数据流并配置 ILM 的索引模板

resp = client.indices.put_index_template(
    name="dsl-data-stream-template",
    index_patterns=[
        "dsl-data-stream*"
    ],
    data_stream={},
    priority=500,
    template={
        "settings": {
            "index.lifecycle.name": "pre-dsl-ilm-policy"
        }
    },
)
print(resp)
response = client.indices.put_index_template(
  name: 'dsl-data-stream-template',
  body: {
    index_patterns: [
      'dsl-data-stream*'
    ],
    data_stream: {},
    priority: 500,
    template: {
      settings: {
        'index.lifecycle.name' => 'pre-dsl-ilm-policy'
      }
    }
  }
)
puts response
const response = await client.indices.putIndexTemplate({
  name: "dsl-data-stream-template",
  index_patterns: ["dsl-data-stream*"],
  data_stream: {},
  priority: 500,
  template: {
    settings: {
      "index.lifecycle.name": "pre-dsl-ilm-policy",
    },
  },
});
console.log(response);
PUT _index_template/dsl-data-stream-template
{
  "index_patterns": ["dsl-data-stream*"],
  "data_stream": { },
  "priority": 500,
  "template": {
    "settings": {
      "index.lifecycle.name": "pre-dsl-ilm-policy"
    }
  }
}

现在我们将索引一个以 dsl-data-stream 为目标的文档来创建数据流,我们还将手动翻转数据流以创建另一个生成索引

resp = client.index(
    index="dsl-data-stream",
    document={
        "@timestamp": "2023-10-18T16:21:15.000Z",
        "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736"
    },
)
print(resp)
response = client.index(
  index: 'dsl-data-stream',
  body: {
    "@timestamp": '2023-10-18T16:21:15.000Z',
    message: '192.0.2.42 - - [06/May/2099:16:21:15 +0000] "GET /images/bg.jpg HTTP/1.0" 200 24736'
  }
)
puts response
const response = await client.index({
  index: "dsl-data-stream",
  document: {
    "@timestamp": "2023-10-18T16:21:15.000Z",
    message:
      '192.0.2.42 - - [06/May/2099:16:21:15 +0000] "GET /images/bg.jpg HTTP/1.0" 200 24736',
  },
});
console.log(response);
POST dsl-data-stream/_doc?
{
  "@timestamp": "2023-10-18T16:21:15.000Z",
  "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736"
}
resp = client.indices.rollover(
    alias="dsl-data-stream",
)
print(resp)
response = client.indices.rollover(
  alias: 'dsl-data-stream'
)
puts response
const response = await client.indices.rollover({
  alias: "dsl-data-stream",
});
console.log(response);
POST dsl-data-stream/_rollover

我们将使用GET _data_stream API 来检查数据流的状态

resp = client.indices.get_data_stream(
    name="dsl-data-stream",
)
print(resp)
response = client.indices.get_data_stream(
  name: 'dsl-data-stream'
)
puts response
const response = await client.indices.getDataStream({
  name: "dsl-data-stream",
});
console.log(response);
GET _data_stream/dsl-data-stream

检查响应,我们将看到两个后备索引都由 ILM 管理,并且下一代索引也将由 ILM 管理

{
  "data_streams": [
    {
      "name": "dsl-data-stream",
      "timestamp_field": {
        "name": "@timestamp"
      },
      "indices": [
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000001",    
          "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg",
          "prefer_ilm": true,                                       
          "ilm_policy": "pre-dsl-ilm-policy",                       
          "managed_by": "Index Lifecycle Management"                
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000002",
          "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"
        }
      ],
      "generation": 2,
      "status": "GREEN",
      "template": "dsl-data-stream-template",
      "next_generation_managed_by": "Index Lifecycle Management",   
      "prefer_ilm": true,                                           
      "ilm_policy": "pre-dsl-ilm-policy",                           
      "hidden": false,
      "system": false,
      "allow_custom_routing": false,
      "replicated": false,
      "rollover_on_write": false
    }
  ]
}

后备索引的名称。

对于每个后备索引,我们显示 prefer_ilm 配置的值,该值指示当索引同时配置了这两个系统时,ILM 是否优先于数据流生命周期。

为此索引配置的 ILM 策略。

管理此索引的系统(可能的值为 "Index Lifecycle Management"、"Data stream lifecycle" 或 "Unmanaged")。

将管理下一代索引的系统(下一代索引即数据流翻转后该数据流的新写入索引)。可能的值为 "Index Lifecycle Management"、"Data stream lifecycle" 或 "Unmanaged"。

在支持数据流的索引模板中配置的prefer_ilm值。此值将为所有新的后备索引配置。如果它未在索引模板中配置,则后备索引将接收 true 默认值(默认情况下,ILM 优先于数据流生命周期,因为它目前在功能上更丰富)。

在支持此数据流的索引模板中配置的 ILM 策略(只要它存在于索引模板中,就会在所有新的后备索引上配置)。

将数据流迁移到数据流生命周期

编辑

要将 dsl-data-stream 迁移到数据流生命周期,我们需要执行两个步骤

  1. 更新支持数据流的索引模板,将 prefer_ilm 设置为 false,并配置数据流生命周期。
  2. 使用生命周期 API 为现有的 dsl-data-stream 配置数据流生命周期。

添加到索引模板的数据流生命周期配置(属于数据流配置)将仅应用于新的数据流。我们的数据流已经存在,因此,即使我们在索引模板中添加了数据流生命周期配置,它也不会应用于 dsl-data-stream。

让我们更新索引模板

resp = client.indices.put_index_template(
    name="dsl-data-stream-template",
    index_patterns=[
        "dsl-data-stream*"
    ],
    data_stream={},
    priority=500,
    template={
        "settings": {
            "index.lifecycle.name": "pre-dsl-ilm-policy",
            "index.lifecycle.prefer_ilm": False
        },
        "lifecycle": {
            "data_retention": "7d"
        }
    },
)
print(resp)
response = client.indices.put_index_template(
  name: 'dsl-data-stream-template',
  body: {
    index_patterns: [
      'dsl-data-stream*'
    ],
    data_stream: {},
    priority: 500,
    template: {
      settings: {
        'index.lifecycle.name' => 'pre-dsl-ilm-policy',
        'index.lifecycle.prefer_ilm' => false
      },
      lifecycle: {
        data_retention: '7d'
      }
    }
  }
)
puts response
const response = await client.indices.putIndexTemplate({
  name: "dsl-data-stream-template",
  index_patterns: ["dsl-data-stream*"],
  data_stream: {},
  priority: 500,
  template: {
    settings: {
      "index.lifecycle.name": "pre-dsl-ilm-policy",
      "index.lifecycle.prefer_ilm": false,
    },
    lifecycle: {
      data_retention: "7d",
    },
  },
});
console.log(response);
PUT _index_template/dsl-data-stream-template
{
  "index_patterns": ["dsl-data-stream*"],
  "data_stream": { },
  "priority": 500,
  "template": {
    "settings": {
      "index.lifecycle.name": "pre-dsl-ilm-policy",
      "index.lifecycle.prefer_ilm": false             
    },
    "lifecycle": {
      "data_retention": "7d"                          
    }
  }
}

prefer_ilm 设置现在将配置在新的后备索引(通过翻转数据流创建)上,使得 ILM 不再优先于数据流生命周期。

我们正在配置数据流生命周期,以便新的数据流将由数据流生命周期管理。

我们现在已确保新的数据流将由数据流生命周期管理。

让我们更新我们现有的 dsl-data-stream 并配置数据流生命周期

resp = client.indices.put_data_lifecycle(
    name="dsl-data-stream",
    data_retention="7d",
)
print(resp)
response = client.indices.put_data_lifecycle(
  name: 'dsl-data-stream',
  body: {
    data_retention: '7d'
  }
)
puts response
const response = await client.indices.putDataLifecycle({
  name: "dsl-data-stream",
  data_retention: "7d",
});
console.log(response);
PUT _data_stream/dsl-data-stream/_lifecycle
{
    "data_retention": "7d"
}

我们可以检查数据流,以确认下一代将确实由数据流生命周期管理

resp = client.indices.get_data_stream(
    name="dsl-data-stream",
)
print(resp)
response = client.indices.get_data_stream(
  name: 'dsl-data-stream'
)
puts response
const response = await client.indices.getDataStream({
  name: "dsl-data-stream",
});
console.log(response);
GET _data_stream/dsl-data-stream
{
  "data_streams": [
    {
      "name": "dsl-data-stream",
      "timestamp_field": {
        "name": "@timestamp"
      },
      "indices": [
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000001",
          "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"                
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000002",
          "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"                
        }
      ],
      "generation": 2,
      "status": "GREEN",
      "template": "dsl-data-stream-template",
      "lifecycle": {
        "enabled": true,
        "data_retention": "7d",
        "effective_retention": "7d",
        "retention_determined_by": "data_stream_configuration"
      },
      "ilm_policy": "pre-dsl-ilm-policy",
      "next_generation_managed_by": "Data stream lifecycle",         
      "prefer_ilm": false,                                           
      "hidden": false,
      "system": false,
      "allow_custom_routing": false,
      "replicated": false,
      "rollover_on_write": false
    }
  ]
}

现有的后备索引将继续由 ILM 管理

现有的后备索引将继续由 ILM 管理

下一代索引将由数据流生命周期管理

我们在索引模板中配置的 prefer_ilm 设置值被反映出来,并将为新的后备索引进行相应配置。

现在我们将翻转数据流以查看由数据流生命周期管理的新一代索引

resp = client.indices.rollover(
    alias="dsl-data-stream",
)
print(resp)
response = client.indices.rollover(
  alias: 'dsl-data-stream'
)
puts response
const response = await client.indices.rollover({
  alias: "dsl-data-stream",
});
console.log(response);
POST dsl-data-stream/_rollover
resp = client.indices.get_data_stream(
    name="dsl-data-stream",
)
print(resp)
response = client.indices.get_data_stream(
  name: 'dsl-data-stream'
)
puts response
const response = await client.indices.getDataStream({
  name: "dsl-data-stream",
});
console.log(response);
GET _data_stream/dsl-data-stream
{
  "data_streams": [
    {
      "name": "dsl-data-stream",
      "timestamp_field": {
        "name": "@timestamp"
      },
      "indices": [
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000001",
          "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"                
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000002",
          "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"                
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000003",
          "index_uuid": "PA_JquKGSiKcAKBA8abcd1",
          "prefer_ilm": false,                                      
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Data stream lifecycle"                     
        }
      ],
      "generation": 3,
      "status": "GREEN",
      "template": "dsl-data-stream-template",
      "lifecycle": {
        "enabled": true,
        "data_retention": "7d",
        "effective_retention": "7d",
        "retention_determined_by": "data_stream_configuration"
      },
      "ilm_policy": "pre-dsl-ilm-policy",
      "next_generation_managed_by": "Data stream lifecycle",
      "prefer_ilm": false,
      "hidden": false,
      "system": false,
      "allow_custom_routing": false,
      "replicated": false,
      "rollover_on_write": false
    }
  ]
}

翻转之前存在的后备索引将继续由 ILM 管理

翻转之前存在的后备索引将继续由 ILM 管理

新的写入索引接收了 prefer_ilm 设置的 false 值,正如我们在索引模板中配置的那样

新的写入索引由数据流生命周期管理

将数据流迁移回 ILM

编辑

我们可以轻松地将此数据流更改为由 ILM 管理,因为我们在更新索引模板时没有删除 ILM 策略。

我们可以通过两种方式实现此目的

  1. 从数据流中删除生命周期
  2. 通过将 enabled 标志配置为 false 来禁用数据流生命周期。

让我们实现选项 2 并禁用数据流生命周期

resp = client.indices.put_data_lifecycle(
    name="dsl-data-stream",
    data_retention="7d",
    enabled=False,
)
print(resp)
response = client.indices.put_data_lifecycle(
  name: 'dsl-data-stream',
  body: {
    data_retention: '7d',
    enabled: false
  }
)
puts response
const response = await client.indices.putDataLifecycle({
  name: "dsl-data-stream",
  data_retention: "7d",
  enabled: false,
});
console.log(response);
PUT _data_stream/dsl-data-stream/_lifecycle
{
    "data_retention": "7d",
    "enabled": false 
}

可以省略 enabled 标志,默认值为 true,但是,在这里我们将其显式配置为 false。让我们检查数据流的状态

resp = client.indices.get_data_stream(
    name="dsl-data-stream",
)
print(resp)
response = client.indices.get_data_stream(
  name: 'dsl-data-stream'
)
puts response
const response = await client.indices.getDataStream({
  name: "dsl-data-stream",
});
console.log(response);
GET _data_stream/dsl-data-stream
{
  "data_streams": [
    {
      "name": "dsl-data-stream",
      "timestamp_field": {
        "name": "@timestamp"
      },
      "indices": [
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000001",
          "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000002",
          "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw",
          "prefer_ilm": true,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"
        },
        {
          "index_name": ".ds-dsl-data-stream-2023.10.19-000003",
          "index_uuid": "PA_JquKGSiKcAKBA8abcd1",
          "prefer_ilm": false,
          "ilm_policy": "pre-dsl-ilm-policy",
          "managed_by": "Index Lifecycle Management"                
        }
      ],
      "generation": 3,
      "status": "GREEN",
      "template": "dsl-data-stream-template",
      "lifecycle": {
        "enabled": false,                                          
        "data_retention": "7d"
      },
      "ilm_policy": "pre-dsl-ilm-policy",
      "next_generation_managed_by": "Index Lifecycle Management",  
      "prefer_ilm": false,
      "hidden": false,
      "system": false,
      "allow_custom_routing": false,
      "replicated": false,
      "rollover_on_write": false
    }
  ]
}

写入索引现在由 ILM 管理

数据流上配置的生命周期现在已被禁用。

下一个写入索引将由 ILM 管理

如果我们在更新索引模板时从中删除了 ILM 策略,那么数据流的写入索引现在将处于未管理状态(managed_by 为 "Unmanaged"),因为该索引上没有配置可供回退的 ILM 策略。