ES|QL 多值字段

编辑

ES|QL 可以正常读取多值字段:

resp = client.bulk(
    index="mv",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "a": 1,
            "b": [
                2,
                1
            ]
        },
        {
            "index": {}
        },
        {
            "a": 2,
            "b": 3
        }
    ],
)
print(resp)

resp1 = client.esql.query(
    query="FROM mv | LIMIT 2",
)
print(resp1)
const response = await client.bulk({
  index: "mv",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      a: 1,
      b: [2, 1],
    },
    {
      index: {},
    },
    {
      a: 2,
      b: 3,
    },
  ],
});
console.log(response);

const response1 = await client.esql.query({
  query: "FROM mv | LIMIT 2",
});
console.log(response1);
POST /mv/_bulk?refresh
{ "index" : {} }
{ "a": 1, "b": [2, 1] }
{ "index" : {} }
{ "a": 2, "b": 3 }

POST /_query
{
  "query": "FROM mv | LIMIT 2"
}

多值字段以 JSON 数组的形式返回:

{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"},
    { "name": "b", "type": "long"}
  ],
  "values": [
    [1, [1, 2]],
    [2,      3]
  ]
}

多值字段中值的相对顺序未定义。它们通常按升序排列,但不要依赖于此。

重复值

编辑

某些字段类型(例如 keyword)会在写入时删除重复值:

resp = client.indices.create(
    index="mv",
    mappings={
        "properties": {
            "b": {
                "type": "keyword"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="mv",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "a": 1,
            "b": [
                "foo",
                "foo",
                "bar"
            ]
        },
        {
            "index": {}
        },
        {
            "a": 2,
            "b": [
                "bar",
                "bar"
            ]
        }
    ],
)
print(resp1)

resp2 = client.esql.query(
    query="FROM mv | LIMIT 2",
)
print(resp2)
const response = await client.indices.create({
  index: "mv",
  mappings: {
    properties: {
      b: {
        type: "keyword",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "mv",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      a: 1,
      b: ["foo", "foo", "bar"],
    },
    {
      index: {},
    },
    {
      a: 2,
      b: ["bar", "bar"],
    },
  ],
});
console.log(response1);

const response2 = await client.esql.query({
  query: "FROM mv | LIMIT 2",
});
console.log(response2);
PUT /mv
{
  "mappings": {
    "properties": {
      "b": {"type": "keyword"}
    }
  }
}

POST /mv/_bulk?refresh
{ "index" : {} }
{ "a": 1, "b": ["foo", "foo", "bar"] }
{ "index" : {} }
{ "a": 2, "b": ["bar", "bar"] }

POST /_query
{
  "query": "FROM mv | LIMIT 2"
}

ES|QL 看到的是去重之后的值:

{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"},
    { "name": "b", "type": "keyword"}
  ],
  "values": [
    [1, ["bar", "foo"]],
    [2,          "bar"]
  ]
}

但其他类型(例如 long)不会删除重复值:

resp = client.indices.create(
    index="mv",
    mappings={
        "properties": {
            "b": {
                "type": "long"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="mv",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "a": 1,
            "b": [
                2,
                2,
                1
            ]
        },
        {
            "index": {}
        },
        {
            "a": 2,
            "b": [
                1,
                1
            ]
        }
    ],
)
print(resp1)

resp2 = client.esql.query(
    query="FROM mv | LIMIT 2",
)
print(resp2)
const response = await client.indices.create({
  index: "mv",
  mappings: {
    properties: {
      b: {
        type: "long",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "mv",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      a: 1,
      b: [2, 2, 1],
    },
    {
      index: {},
    },
    {
      a: 2,
      b: [1, 1],
    },
  ],
});
console.log(response1);

const response2 = await client.esql.query({
  query: "FROM mv | LIMIT 2",
});
console.log(response2);
PUT /mv
{
  "mappings": {
    "properties": {
      "b": {"type": "long"}
    }
  }
}

POST /mv/_bulk?refresh
{ "index" : {} }
{ "a": 1, "b": [2, 2, 1] }
{ "index" : {} }
{ "a": 2, "b": [1, 1] }

POST /_query
{
  "query": "FROM mv | LIMIT 2"
}

ES|QL 同样会看到这些重复值:

{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"},
    { "name": "b", "type": "long"}
  ],
  "values": [
    [1, [1, 2, 2]],
    [2,    [1, 1]]
  ]
}

这一切都发生在存储层。如果存储了重复的 `long` 值,之后再将它们转换为字符串,重复值仍会保留:

resp = client.indices.create(
    index="mv",
    mappings={
        "properties": {
            "b": {
                "type": "long"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="mv",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "a": 1,
            "b": [
                2,
                2,
                1
            ]
        },
        {
            "index": {}
        },
        {
            "a": 2,
            "b": [
                1,
                1
            ]
        }
    ],
)
print(resp1)

resp2 = client.esql.query(
    query="FROM mv | EVAL b=TO_STRING(b) | LIMIT 2",
)
print(resp2)
const response = await client.indices.create({
  index: "mv",
  mappings: {
    properties: {
      b: {
        type: "long",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "mv",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      a: 1,
      b: [2, 2, 1],
    },
    {
      index: {},
    },
    {
      a: 2,
      b: [1, 1],
    },
  ],
});
console.log(response1);

const response2 = await client.esql.query({
  query: "FROM mv | EVAL b=TO_STRING(b) | LIMIT 2",
});
console.log(response2);
PUT /mv
{
  "mappings": {
    "properties": {
      "b": {"type": "long"}
    }
  }
}

POST /mv/_bulk?refresh
{ "index" : {} }
{ "a": 1, "b": [2, 2, 1] }
{ "index" : {} }
{ "a": 2, "b": [1, 1] }

POST /_query
{
  "query": "FROM mv | EVAL b=TO_STRING(b) | LIMIT 2"
}
{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"},
    { "name": "b", "type": "keyword"}
  ],
  "values": [
    [1, ["1", "2", "2"]],
    [2,      ["1", "1"]]
  ]
}

列表中的 null

编辑

列表中的 null 值在存储层不会保留。

resp = client.index(
    index="mv",
    refresh=True,
    document={
        "a": [
            2,
            None,
            1
        ]
    },
)
print(resp)

resp1 = client.esql.query(
    query="FROM mv | LIMIT 1",
)
print(resp1)
const response = await client.index({
  index: "mv",
  refresh: "true",
  document: {
    a: [2, null, 1],
  },
});
console.log(response);

const response1 = await client.esql.query({
  query: "FROM mv | LIMIT 1",
});
console.log(response1);
POST /mv/_doc?refresh
{ "a": [2, null, 1] }

POST /_query
{
  "query": "FROM mv | LIMIT 1"
}
{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"}
  ],
  "values": [
    [[1, 2]]
  ]
}

函数

编辑

除非另有说明,否则函数应用于多值字段时将返回 null:

resp = client.bulk(
    index="mv",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "a": 1,
            "b": [
                2,
                1
            ]
        },
        {
            "index": {}
        },
        {
            "a": 2,
            "b": 3
        }
    ],
)
print(resp)
response = client.bulk(
  index: 'mv',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      a: 1,
      b: [
        2,
        1
      ]
    },
    {
      index: {}
    },
    {
      a: 2,
      b: 3
    }
  ]
)
puts response
const response = await client.bulk({
  index: "mv",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      a: 1,
      b: [2, 1],
    },
    {
      index: {},
    },
    {
      a: 2,
      b: 3,
    },
  ],
});
console.log(response);
POST /mv/_bulk?refresh
{ "index" : {} }
{ "a": 1, "b": [2, 1] }
{ "index" : {} }
{ "a": 2, "b": 3 }
resp = client.esql.query(
    query="FROM mv | EVAL b + 2, a + b | LIMIT 4",
)
print(resp)
const response = await client.esql.query({
  query: "FROM mv | EVAL b + 2, a + b | LIMIT 4",
});
console.log(response);
POST /_query
{
  "query": "FROM mv | EVAL b + 2, a + b | LIMIT 4"
}
{
  "took": 28,
  "columns": [
    { "name": "a",   "type": "long"},
    { "name": "b",   "type": "long"},
    { "name": "b + 2", "type": "long"},
    { "name": "a + b", "type": "long"}
  ],
  "values": [
    [1, [1, 2], null, null],
    [2,      3,    5,    5]
  ]
}

可以使用多值函数(例如下例中的 MV_MIN)将字段转换为单值,从而绕过此限制:

resp = client.esql.query(
    query="FROM mv | EVAL b=MV_MIN(b) | EVAL b + 2, a + b | LIMIT 4",
)
print(resp)
const response = await client.esql.query({
  query: "FROM mv | EVAL b=MV_MIN(b) | EVAL b + 2, a + b | LIMIT 4",
});
console.log(response);
POST /_query
{
  "query": "FROM mv | EVAL b=MV_MIN(b) | EVAL b + 2, a + b | LIMIT 4"
}
{
  "took": 28,
  "columns": [
    { "name": "a",   "type": "long"},
    { "name": "b",   "type": "long"},
    { "name": "b + 2", "type": "long"},
    { "name": "a + b", "type": "long"}
  ],
  "values": [
    [1, 1, 3, 2],
    [2, 3, 5, 5]
  ]
}