# Field definitions for the "amazon-reviews" index: an indexed 8-dimensional
# dense_vector compared with cosine similarity, plus a text field that holds
# the review body.
review_vector_mapping = {
    "type": "dense_vector",
    "dims": 8,
    "index": True,
    "similarity": "cosine",
}
review_text_mapping = {"type": "text"}

# Create the index with both fields and show the API response.
resp = client.indices.create(
    index="amazon-reviews",
    mappings={
        "properties": {
            "review_vector": review_vector_mapping,
            "review_text": review_text_mapping,
        }
    },
)
print(resp)

// Field definitions for the "amazon-reviews" index: an indexed 8-dimensional
// dense_vector compared with cosine similarity, plus a text field that holds
// the review body.
const reviewIndexProperties = {
  review_vector: {
    type: "dense_vector",
    dims: 8,
    index: true,
    similarity: "cosine",
  },
  review_text: { type: "text" },
};

// Create the index with both fields and show the API response.
const response = await client.indices.create({
  index: "amazon-reviews",
  mappings: { properties: reviewIndexProperties },
});
console.log(response);

PUT /amazon-reviews
{
  "mappings": {
    "properties": {
      "review_vector": {
        "type": "dense_vector",
        "dims": 8, 
        "index": true, 
        "similarity": "cosine" 
      },
      "review_text": {
        "type": "text"
      }
    }
  }
}

Copy as curl Try in Elastic

	`dims` 参数必须与嵌入向量的长度匹配。为了便于阅读，这里我们使用一个简单的 8 维嵌入。如果未指定，则 `dims` 将根据第一个索引的文档动态计算。
	`index` 参数设置为 `true`，以启用 `knn` 查询的使用。
	`similarity` 参数定义用于比较查询向量与文档向量的相似度函数。 `cosine` 是 Elasticsearch 中 `dense_vector` 字段的默认相似度函数。

步骤 2：使用嵌入索引文档

编辑

索引单个文档

编辑

首先，索引单个文档以了解文档结构。

# A single review: its text plus an 8-element embedding, the same length as
# the vectors used throughout this example.
review_document = {
    "review_text": "This product is lifechanging! I'm telling all my friends about it.",
    "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
}

# Store the review as document "1" in the "amazon-reviews" index.
resp = client.index(index="amazon-reviews", id="1", document=review_document)
print(resp)

// Store a single review (text plus its 8-element embedding) as document "1"
// in the "amazon-reviews" index and show the API response.
const response = await client.index({
  index: "amazon-reviews",
  // Pass the document ID as a string, like the `_id` values in the bulk
  // request, rather than relying on a number being coerced.
  id: "1",
  document: {
    review_text:
      "This product is lifechanging! I'm telling all my friends about it.",
    review_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
  },
});
console.log(response);

PUT /amazon-reviews/_doc/1
{
  "review_text": "This product is lifechanging! I'm telling all my friends about it.",
  "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] 
}

Copy as curl Try in Elastic

`review_vector` 数组的长度为 8，与映射中指定的 `dims` 值一致。

批量索引多个文档

编辑

在生产环境中，您需要使用 _bulk 端点一次索引多个文档。

以下是在单个 _bulk 请求中索引多个文档的示例。

# Index documents 2-5 in one _bulk request. Each document takes two
# consecutive entries: an "index" action naming the target index and _id,
# followed by the document source (review text plus its 8-element vector).
resp = client.bulk(
    operations=[
        {"index": {"_index": "amazon-reviews", "_id": "2"}},
        {
            "review_text": "This product is amazing! I love it.",
            "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
        },
        {"index": {"_index": "amazon-reviews", "_id": "3"}},
        {
            "review_text": "This product is terrible. I hate it.",
            "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
        },
        {"index": {"_index": "amazon-reviews", "_id": "4"}},
        {
            "review_text": "This product is great. I can do anything with it.",
            "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
        },
        {"index": {"_index": "amazon-reviews", "_id": "5"}},
        {
            "review_text": "This product has ruined my life and the lives of my family and friends.",
            "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
        },
    ],
)
print(resp)

# A bulk request can succeed as a whole while individual operations fail:
# the response flags this with "errors" and carries the details per item,
# so report any failed operations explicitly instead of ignoring them.
if resp["errors"]:
    failed_operations = [
        result
        for item in resp["items"]
        for result in item.values()
        if "error" in result
    ]
    print(f"{len(failed_operations)} bulk operation(s) failed: {failed_operations}")

// Index documents 2-5 in one _bulk request. Each document takes two
// consecutive entries: an `index` action naming the target index and _id,
// followed by the document source (review text plus its 8-element vector).
const response = await client.bulk({
  operations: [
    { index: { _index: "amazon-reviews", _id: "2" } },
    {
      review_text: "This product is amazing! I love it.",
      review_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
    },
    { index: { _index: "amazon-reviews", _id: "3" } },
    {
      review_text: "This product is terrible. I hate it.",
      review_vector: [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
    },
    { index: { _index: "amazon-reviews", _id: "4" } },
    {
      review_text: "This product is great. I can do anything with it.",
      review_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
    },
    { index: { _index: "amazon-reviews", _id: "5" } },
    {
      review_text:
        "This product has ruined my life and the lives of my family and friends.",
      review_vector: [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1],
    },
  ],
});
console.log(response);

// A bulk request can succeed as a whole while individual operations fail:
// the response flags this with `errors` and carries the details per item,
// so report any failed operations explicitly instead of ignoring them.
if (response.errors) {
  const failedOperations = response.items
    .flatMap((item) => Object.values(item))
    .filter((result) => result.error != null);
  console.error(
    `${failedOperations.length} bulk operation(s) failed:`,
    failedOperations,
  );
}

POST /_bulk
{ "index": { "_index": "amazon-reviews", "_id": "2" } }
{ "review_text": "This product is amazing! I love it.", "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] }
{ "index": { "_index": "amazon-reviews", "_id": "3" } }
{ "review_text": "This product is terrible. I hate it.", "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1] }
{ "index": { "_index": "amazon-reviews", "_id": "4" } }
{ "review_text": "This product is great. I can do anything with it.", "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] }
{ "index": { "_index": "amazon-reviews", "_id": "5" } }
{ "review_text": "This product has ruined my life and the lives of my family and friends.", "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1] }

Copy as curl Try in Elastic

步骤 3：使用嵌入搜索文档

编辑

现在，您可以使用 `knn` 检索器查询这些文档向量。`knn` 是一种向量搜索类型，它查找与查询向量最相似的 `k` 个文档。为了演示，这里我们直接使用一个原始向量作为查询输入。

# Raw 8-element query vector, used directly for demonstration purposes.
query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

# kNN retriever over the "review_vector" field: ask for the 2 most similar
# documents, with num_candidates set to 5.
knn_retriever = {
    "field": "review_vector",
    "query_vector": query_vector,
    "k": 2,
    "num_candidates": 5,
}

# Run the search against "amazon-reviews" and show the API response.
resp = client.search(index="amazon-reviews", retriever={"knn": knn_retriever})
print(resp)

// kNN retriever over the `review_vector` field: a raw 8-element query vector
// (used directly for demonstration), asking for the 2 most similar documents
// with num_candidates set to 5.
const knnRetriever = {
  field: "review_vector",
  query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
  k: 2,
  num_candidates: 5,
};

// Run the search against "amazon-reviews" and show the API response.
const response = await client.search({
  index: "amazon-reviews",
  retriever: { knn: knnRetriever },
});
console.log(response);

POST /amazon-reviews/_search
{
  "retriever": {
    "knn": {
      "field": "review_vector",
      "query_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 
      "k": 2, 
      "num_candidates": 5 
    }
  }
}

Copy as curl Try in Elastic

	在这个简单的示例中，我们发送一个原始向量作为查询文本。在实际场景中，您需要使用嵌入模型为查询生成向量。
	`k` 参数指定要返回的结果数。
	`num_candidates` 参数是可选的。它限制每个分片上要考虑的最近邻候选数量。这可以提高性能并降低成本。

了解更多

编辑

在这个简单的示例中，我们发送一个原始向量作为查询文本。在实际场景中，您不会提前知道查询文本。您需要使用生成文档向量的同一嵌入模型，动态生成查询向量。

为此，您需要在 Elasticsearch 中部署一个文本嵌入模型，并使用 query_vector_builder 参数。或者，您可以在客户端生成向量，并将其直接与搜索请求一起发送。

了解如何使用已部署的文本嵌入模型进行语义搜索。

如果您刚开始使用 Elasticsearch 中的向量搜索，请参阅语义搜索。

« 教程：使用已部署模型进行语义搜索 | 跨集群搜索 »

On this page

步骤 1：使用 dense_vector 映射创建索引
步骤 2：使用嵌入索引文档
索引单个文档
批量索引多个文档
步骤 3：使用嵌入搜索文档
了解更多

Was this helpful?

Feedback

The Search AI Company

ELK Stack

Elastic Cloud

Generative AI

Search

Security

Observability

By solution

Industries

Customer spotlight

Research

Build

Learn

Connect

将您自己的稠密向量嵌入引入 Elasticsearch

将您自己的稠密向量嵌入引入 Elasticsearch

步骤 1：使用 `dense_vector` 映射创建索引

步骤 2：使用嵌入索引文档

索引单个文档

批量索引多个文档

步骤 3：使用嵌入搜索文档

了解更多

Follow us

About us

Join us

Partners

Trust & Security

Investor relations

Excellence Awards

About us

Join us

Partners

Trust & Security

Investor relations

Excellence Awards

The Search AI Company

Generative AI

Search

Security

Observability

By solution

Industries

将您自己的稠密向量嵌入引入 Elasticsearch

将您自己的稠密向量嵌入引入 Elasticsearch

步骤 1：使用 dense_vector 映射创建索引

步骤 2：使用嵌入索引文档

索引单个文档

批量索引多个文档

步骤 3：使用嵌入搜索文档

了解更多

Follow us

About us

Join us

Partners

Trust & Security

Investor relations

Excellence Awards

步骤 1：使用 `dense_vector` 映射创建索引