resp = client.inference.put(
    task_type="text_embedding",
    inference_id="cohere_embeddings",
    inference_config={
        "service": "cohere",
        "service_settings": {
            "api_key": "<api_key>",
            "model_id": "embed-english-v3.0",
            "embedding_type": "byte"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "cohere_embeddings",
  inference_config: {
    service: "cohere",
    service_settings: {
      api_key: "<api_key>",
      model_id: "embed-english-v3.0",
      embedding_type: "byte",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/cohere_embeddings 
{
    "service": "cohere",
    "service_settings": {
        "api_key": "<api_key>", 
        "model_id": "embed-english-v3.0", 
        "embedding_type": "byte"
    }
}

	路径中的任务类型为 `text_embedding`，而 `inference_id`（推理端点的唯一标识符）为 `cohere_embeddings`。
	您的 Cohere 帐户的 API 密钥。您可以在 Cohere 仪表板的 API 密钥部分中找到您的 API 密钥。您只需要提供一次 API 密钥。获取推理 API 不会返回您的 API 密钥。
	要使用的嵌入模型的名称。您可以在此处找到 Cohere 嵌入模型的列表。

使用此模型时，在 dense_vector 字段映射中建议使用的相似性度量为 dot_product。对于 Cohere 模型，嵌入被归一化为单位长度，在这种情况下，dot_product 和 cosine 度量是等价的。

resp = client.inference.put(
    task_type="sparse_embedding",
    inference_id="elser_embeddings",
    inference_config={
        "service": "elser",
        "service_settings": {
            "num_allocations": 1,
            "num_threads": 1
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "sparse_embedding",
  inference_id: "elser_embeddings",
  inference_config: {
    service: "elser",
    service_settings: {
      num_allocations: 1,
      num_threads: 1,
    },
  },
});
console.log(response);

PUT _inference/sparse_embedding/elser_embeddings 
{
  "service": "elser",
  "service_settings": {
    "num_allocations": 1,
    "num_threads": 1
  }
}

路径中的任务类型为 sparse_embedding，而 inference_id（推理端点的唯一标识符）为 elser_embeddings。

您无需提前下载和部署 ELSER 模型：如果模型尚未下载，上述 API 请求会先下载模型，然后再部署它。

使用 Kibana 控制台时，您可能会在响应中看到 502 Bad Gateway（错误网关）错误。此错误通常只是反映了请求超时，而模型仍在后台下载。您可以在机器学习 UI 中检查下载进度。如果使用 Python 客户端，您可以将 timeout 参数设置为更高的值。

首先，您需要在 Hugging Face 端点页面上创建一个新的推理端点以获取端点 URL。在新的端点创建页面上选择模型 all-mpnet-base-v2，然后在“高级配置”部分下选择 Sentence Embeddings 任务。创建端点。端点初始化完成后复制 URL，您需要在以下推理 API 调用中使用此 URL。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="hugging_face_embeddings",
    inference_config={
        "service": "hugging_face",
        "service_settings": {
            "api_key": "<access_token>",
            "url": "<url_endpoint>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "hugging_face_embeddings",
  inference_config: {
    service: "hugging_face",
    service_settings: {
      api_key: "<access_token>",
      url: "<url_endpoint>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/hugging_face_embeddings 
{
  "service": "hugging_face",
  "service_settings": {
    "api_key": "<access_token>", 
    "url": "<url_endpoint>" 
  }
}

	路径中的任务类型为 `text_embedding`，而 `inference_id`（推理端点的唯一标识符）为 `hugging_face_embeddings`。
	有效的 HuggingFace 访问令牌。您可以在帐户的设置页面上找到。
	您在 Hugging Face 上创建的推理端点 URL。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="openai_embeddings",
    inference_config={
        "service": "openai",
        "service_settings": {
            "api_key": "<api_key>",
            "model_id": "text-embedding-ada-002"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "openai_embeddings",
  inference_config: {
    service: "openai",
    service_settings: {
      api_key: "<api_key>",
      model_id: "text-embedding-ada-002",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/openai_embeddings 
{
    "service": "openai",
    "service_settings": {
        "api_key": "<api_key>", 
        "model_id": "text-embedding-ada-002" 
    }
}

	路径中的任务类型为 `text_embedding`，而 `inference_id`（推理端点的唯一标识符）为 `openai_embeddings`。
	您的 OpenAI 帐户的 API 密钥。您可以在 OpenAI 帐户的 API 密钥部分中找到您的 OpenAI API 密钥。您只需要提供一次 API 密钥。获取推理 API 不会返回您的 API 密钥。
	要使用的嵌入模型的名称。您可以在此处找到 OpenAI 嵌入模型的列表。

使用此模型时，在 dense_vector 字段映射中建议使用的相似性度量为 dot_product。对于 OpenAI 模型，嵌入被归一化为单位长度，在这种情况下，dot_product 和 cosine 度量是等价的。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="azure_openai_embeddings",
    inference_config={
        "service": "azureopenai",
        "service_settings": {
            "api_key": "<api_key>",
            "resource_name": "<resource_name>",
            "deployment_id": "<deployment_id>",
            "api_version": "2024-02-01"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "azure_openai_embeddings",
  inference_config: {
    service: "azureopenai",
    service_settings: {
      api_key: "<api_key>",
      resource_name: "<resource_name>",
      deployment_id: "<deployment_id>",
      api_version: "2024-02-01",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/azure_openai_embeddings 
{
    "service": "azureopenai",
    "service_settings": {
        "api_key": "<api_key>", 
        "resource_name": "<resource_name>", 
        "deployment_id": "<deployment_id>", 
        "api_version": "2024-02-01"
    }
}

	路径中的任务类型为 `text_embedding`，而 `inference_id`（推理端点的唯一标识符）为 `azure_openai_embeddings`。
	用于访问您的 Azure OpenAI 服务的 API 密钥。或者，您可以在此处提供 `entra_id` 而不是 `api_key`。获取推理 API 不会返回此信息。
	您的 Azure 资源的名称。
	您已部署模型的 ID。

模型部署可能需要几分钟才能可用。如果您尝试按照上述方法创建模型并收到 404 错误消息，请等待几分钟，然后重试。此外，使用此模型时，在 dense_vector 字段映射中建议使用的相似性度量为 dot_product。对于 Azure OpenAI 模型，嵌入被归一化为单位长度，在这种情况下，dot_product 和 cosine 度量是等价的。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="azure_ai_studio_embeddings",
    inference_config={
        "service": "azureaistudio",
        "service_settings": {
            "api_key": "<api_key>",
            "target": "<target_uri>",
            "provider": "<provider>",
            "endpoint_type": "<endpoint_type>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "azure_ai_studio_embeddings",
  inference_config: {
    service: "azureaistudio",
    service_settings: {
      api_key: "<api_key>",
      target: "<target_uri>",
      provider: "<provider>",
      endpoint_type: "<endpoint_type>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/azure_ai_studio_embeddings 
{
    "service": "azureaistudio",
    "service_settings": {
        "api_key": "<api_key>", 
        "target": "<target_uri>", 
        "provider": "<provider>", 
        "endpoint_type": "<endpoint_type>" 
    }
}

	路径中的任务类型为 `text_embedding`，而 `inference_id`（推理端点的唯一标识符）为 `azure_ai_studio_embeddings`。
	用于访问您的 Azure AI Studio 部署模型的 API 密钥。您可以在模型部署的概览页面上找到它。
	用于访问您的 Azure AI Studio 部署模型的目标 URI。您可以在模型部署的概览页面上找到它。
	模型提供商，例如 `cohere` 或 `openai`。
	已部署的端点类型。这可以是 `token`（对于“按使用付费”部署）或 `realtime`（对于实时部署端点）。

模型部署可能需要几分钟才能可用。如果您尝试按照上述方法创建模型并收到 404 错误消息，请等待几分钟，然后重试。此外，使用此模型时，在 dense_vector 字段映射中建议使用的相似性度量为 dot_product。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="google_vertex_ai_embeddings",
    inference_config={
        "service": "googlevertexai",
        "service_settings": {
            "service_account_json": "<service_account_json>",
            "model_id": "text-embedding-004",
            "location": "<location>",
            "project_id": "<project_id>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "google_vertex_ai_embeddings",
  inference_config: {
    service: "googlevertexai",
    service_settings: {
      service_account_json: "<service_account_json>",
      model_id: "text-embedding-004",
      location: "<location>",
      project_id: "<project_id>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/google_vertex_ai_embeddings 
{
    "service": "googlevertexai",
    "service_settings": {
        "service_account_json": "<service_account_json>", 
        "model_id": "text-embedding-004", 
        "location": "<location>", 
        "project_id": "<project_id>" 
    }
}

	任务类型根据路径为 `text_embedding`。`google_vertex_ai_embeddings` 是推理端点的唯一标识符（其 `inference_id`）。
	一个以 JSON 格式表示的 Google Vertex AI API 的有效服务账户。
	有关可用模型的列表，请参阅文本嵌入 API页面。
	用于推理任务的区域名称。有关可用区域，请参阅Vertex AI 上的生成式 AI 区域。
	用于推理任务的项目名称。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="mistral_embeddings",
    inference_config={
        "service": "mistral",
        "service_settings": {
            "api_key": "<api_key>",
            "model": "<model_id>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "mistral_embeddings",
  inference_config: {
    service: "mistral",
    service_settings: {
      api_key: "<api_key>",
      model: "<model_id>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/mistral_embeddings 
{
    "service": "mistral",
    "service_settings": {
        "api_key": "<api_key>", 
        "model": "<model_id>" 
    }
}

	任务类型在路径中为`text_embedding`，推理端点的唯一标识符 `inference_id` 为 `mistral_embeddings`。
	访问 Mistral API 的 API 密钥。您可以在 Mistral 账户的 API 密钥页面中找到它。
	Mistral 嵌入模型名称，例如 `mistral-embed`。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="amazon_bedrock_embeddings",
    inference_config={
        "service": "amazonbedrock",
        "service_settings": {
            "access_key": "<aws_access_key>",
            "secret_key": "<aws_secret_key>",
            "region": "<region>",
            "provider": "<provider>",
            "model": "<model_id>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "amazon_bedrock_embeddings",
  inference_config: {
    service: "amazonbedrock",
    service_settings: {
      access_key: "<aws_access_key>",
      secret_key: "<aws_secret_key>",
      region: "<region>",
      provider: "<provider>",
      model: "<model_id>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/amazon_bedrock_embeddings 
{
    "service": "amazonbedrock",
    "service_settings": {
        "access_key": "<aws_access_key>", 
        "secret_key": "<aws_secret_key>", 
        "region": "<region>", 
        "provider": "<provider>", 
        "model": "<model_id>" 
    }
}

	任务类型在路径中为`text_embedding`，推理端点的唯一标识符 `inference_id` 为 `amazon_bedrock_embeddings`。
	用于访问 Amazon Bedrock 的用户账户的访问密钥，可在 AWS IAM 管理页面上找到。
	秘密密钥（secret key）应为与指定访问密钥配对的密钥。
	指定您的模型所在的区域。
	指定模型提供商。
	要使用的模型 ID 或 ARN。

resp = client.inference.put(
    task_type="text_embedding",
    inference_id="alibabacloud_ai_search_embeddings",
    inference_config={
        "service": "alibabacloud-ai-search",
        "service_settings": {
            "api_key": "<api_key>",
            "service_id": "<service_id>",
            "host": "<host>",
            "workspace": "<workspace>"
        }
    },
)
print(resp)

const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "alibabacloud_ai_search_embeddings",
  inference_config: {
    service: "alibabacloud-ai-search",
    service_settings: {
      api_key: "<api_key>",
      service_id: "<service_id>",
      host: "<host>",
      workspace: "<workspace>",
    },
  },
});
console.log(response);

PUT _inference/text_embedding/alibabacloud_ai_search_embeddings 
{
    "service": "alibabacloud-ai-search",
    "service_settings": {
        "api_key": "<api_key>", 
        "service_id": "<service_id>", 
        "host": "<host>", 
        "workspace": "<workspace>" 
    }
}

	任务类型在路径中为`text_embedding`，推理端点的唯一标识符 `inference_id` 为 `alibabacloud_ai_search_embeddings`。
	访问阿里云 AI Search API 的 API 密钥。您可以在阿里云账户的 API 密钥部分中找到您的 API 密钥。您只需要提供一次 API 密钥。获取推理 API 不会返回您的 API 密钥。
	阿里云 AI Search 嵌入模型名称，例如 `ops-text-embedding-zh-001`。
	阿里云 AI Search 主机地址的名称。
	阿里云 AI Search 工作区的名称。

创建索引映射

编辑

必须创建目标索引的映射 - 目标索引即包含模型根据您的输入文本创建的嵌入的索引。对于大多数模型，目标索引必须包含具有 dense_vector 字段类型的字段；对于像 elser 服务中的稀疏向量模型，则必须包含具有 sparse_vector 字段类型的字段，以便对所用模型的输出进行索引。

resp = client.indices.create(
    index="cohere-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "byte"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'cohere-embeddings',
  body: {
    mappings: {
      properties: {
        content_embedding: {
          type: 'dense_vector',
          dims: 1024,
          element_type: 'byte'
        },
        content: {
          type: 'text'
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "cohere-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1024,
        element_type: "byte",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT cohere-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1024, 
        "element_type": "byte"
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。在您使用的模型的Cohere 文档中找到此值。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="elser-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "sparse_vector"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "elser-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "sparse_vector",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT elser-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "sparse_vector" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	对于 ELSER，包含令牌的字段是 `sparse_vector` 字段。
	要从中创建稀疏向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="hugging-face-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 768,
                "element_type": "float"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'hugging-face-embeddings',
  body: {
    mappings: {
      properties: {
        content_embedding: {
          type: 'dense_vector',
          dims: 768,
          element_type: 'float'
        },
        content: {
          type: 'text'
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "hugging-face-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 768,
        element_type: "float",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT hugging-face-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 768, 
        "element_type": "float"
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。在HuggingFace 模型文档中找到此值。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="openai-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1536,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'openai-embeddings',
  body: {
    mappings: {
      properties: {
        content_embedding: {
          type: 'dense_vector',
          dims: 1536,
          element_type: 'float',
          similarity: 'dot_product'
        },
        content: {
          type: 'text'
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "openai-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1536,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT openai-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1536, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。在您使用的模型的OpenAI 文档中找到此值。
	可以使用更快的 `dot_product` 函数来计算相似度，因为 OpenAI 嵌入被归一化为单位长度。您可以查看OpenAI 文档，了解应使用哪种相似度函数。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="azure-openai-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1536,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

response = client.indices.create(
  index: 'azure-openai-embeddings',
  body: {
    mappings: {
      properties: {
        content_embedding: {
          type: 'dense_vector',
          dims: 1536,
          element_type: 'float',
          similarity: 'dot_product'
        },
        content: {
          type: 'text'
        }
      }
    }
  }
)
puts response

const response = await client.indices.create({
  index: "azure-openai-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1536,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT azure-openai-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1536, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。在您使用的模型的Azure OpenAI 文档中找到此值。
	对于 Azure OpenAI 嵌入，应使用 `dot_product` 函数来计算相似度，因为 Azure OpenAI 嵌入被归一化为单位长度。有关模型规范的更多信息，请参阅Azure OpenAI 嵌入文档。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="azure-ai-studio-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1536,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "azure-ai-studio-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1536,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT azure-ai-studio-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1536, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。此值可以在 Azure AI Studio 部署中的模型卡中找到。
	对于 Azure AI Studio 嵌入，应使用 `dot_product` 函数来计算相似度。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="google-vertex-ai-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 768,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "google-vertex-ai-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 768,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT google-vertex-ai-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 768, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成的嵌入的字段名称。它必须在下一步的推理管道配置中引用。
	包含嵌入的字段是 `dense_vector` 字段。
	模型的输出维度。此值可以在Google Vertex AI 模型参考中找到。如果未指定 `dims`，推理 API 会尝试自动计算输出维度。
	对于 Google Vertex AI 嵌入，应使用 `dot_product` 函数来计算相似度。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为 `text`。

resp = client.indices.create(
    index="mistral-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "mistral-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1024,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT mistral-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1024, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。此值可以在Mistral 模型参考中找到。
	对于 Mistral 嵌入，应使用 `dot_product` 函数来计算相似度。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="amazon-bedrock-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "float",
                "similarity": "dot_product"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "amazon-bedrock-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1024,
        element_type: "float",
        similarity: "dot_product",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT amazon-bedrock-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1024, 
        "element_type": "float",
        "similarity": "dot_product" 
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。此值可能因使用的底层模型而异。请参阅Amazon Titan 模型或Cohere 嵌入模型文档。
	对于 Amazon Bedrock 嵌入，Amazon Titan 模型应使用 `dot_product` 函数来计算相似度，Cohere 模型应使用 `cosine` 函数。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

resp = client.indices.create(
    index="alibabacloud-ai-search-embeddings",
    mappings={
        "properties": {
            "content_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "float"
            },
            "content": {
                "type": "text"
            }
        }
    },
)
print(resp)

const response = await client.indices.create({
  index: "alibabacloud-ai-search-embeddings",
  mappings: {
    properties: {
      content_embedding: {
        type: "dense_vector",
        dims: 1024,
        element_type: "float",
      },
      content: {
        type: "text",
      },
    },
  },
});
console.log(response);

PUT alibabacloud-ai-search-embeddings
{
  "mappings": {
    "properties": {
      "content_embedding": { 
        "type": "dense_vector", 
        "dims": 1024, 
        "element_type": "float"
      },
      "content": { 
        "type": "text" 
      }
    }
  }
}

	包含生成令牌的字段名称。它必须在下一步的推理管道配置中引用。
	包含令牌的字段是 `dense_vector` 字段。
	模型的输出维度。此值可能因使用的底层模型而异。请参阅阿里云 AI Search 嵌入模型文档。
	要从中创建密集向量表示的字段名称。在本例中，字段名称为 `content`。它必须在下一步的推理管道配置中引用。
	字段类型，在本例中为文本。

使用推理处理器创建摄取管道

编辑

使用推理处理器创建一个摄取管道，并使用您上面创建的模型对管道中正在摄取的数据进行推理。

resp = client.ingest.put_pipeline(
    id="cohere_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "cohere_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "cohere_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "cohere_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/cohere_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "cohere_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="elser_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "elser_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "elser_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "elser_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/elser_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "elser_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="hugging_face_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "hugging_face_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "hugging_face_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "hugging_face_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/hugging_face_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "hugging_face_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="openai_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "openai_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "openai_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "openai_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/openai_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "openai_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="azure_openai_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "azure_openai_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "azure_openai_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "azure_openai_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/azure_openai_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "azure_openai_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="azure_ai_studio_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "azure_ai_studio_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "azure_ai_studio_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "azure_ai_studio_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/azure_ai_studio_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "azure_ai_studio_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="google_vertex_ai_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "google_vertex_ai_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "google_vertex_ai_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "google_vertex_ai_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/google_vertex_ai_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "google_vertex_ai_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="mistral_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "mistral_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "mistral_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "mistral_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/mistral_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "mistral_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="amazon_bedrock_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "amazon_bedrock_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "amazon_bedrock_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "amazon_bedrock_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/amazon_bedrock_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "amazon_bedrock_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

resp = client.ingest.put_pipeline(
    id="alibabacloud_ai_search_embeddings_pipeline",
    processors=[
        {
            "inference": {
                "model_id": "alibabacloud_ai_search_embeddings",
                "input_output": {
                    "input_field": "content",
                    "output_field": "content_embedding"
                }
            }
        }
    ],
)
print(resp)

const response = await client.ingest.putPipeline({
  id: "alibabacloud_ai_search_embeddings_pipeline",
  processors: [
    {
      inference: {
        model_id: "alibabacloud_ai_search_embeddings",
        input_output: {
          input_field: "content",
          output_field: "content_embedding",
        },
      },
    },
  ],
});
console.log(response);

PUT _ingest/pipeline/alibabacloud_ai_search_embeddings_pipeline
{
  "processors": [
    {
      "inference": {
        "model_id": "alibabacloud_ai_search_embeddings", 
        "input_output": { 
          "input_field": "content",
          "output_field": "content_embedding"
        }
      }
    }
  ]
}

	您使用创建推理 API创建的推理端点的名称，在该步骤中称为 `inference_id`。
	定义推理过程的 `input_field` 和包含推理结果的 `output_field` 的配置对象。

加载数据

编辑

在此步骤中，您加载稍后在推理摄取管道中使用的数据，以从中创建嵌入。

使用 msmarco-passagetest2019-top1000 数据集，它是 MS MARCO 段落排序数据集的一个子集。它包含 200 个查询，每个查询都附带一个相关文本段落的列表。所有唯一的段落及其 ID 都已从该数据集中提取并汇总到一个 tsv 文件中。

下载该文件并使用机器学习 UI 中的数据可视化工具（Data Visualizer）将其上传到您的集群。分析完数据后，单击覆盖设置。在编辑字段名称下，将 id 分配给第一列，将 content 分配给第二列。单击应用，然后单击导入。将索引命名为 test-data，然后单击导入。上传完成后，您将看到一个名为 test-data 的索引，其中包含 182,469 个文档。

通过推理摄取管道摄取数据

编辑

通过使用所选模型的推理管道重新索引数据，从文本中创建嵌入。此步骤使用重新索引 API 来模拟通过管道的数据摄取。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "cohere-embeddings",
        "pipeline": "cohere_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "cohere-embeddings",
    pipeline: "cohere_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "cohere-embeddings",
    "pipeline": "cohere_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

您 Cohere 账户的速率限制可能会影响重新索引过程的吞吐量。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "elser-embeddings",
        "pipeline": "elser_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "elser-embeddings",
    pipeline: "elser_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "elser-embeddings",
    "pipeline": "elser_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "hugging-face-embeddings",
        "pipeline": "hugging_face_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "hugging-face-embeddings",
    pipeline: "hugging_face_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "hugging-face-embeddings",
    "pipeline": "hugging_face_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "openai-embeddings",
        "pipeline": "openai_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "openai-embeddings",
    pipeline: "openai_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "openai-embeddings",
    "pipeline": "openai_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

您 OpenAI 账户的速率限制可能会影响重新索引过程的吞吐量。如果发生这种情况，请将 size 更改为 3 或类似数量级的数值。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "azure-openai-embeddings",
        "pipeline": "azure_openai_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "azure-openai-embeddings",
    pipeline: "azure_openai_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "azure-openai-embeddings",
    "pipeline": "azure_openai_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

您 Azure OpenAI 账户的速率限制可能会影响重新索引过程的吞吐量。如果发生这种情况，请将 size 更改为 3 或类似数量级的数值。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "azure-ai-studio-embeddings",
        "pipeline": "azure_ai_studio_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "azure-ai-studio-embeddings",
    pipeline: "azure_ai_studio_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "azure-ai-studio-embeddings",
    "pipeline": "azure_ai_studio_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

您的 Azure AI Studio 模型部署可能已实施速率限制，这可能会影响重新索引过程的吞吐量。如果发生这种情况，请将 size 更改为 3 或类似数量级的数值。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "google-vertex-ai-embeddings",
        "pipeline": "google_vertex_ai_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "google-vertex-ai-embeddings",
    pipeline: "google_vertex_ai_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "google-vertex-ai-embeddings",
    "pipeline": "google_vertex_ai_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。减小 size 将使重新索引过程的更新更快。这使您可以密切关注进度并在早期检测到错误。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "mistral-embeddings",
        "pipeline": "mistral_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "mistral-embeddings",
    pipeline: "mistral_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "mistral-embeddings",
    "pipeline": "mistral_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "amazon-bedrock-embeddings",
        "pipeline": "amazon_bedrock_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "amazon-bedrock-embeddings",
    pipeline: "amazon_bedrock_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "amazon-bedrock-embeddings",
    "pipeline": "amazon_bedrock_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

resp = client.reindex(
    wait_for_completion=False,
    source={
        "index": "test-data",
        "size": 50
    },
    dest={
        "index": "alibabacloud-ai-search-embeddings",
        "pipeline": "alibabacloud_ai_search_embeddings_pipeline"
    },
)
print(resp)

const response = await client.reindex({
  wait_for_completion: "false",
  source: {
    index: "test-data",
    size: 50,
  },
  dest: {
    index: "alibabacloud-ai-search-embeddings",
    pipeline: "alibabacloud_ai_search_embeddings_pipeline",
  },
});
console.log(response);

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "test-data",
    "size": 50 
  },
  "dest": {
    "index": "alibabacloud-ai-search-embeddings",
    "pipeline": "alibabacloud_ai_search_embeddings_pipeline"
  }
}

重新索引的默认批次大小为 1000。将 size 减少到更小的数字可以加快重新索引过程的更新速度，这使您可以密切关注进度并在早期检测到错误。

该调用返回一个任务 ID 以监控进度。

resp = client.tasks.get(
    task_id="<task_id>",
)
print(resp)

const response = await client.tasks.get({
  task_id: "<task_id>",
});
console.log(response);

GET _tasks/<task_id>

重新索引大型数据集可能需要很长时间。您可以只使用数据集的一部分来测试此工作流。为此，请取消重新索引过程，并且仅为已重新索引的子集生成嵌入。以下 API 请求将取消重新索引任务。

resp = client.tasks.cancel(
    task_id="<task_id>",
)
print(resp)

const response = await client.tasks.cancel({
  task_id: "<task_id>",
});
console.log(response);

POST _tasks/<task_id>/_cancel

语义搜索

编辑

在数据集使用嵌入进行丰富后，您可以使用语义搜索来查询数据。对于密集向量模型，请将 query_vector_builder 传递给 k 近邻 (kNN) 向量搜索 API，并提供查询文本和用于创建嵌入的模型。对于像 ELSER 这样的稀疏向量模型，请使用 sparse_vector 查询，并提供查询文本和用于创建嵌入的模型。

如果您取消了重新索引过程，则仅对数据的一部分运行查询，这会影响结果的质量。

resp = client.search(
    index="cohere-embeddings",
    knn={
        "field": "content_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "cohere_embeddings",
                "model_text": "Muscles in human body"
            }
        },
        "k": 10,
        "num_candidates": 100
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

response = client.search(
  index: 'cohere-embeddings',
  body: {
    knn: {
      field: 'content_embedding',
      query_vector_builder: {
        text_embedding: {
          model_id: 'cohere_embeddings',
          model_text: 'Muscles in human body'
        }
      },
      k: 10,
      num_candidates: 100
    },
    _source: [
      'id',
      'content'
    ]
  }
)
puts response

const response = await client.search({
  index: "cohere-embeddings",
  knn: {
    field: "content_embedding",
    query_vector_builder: {
      text_embedding: {
        model_id: "cohere_embeddings",
        model_text: "Muscles in human body",
      },
    },
    k: 10,
    num_candidates: 100,
  },
  _source: ["id", "content"],
});
console.log(response);

GET cohere-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "cohere_embeddings",
        "model_text": "Muscles in human body"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自 cohere-embeddings 索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "cohere-embeddings",
        "_id": "-eFWCY4BECzWLnMZuI78",
        "_score": 0.737484,
        "_source": {
          "id": 1690948,
          "content": "Oxygen is supplied to the muscles via red blood cells. Red blood cells carry hemoglobin which oxygen bonds with as the hemoglobin rich blood cells pass through the blood vessels of the lungs.The now oxygen rich blood cells carry that oxygen to the cells that are demanding it, in this case skeletal muscle cells.ther ways in which muscles are supplied with oxygen include: 1  Blood flow from the heart is increased. 2  Blood flow to your muscles in increased. 3  Blood flow from nonessential organs is transported to working muscles."
        }
      },
      {
        "_index": "cohere-embeddings",
        "_id": "HuFWCY4BECzWLnMZuI_8",
        "_score": 0.7176013,
        "_source": {
          "id": 1692482,
          "content": "The thoracic cavity is separated from the abdominal cavity by the  diaphragm. This is a broad flat muscle.    (muscular) diaphragm The diaphragm is a muscle that separat…e the thoracic from the abdominal cavity. The pelvis is the lowest part of the abdominal cavity and it has no physical separation from it    Diaphragm."
        }
      },
      {
        "_index": "cohere-embeddings",
        "_id": "IOFWCY4BECzWLnMZuI_8",
        "_score": 0.7154432,
        "_source": {
          "id": 1692489,
          "content": "Muscular Wall Separating the Abdominal and Thoracic Cavities; Thoracic Cavity of a Fetal Pig; In Mammals the Diaphragm Separates the Abdominal Cavity from the"
        }
      },
      {
        "_index": "cohere-embeddings",
        "_id": "C-FWCY4BECzWLnMZuI_8",
        "_score": 0.695313,
        "_source": {
          "id": 1691493,
          "content": "Burning, aching, tenderness and stiffness are just some descriptors of the discomfort you may feel in the muscles you exercised one to two days ago.For the most part, these sensations you experience after exercise are collectively known as delayed onset muscle soreness.urning, aching, tenderness and stiffness are just some descriptors of the discomfort you may feel in the muscles you exercised one to two days ago."
        }
      },
      (...)
    ]

resp = client.search(
    index="elser-embeddings",
    query={
        "sparse_vector": {
            "field": "content_embedding",
            "inference_id": "elser_embeddings",
            "query": "How to avoid muscle soreness after running?"
        }
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

const response = await client.search({
  index: "elser-embeddings",
  query: {
    sparse_vector: {
      field: "content_embedding",
      inference_id: "elser_embeddings",
      query: "How to avoid muscle soreness after running?",
    },
  },
  _source: ["id", "content"],
});
console.log(response);

GET elser-embeddings/_search
{
  "query":{
    "sparse_vector":{
      "field": "content_embedding",
      "inference_id": "elser_embeddings",
      "query": "How to avoid muscle soreness after running?"
    }
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自 elser-embeddings 索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
{
"_index": "elser-embeddings",
"_id": "ZLGc_pABZbBmsu5_eCoH",
"_score": 21.472063,
"_source": {
"id": 2258240,
"content": "You may notice some muscle aches while you are exercising. This is called acute soreness. More often, you may begin to feel sore about 12 hours after exercising, and the discomfort usually peaks at 48 to 72 hours after exercise. This is called delayed-onset muscle soreness.It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger.You may also notice the muscles feel better if you exercise lightly. This is normal.his is called delayed-onset muscle soreness. It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger. You may also notice the muscles feel better if you exercise lightly. This is normal."
}
},
{
"_index": "elser-embeddings",
"_id": "ZbGc_pABZbBmsu5_eCoH",
"_score": 21.421381,
"_source": {
"id": 2258242,
"content": "Photo Credit Jupiterimages/Stockbyte/Getty Images. That stiff, achy feeling you get in the days after exercise is a normal physiological response known as delayed onset muscle soreness. You can take it as a positive sign that your muscles have felt the workout, but the pain may also turn you off to further exercise.ou are more likely to develop delayed onset muscle soreness if you are new to working out, if you’ve gone a long time without exercising and start up again, if you have picked up a new type of physical activity or if you have recently boosted the intensity, length or frequency of your exercise sessions."
}
},
{
"_index": "elser-embeddings",
"_id": "ZrGc_pABZbBmsu5_eCoH",
"_score": 20.542095,
"_source": {
"id": 2258248,
"content": "They found that stretching before and after exercise has no effect on muscle soreness. Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise.These cells, in turn, produce bradykinins and prostaglandins, which make the pain receptors in your body more sensitive. Whenever you move, these pain receptors are stimulated.hey found that stretching before and after exercise has no effect on muscle soreness. Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise."
}
},
(...)
]

resp = client.search(
    index="hugging-face-embeddings",
    knn={
        "field": "content_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "hugging_face_embeddings",
                "model_text": "What's margin of error?"
            }
        },
        "k": 10,
        "num_candidates": 100
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

response = client.search(
  index: 'hugging-face-embeddings',
  body: {
    knn: {
      field: 'content_embedding',
      query_vector_builder: {
        text_embedding: {
          model_id: 'hugging_face_embeddings',
          model_text: "What's margin of error?"
        }
      },
      k: 10,
      num_candidates: 100
    },
    _source: [
      'id',
      'content'
    ]
  }
)
puts response

const response = await client.search({
  index: "hugging-face-embeddings",
  knn: {
    field: "content_embedding",
    query_vector_builder: {
      text_embedding: {
        model_id: "hugging_face_embeddings",
        model_text: "What's margin of error?",
      },
    },
    k: 10,
    num_candidates: 100,
  },
  _source: ["id", "content"],
});
console.log(response);

GET hugging-face-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "hugging_face_embeddings",
        "model_text": "What's margin of error?"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自hugging-face-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "hugging-face-embeddings",
        "_id": "ljEfo44BiUQvMpPgT20E",
        "_score": 0.8522128,
        "_source": {
          "id": 7960255,
          "content": "The margin of error can be defined by either of the following equations. Margin of error = Critical value x Standard deviation of the statistic. Margin of error = Critical value x Standard error of the statistic. If you know the standard deviation of the statistic, use the first equation to compute the margin of error. Otherwise, use the second equation. Previously, we described how to compute the standard deviation and standard error."
        }
      },
      {
        "_index": "hugging-face-embeddings",
        "_id": "lzEfo44BiUQvMpPgT20E",
        "_score": 0.7865497,
        "_source": {
          "id": 7960259,
          "content": "1 y ou are told only the size of the sample and are asked to provide the margin of error for percentages which are not (yet) known. 2  This is typically the case when you are computing the margin of error for a survey which is going to be conducted in the future."
        }
      },
      {
        "_index": "hugging-face-embeddings",
        "_id": "DjEfo44BiUQvMpPgT20E",
        "_score": 0.6229427,
        "_source": {
          "id": 2166183,
          "content": "1. In general, the point at which gains equal losses. 2. In options, the market price that a stock must reach for option buyers to avoid a loss if they exercise. For a call, it is the strike price plus the premium paid. For a put, it is the strike price minus the premium paid."
        }
      },
      {
        "_index": "hugging-face-embeddings",
        "_id": "VzEfo44BiUQvMpPgT20E",
        "_score": 0.6034223,
        "_source": {
          "id": 2173417,
          "content": "How do you find the area of a circle? Can you measure the area of a circle and use that to find a value for Pi?"
        }
      },
      (...)
    ]

resp = client.search(
    index="openai-embeddings",
    knn={
        "field": "content_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "openai_embeddings",
                "model_text": "Calculate fuel cost"
            }
        },
        "k": 10,
        "num_candidates": 100
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

response = client.search(
  index: 'openai-embeddings',
  body: {
    knn: {
      field: 'content_embedding',
      query_vector_builder: {
        text_embedding: {
          model_id: 'openai_embeddings',
          model_text: 'Calculate fuel cost'
        }
      },
      k: 10,
      num_candidates: 100
    },
    _source: [
      'id',
      'content'
    ]
  }
)
puts response

const response = await client.search({
  index: "openai-embeddings",
  knn: {
    field: "content_embedding",
    query_vector_builder: {
      text_embedding: {
        model_id: "openai_embeddings",
        model_text: "Calculate fuel cost",
      },
    },
    k: 10,
    num_candidates: 100,
  },
  _source: ["id", "content"],
});
console.log(response);

GET openai-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "openai_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自openai-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "openai-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "openai-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "openai-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

resp = client.search(
    index="azure-openai-embeddings",
    knn={
        "field": "content_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "azure_openai_embeddings",
                "model_text": "Calculate fuel cost"
            }
        },
        "k": 10,
        "num_candidates": 100
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

response = client.search(
  index: 'azure-openai-embeddings',
  body: {
    knn: {
      field: 'content_embedding',
      query_vector_builder: {
        text_embedding: {
          model_id: 'azure_openai_embeddings',
          model_text: 'Calculate fuel cost'
        }
      },
      k: 10,
      num_candidates: 100
    },
    _source: [
      'id',
      'content'
    ]
  }
)
puts response

const response = await client.search({
  index: "azure-openai-embeddings",
  knn: {
    field: "content_embedding",
    query_vector_builder: {
      text_embedding: {
        model_id: "azure_openai_embeddings",
        model_text: "Calculate fuel cost",
      },
    },
    k: 10,
    num_candidates: 100,
  },
  _source: ["id", "content"],
});
console.log(response);

GET azure-openai-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "azure_openai_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自azure-openai-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "azure-openai-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "azure-openai-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "azure-openai-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

resp = client.search(
    index="azure-ai-studio-embeddings",
    knn={
        "field": "content_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "azure_ai_studio_embeddings",
                "model_text": "Calculate fuel cost"
            }
        },
        "k": 10,
        "num_candidates": 100
    },
    source=[
        "id",
        "content"
    ],
)
print(resp)

// kNN search on the "content_embedding" field of the
// azure-ai-studio-embeddings index. The query vector is built from the text
// "Calculate fuel cost" by the text_embedding builder using
// model_id "azure_ai_studio_embeddings".
const knn = {
  field: "content_embedding",
  query_vector_builder: {
    text_embedding: {
      model_id: "azure_ai_studio_embeddings",
      model_text: "Calculate fuel cost",
    },
  },
  k: 10,
  num_candidates: 100,
};

// Only the "id" and "content" source fields are requested.
const response = await client.search({
  index: "azure-ai-studio-embeddings",
  knn,
  _source: ["id", "content"],
});
console.log(response);

GET azure-ai-studio-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "azure_ai_studio_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自azure-ai-studio-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "azure-ai-studio-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "azure-ai-studio-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "azure-ai-studio-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

# kNN search on the "content_embedding" field of the
# google-vertex-ai-embeddings index. The query vector is built from the text
# "Calculate fuel cost" by the text_embedding builder using
# model_id "google_vertex_ai_embeddings".
knn_query = {
    "field": "content_embedding",
    "query_vector_builder": {
        "text_embedding": {
            "model_id": "google_vertex_ai_embeddings",
            "model_text": "Calculate fuel cost",
        }
    },
    "k": 10,
    "num_candidates": 100,
}

# Only the "id" and "content" source fields are requested.
resp = client.search(
    index="google-vertex-ai-embeddings",
    knn=knn_query,
    source=["id", "content"],
)
print(resp)

// kNN search on the "content_embedding" field of the
// google-vertex-ai-embeddings index. The query vector is built from the text
// "Calculate fuel cost" by the text_embedding builder using
// model_id "google_vertex_ai_embeddings".
const knn = {
  field: "content_embedding",
  query_vector_builder: {
    text_embedding: {
      model_id: "google_vertex_ai_embeddings",
      model_text: "Calculate fuel cost",
    },
  },
  k: 10,
  num_candidates: 100,
};

// Only the "id" and "content" source fields are requested.
const response = await client.search({
  index: "google-vertex-ai-embeddings",
  knn,
  _source: ["id", "content"],
});
console.log(response);

GET google-vertex-ai-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "google_vertex_ai_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自 google-vertex-ai-embeddings 索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
{
"_index": "google-vertex-ai-embeddings",
"_id": "Ryv0nZEBBFPLbFsdCbGn",
"_score": 0.86815524,
"_source": {
"id": 3041038,
"content": "For example, the cost of the fuel could be 96.9, the amount could be 10 pounds, and the distance covered could be 80 miles. To convert between Litres per 100KM and Miles Per Gallon, please provide a value and click on the required button.o calculate how much fuel you'll need for a given journey, please provide the distance in miles you will be covering on your journey, and the estimated MPG of your vehicle. To work out what MPG you are really getting, please provide the cost of the fuel, how much you spent on the fuel, and how far it took you."
}
},
{
"_index": "google-vertex-ai-embeddings",
"_id": "w4j0nZEBZ1nFq1oiHQvK",
"_score": 0.8676357,
"_source": {
"id": 1541469,
"content": "This driving cost calculator takes into consideration the fuel economy of the vehicle that you are travelling in as well as the fuel cost. This road trip gas calculator will give you an idea of how much would it cost to drive before you actually travel.his driving cost calculator takes into consideration the fuel economy of the vehicle that you are travelling in as well as the fuel cost. This road trip gas calculator will give you an idea of how much would it cost to drive before you actually travel."
}
},
{
"_index": "google-vertex-ai-embeddings",
"_id": "Hoj0nZEBZ1nFq1oiHQjJ",
"_score": 0.80510974,
"_source": {
"id": 7982559,
"content": "What's that light cost you? 1 Select your electric rate (or click to enter your own). 2 You can calculate results for up to four types of lights. 3 Select the type of lamp (i.e. 4 Select the lamp wattage (lamp lumens). 5 Enter the number of lights in use. 6 Select how long the lamps are in use (or click to enter your own; enter hours on per year). 7 Finally, ..."
}
},
(...)
]

# kNN search on the "content_embedding" field of the mistral-embeddings
# index. The query vector is built from the text "Calculate fuel cost" by the
# text_embedding builder using model_id "mistral_embeddings".
knn_query = {
    "field": "content_embedding",
    "query_vector_builder": {
        "text_embedding": {
            "model_id": "mistral_embeddings",
            "model_text": "Calculate fuel cost",
        }
    },
    "k": 10,
    "num_candidates": 100,
}

# Only the "id" and "content" source fields are requested.
resp = client.search(
    index="mistral-embeddings",
    knn=knn_query,
    source=["id", "content"],
)
print(resp)

// kNN search on the "content_embedding" field of the mistral-embeddings
// index. The query vector is built from the text "Calculate fuel cost" by the
// text_embedding builder using model_id "mistral_embeddings".
const knn = {
  field: "content_embedding",
  query_vector_builder: {
    text_embedding: {
      model_id: "mistral_embeddings",
      model_text: "Calculate fuel cost",
    },
  },
  k: 10,
  num_candidates: 100,
};

// Only the "id" and "content" source fields are requested.
const response = await client.search({
  index: "mistral-embeddings",
  knn,
  _source: ["id", "content"],
});
console.log(response);

GET mistral-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "mistral_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自mistral-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "mistral-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "mistral-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "mistral-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

# kNN search on the "content_embedding" field of the
# amazon-bedrock-embeddings index. The query vector is built from the text
# "Calculate fuel cost" by the text_embedding builder using
# model_id "amazon_bedrock_embeddings".
knn_query = {
    "field": "content_embedding",
    "query_vector_builder": {
        "text_embedding": {
            "model_id": "amazon_bedrock_embeddings",
            "model_text": "Calculate fuel cost",
        }
    },
    "k": 10,
    "num_candidates": 100,
}

# Only the "id" and "content" source fields are requested.
resp = client.search(
    index="amazon-bedrock-embeddings",
    knn=knn_query,
    source=["id", "content"],
)
print(resp)

// kNN search on the "content_embedding" field of the
// amazon-bedrock-embeddings index. The query vector is built from the text
// "Calculate fuel cost" by the text_embedding builder using
// model_id "amazon_bedrock_embeddings".
const knn = {
  field: "content_embedding",
  query_vector_builder: {
    text_embedding: {
      model_id: "amazon_bedrock_embeddings",
      model_text: "Calculate fuel cost",
    },
  },
  k: 10,
  num_candidates: 100,
};

// Only the "id" and "content" source fields are requested.
const response = await client.search({
  index: "amazon-bedrock-embeddings",
  knn,
  _source: ["id", "content"],
});
console.log(response);

GET amazon-bedrock-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "amazon_bedrock_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自amazon-bedrock-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "amazon-bedrock-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "amazon-bedrock-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "amazon-bedrock-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

# kNN search on the "content_embedding" field of the
# alibabacloud-ai-search-embeddings index. The query vector is built from the
# text "Calculate fuel cost" by the text_embedding builder using
# model_id "alibabacloud_ai_search_embeddings".
knn_query = {
    "field": "content_embedding",
    "query_vector_builder": {
        "text_embedding": {
            "model_id": "alibabacloud_ai_search_embeddings",
            "model_text": "Calculate fuel cost",
        }
    },
    "k": 10,
    "num_candidates": 100,
}

# Only the "id" and "content" source fields are requested.
resp = client.search(
    index="alibabacloud-ai-search-embeddings",
    knn=knn_query,
    source=["id", "content"],
)
print(resp)

// kNN search on the "content_embedding" field of the
// alibabacloud-ai-search-embeddings index. The query vector is built from the
// text "Calculate fuel cost" by the text_embedding builder using
// model_id "alibabacloud_ai_search_embeddings".
const knn = {
  field: "content_embedding",
  query_vector_builder: {
    text_embedding: {
      model_id: "alibabacloud_ai_search_embeddings",
      model_text: "Calculate fuel cost",
    },
  },
  k: 10,
  num_candidates: 100,
};

// Only the "id" and "content" source fields are requested.
const response = await client.search({
  index: "alibabacloud-ai-search-embeddings",
  knn,
  _source: ["id", "content"],
});
console.log(response);

GET alibabacloud-ai-search-embeddings/_search
{
  "knn": {
    "field": "content_embedding",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "alibabacloud_ai_search_embeddings",
        "model_text": "Calculate fuel cost"
      }
    },
    "k": 10,
    "num_candidates": 100
  },
  "_source": [
    "id",
    "content"
  ]
}

因此，您将收到来自alibabacloud-ai-search-embeddings索引中与查询语义最接近的前 10 个文档，并按其与查询的接近程度排序。

"hits": [
      {
        "_index": "alibabacloud-ai-search-embeddings",
        "_id": "DDd5OowBHxQKHyc3TDSC",
        "_score": 0.83704096,
        "_source": {
          "id": 862114,
          "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes."
        }
      },
      {
        "_index": "alibabacloud-ai-search-embeddings",
        "_id": "ajd5OowBHxQKHyc3TDSC",
        "_score": 0.8345704,
        "_source": {
          "id": 820622,
          "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances."
        }
      },
      {
        "_index": "alibabacloud-ai-search-embeddings",
        "_id": "Djd5OowBHxQKHyc3TDSC",
        "_score": 0.8327426,
        "_source": {
          "id": 8202683,
          "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel."
        }
      },
      (...)
    ]

交互式教程

编辑

您还可以使用 Elasticsearch Python 客户端以交互式 Colab Notebook 格式找到教程。

« 教程：使用 semantic_text 进行混合搜索 | 教程：使用 ELSER 进行语义搜索 »