SiliconFlow

创建重排序请求

Reranks documents by relevance to a query. Supports text, image, and video content.

POST
/rerank
AuthorizationBearer <token>required

Use the following format for authentication: Bearer <your API key>

In: header

modelstringrequired

The name of the model to use. To improve service quality, we periodically change the models offered by this service, including but not limited to bringing models online, taking them offline, and adjusting their service capabilities. Where feasible, we will notify you of such changes through appropriate means such as announcements or push messages. For the complete list of available models, see the Models page.

Example"BAAI/bge-reranker-v2-m3"
querystringrequired

The search query. Length must be ≥ 1.

Length1 <= length
Example"Apple"
documentsstring | arrayrequired

The list of documents to be ranked. Supports the following formats:

  • A single text string
  • An array of text strings

At least 1 document is required.

Item: A single document item in the rerank input list.

instructionstring

The instruction for the reranker. Only supported by Qwen/Qwen3-Reranker-8B, Qwen/Qwen3-Reranker-4B, Qwen/Qwen3-Reranker-0.6B. Length must be ≥ 1.

Length1 <= length
Example"Please rerank the documents based on the query."
top_ninteger

Number of most relevant documents or indices to return. Must be ≥ 1.

Range1 <= value
Example4
return_documentsboolean

If false, the response does not include document text; if true, it includes the input document text. Default is false.

max_chunks_per_docinteger

Maximum number of chunks generated from within a document. Long documents are divided into multiple chunks for calculation, and the highest score among the chunks is taken as the document's score. Only BAAI/bge-reranker-v2-m3, Pro/BAAI/bge-reranker-v2-m3, netease-youdao/bce-reranker-base_v1 support this field. Must be ≥ 1, default is 1024.

Default1024
Range1 <= value
overlap_tokensinteger

Number of token overlaps between adjacent chunks when documents are chunked. Only BAAI/bge-reranker-v2-m3, Pro/BAAI/bge-reranker-v2-m3, netease-youdao/bce-reranker-base_v1 support this field. Must be between 0 and 80.

Range0 <= value <= 80
modelstringrequired

The model name for multimodal reranking. Supported models: Qwen/Qwen3-VL-Reranker-8B.

Example"Qwen/Qwen3-VL-Reranker-8B"
querystring | objectrequired

The search query. Supports text string or content object.

  • Text query: a plain string
  • Image query: {"image": "https://example.com/image.jpg"} (image URL) or base64-encoded image data

Length must be ≥ 1.

documentsarray<string | object | object>required

The list of documents to be ranked. Each item can be a text string or a content object.

  • Text document: a plain string
  • Text object: {"text": "document text"}
  • Image object: {"image": "https://example.com/image.jpg"} (image URL) or base64-encoded image data

At least 1 document is required.

Example["apple","banana","fruit","vegetable"]

Item: A single document item in the VL rerank input list.

instructionstring

The instruction for the reranker. Only supported by Qwen/Qwen3-Reranker-8B, Qwen/Qwen3-Reranker-4B, Qwen/Qwen3-Reranker-0.6B. Length must be ≥ 1.

Length1 <= length
Example"Please rerank the documents based on the query."
top_ninteger

Number of most relevant documents or indices to return. Must be ≥ 1.

Range1 <= value
Example4
return_documentsboolean

If false, the response does not include document text; if true, it includes the input document text. Default is false.

max_chunks_per_docinteger

Maximum number of chunks generated from within a document. Must be ≥ 1, default is 1024.

Default1024
Range1 <= value
overlap_tokensinteger

Number of token overlaps between adjacent chunks when documents are chunked. Must be between 0 and 80.

Range0 <= value <= 80

Response Body

# Minimal rerank request: a single text document scored against the query.
# The endpoint requires a Bearer token; SILICONFLOW_API_KEY must hold your API key.
curl -X POST "https://api.siliconflow.cn/v1/rerank" \
  -H "Authorization: Bearer $SILICONFLOW_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "BAAI/bge-reranker-v2-m3",
    "query": "Apple",
    "documents": "apple"
  }'
// Minimal rerank request: a single text document scored against the query.
const body = JSON.stringify({
  "model": "BAAI/bge-reranker-v2-m3",
  "query": "Apple",
  "documents": "apple"
})

fetch("https://api.siliconflow.cn/v1/rerank", {
  // fetch() defaults to GET, which cannot carry a body, so the method must be explicit.
  method: "POST",
  headers: {
    // The endpoint requires a Bearer token; the key is read from the environment.
    "Authorization": `Bearer ${process.env.SILICONFLOW_API_KEY}`,
    "Content-Type": "application/json"
  },
  body
})
  // Print the parsed response, or the error if the request or parsing fails.
  .then(res => res.json())
  .then(data => console.log(data))
  .catch(err => console.error(err));
package main

import (
  "fmt"
  "io"
  "net/http"
  "os"
  "strings"
)

// main sends a minimal rerank request (a single text document scored against
// the query) and prints the response status followed by the response body.
// The API key is read from the SILICONFLOW_API_KEY environment variable.
func main() {
  url := "https://api.siliconflow.cn/v1/rerank"
  payload := strings.NewReader(`{
    "model": "BAAI/bge-reranker-v2-m3",
    "query": "Apple",
    "documents": "apple"
  }`)

  req, err := http.NewRequest("POST", url, payload)
  if err != nil {
    fmt.Fprintln(os.Stderr, "failed to build request:", err)
    return
  }
  // The endpoint requires a Bearer token in the Authorization header.
  req.Header.Add("Authorization", "Bearer "+os.Getenv("SILICONFLOW_API_KEY"))
  req.Header.Add("Content-Type", "application/json")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil when Do fails, so stop before touching res.Body.
    fmt.Fprintln(os.Stderr, "request failed:", err)
    return
  }
  defer res.Body.Close()

  // The response bytes need their own name: the request payload above is a
  // *strings.Reader, so reusing that variable for a []byte does not compile.
  respBody, err := io.ReadAll(res.Body)
  if err != nil {
    fmt.Fprintln(os.Stderr, "failed to read response:", err)
    return
  }

  fmt.Println(res.Status)
  fmt.Println(string(respBody))
}
import os

import requests

# Minimal rerank request: a single text document scored against the query.
url = "https://api.siliconflow.cn/v1/rerank"
body = {
  "model": "BAAI/bge-reranker-v2-m3",
  "query": "Apple",
  "documents": "apple"
}
response = requests.request("POST", url, json = body, headers = {
  # The endpoint requires a Bearer token; the key is read from the environment.
  "Authorization": f"Bearer {os.environ.get('SILICONFLOW_API_KEY')}",
  "Content-Type": "application/json"
})

# Print the raw response body.
print(response.text)
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.net.http.HttpResponse.BodyHandlers;
import java.time.Duration;
import java.net.http.HttpRequest.BodyPublishers;

// Request payload: a minimal rerank call with a single text document.
// A text block's content must begin on the line after the opening delimiter.
var body = BodyPublishers.ofString("""
{
  "model": "BAAI/bge-reranker-v2-m3",
  "query": "Apple",
  "documents": "apple"
}
""");
HttpClient client = HttpClient.newBuilder()
  .connectTimeout(Duration.ofSeconds(10))
  .build();

// build() returns the finished HttpRequest, so it is stored as one and sent as-is.
HttpRequest request = HttpRequest.newBuilder()
  .uri(URI.create("https://api.siliconflow.cn/v1/rerank"))
  // The endpoint requires a Bearer token; the key is read from the environment.
  .header("Authorization", "Bearer " + System.getenv("SILICONFLOW_API_KEY"))
  .header("Content-Type", "application/json")
  .POST(body)
  .build();

try {
  HttpResponse<String> response = client.send(request, BodyHandlers.ofString());
  System.out.println("Status code: " + response.statusCode());
  System.out.println("Response body: " + response.body());
} catch (Exception e) {
  e.printStackTrace();
}
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;

// Request payload: a minimal rerank call with a single text document.
var body = new StringContent("""
{
  "model": "BAAI/bge-reranker-v2-m3",
  "query": "Apple",
  "documents": "apple"
}
""", Encoding.UTF8, "application/json");

var client = new HttpClient();
// The endpoint requires a Bearer token; the key is read from the environment.
client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(
  "Bearer", Environment.GetEnvironmentVariable("SILICONFLOW_API_KEY"));
var response = await client.PostAsync("https://api.siliconflow.cn/v1/rerank", body);
var responseBody = await response.Content.ReadAsStringAsync();

// Show the result; without this the example produces no visible output.
Console.WriteLine($"Status code: {(int)response.StatusCode}");
Console.WriteLine($"Response body: {responseBody}");
# Text rerank: score four candidate documents against the query, return up to
# four results, and include each document's text in the response.
curl --request POST https://api.siliconflow.cn/v1/rerank \
  --header "Authorization: Bearer $SILICONFLOW_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "BAAI/bge-reranker-v2-m3",
    "query": "Apple",
    "documents": ["apple", "banana", "fruit", "vegetable"],
    "return_documents": true,
    "top_n": 4
  }'
import os
import requests

# Text rerank: score four candidate documents against the query, return up to
# four results, and include each document's text in the response.
api_key = os.environ.get('SILICONFLOW_API_KEY')
endpoint = "https://api.siliconflow.cn/v1/rerank"

request_body = {
    "model": "BAAI/bge-reranker-v2-m3",
    "query": "Apple",
    "documents": ["apple", "banana", "fruit", "vegetable"],
    "return_documents": True,
    "top_n": 4,
}

response = requests.post(
    endpoint,
    json=request_body,
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    },
)
print(response.json())
// Text rerank: score four candidate documents against the query, return up to
// four results, and include each document's text in the response.
const endpoint = "https://api.siliconflow.cn/v1/rerank";
const apiKey = process.env.SILICONFLOW_API_KEY;

const requestBody = {
  model: "BAAI/bge-reranker-v2-m3",
  query: "Apple",
  documents: ["apple", "banana", "fruit", "vegetable"],
  return_documents: true,
  top_n: 4
};

(async () => {
  try {
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(requestBody)
    });
    const data = await res.json();
    console.log(data);
  } catch (err) {
    // Network failures and JSON parse errors both end up here.
    console.error(err);
  }
})();
# Multimodal rerank: an image query ranked against one image document and one
# text document, using the VL reranker model.
curl --request POST https://api.siliconflow.cn/v1/rerank \
  --header "Authorization: Bearer $SILICONFLOW_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "Qwen/Qwen3-VL-Reranker-8B",
    "query": {
      "image": "https://example.com/query-image.jpg"
    },
    "documents": [
      {
        "image": "https://example.com/doc1.jpg"
      },
      "这是一个相关的文本文档..."
    ],
    "max_chunks_per_doc": 512
  }'
import os
import requests

# Multimodal rerank: an image query ranked against one image document and one
# text document, using the VL reranker model.
api_key = os.environ.get('SILICONFLOW_API_KEY')
endpoint = "https://api.siliconflow.cn/v1/rerank"

image_query = {"image": "https://example.com/query-image.jpg"}
candidates = [
    {"image": "https://example.com/doc1.jpg"},
    "这是一个相关的文本文档...",
]

request_body = {
    "model": "Qwen/Qwen3-VL-Reranker-8B",
    "query": image_query,
    "documents": candidates,
    "max_chunks_per_doc": 512,
}

response = requests.post(
    endpoint,
    json=request_body,
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    },
)
print(response.json())
// Multimodal rerank: an image query ranked against one image document and one
// text document, using the VL reranker model.
const endpoint = "https://api.siliconflow.cn/v1/rerank";
const apiKey = process.env.SILICONFLOW_API_KEY;

const requestBody = {
  model: "Qwen/Qwen3-VL-Reranker-8B",
  query: {
    image: "https://example.com/query-image.jpg"
  },
  documents: [
    { image: "https://example.com/doc1.jpg" },
    "这是一个相关的文本文档..."
  ],
  max_chunks_per_doc: 512
};

(async () => {
  try {
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(requestBody)
    });
    const data = await res.json();
    console.log(data);
  } catch (err) {
    // Network failures and JSON parse errors both end up here.
    console.error(err);
  }
})();
# Instruction-guided rerank: three text documents ranked against the query,
# with a custom instruction passed to the reranker.
curl --request POST https://api.siliconflow.cn/v1/rerank \
  --header "Authorization: Bearer $SILICONFLOW_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "Qwen/Qwen3-Reranker-8B",
    "query": "找出最相关的技术文档",
    "documents": ["文档1", "文档2", "文档3"],
    "instruction": "优先考虑最新发布的内容",
    "overlap_tokens": 20
  }'
import os
import requests

# Instruction-guided rerank: three text documents ranked against the query,
# with a custom instruction passed to the reranker.
api_key = os.environ.get('SILICONFLOW_API_KEY')
endpoint = "https://api.siliconflow.cn/v1/rerank"

request_body = {
    "model": "Qwen/Qwen3-Reranker-8B",
    "query": "找出最相关的技术文档",
    "documents": ["文档1", "文档2", "文档3"],
    "instruction": "优先考虑最新发布的内容",
    "overlap_tokens": 20,
}

response = requests.post(
    endpoint,
    json=request_body,
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    },
)
print(response.json())
// Instruction-guided rerank: three text documents ranked against the query,
// with a custom instruction passed to the reranker.
const endpoint = "https://api.siliconflow.cn/v1/rerank";
const apiKey = process.env.SILICONFLOW_API_KEY;

const requestBody = {
  model: "Qwen/Qwen3-Reranker-8B",
  query: "找出最相关的技术文档",
  documents: ["文档1", "文档2", "文档3"],
  instruction: "优先考虑最新发布的内容",
  overlap_tokens: 20
};

(async () => {
  try {
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(requestBody)
    });
    const data = await res.json();
    console.log(data);
  } catch (err) {
    // Network failures and JSON parse errors both end up here.
    console.error(err);
  }
})();
{
  "id": "rerank-20240115-abc123def456",
  "results": [
    {
      "index": 1,
      "document": {
        "text": "深度学习是机器学习的子集..."
      },
      "relevance_score": 0.85
    }
  ],
  "meta": {
    "tokens": {
      "input_tokens": 150,
      "output_tokens": 10,
      "image_tokens": 0
    },
    "billed_units": {
      "input_tokens": 150,
      "output_tokens": 10,
      "image_tokens": 0,
      "search_units": 1,
      "classifications": 0
    }
  }
}
{
  "code": 20012,
  "message": "string",
  "data": "string"
}
"Invalid token"
"Forbidden"
"404 page not found"
{
  "message": "Request was rejected due to rate limiting. If you want more, please contact contact@siliconflow.cn. Details:TPM limit reached.",
  "data": "string"
}
{
  "code": 50505,
  "message": "Model service overloaded. Please try again later.",
  "data": "string"
}
"string"