mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
### What problem does this PR solve? Fixes issue: https://github.com/infiniflow/ragflow/issues/10402 Newly released embedding models support vector dimensions of up to 4096, while the current OpenSearch mapping only supports up to 1536 dimensions. In my testing, a 4096-dimension vector was treated as a float type, which is unacceptable in OpenSearch. Besides, OpenSearch supports up to 16000 dimensions by default with the Faiss vector engine, according to: https://docs.opensearch.org/2.19/field-types/supported-field-types/knn-methods-engines/ I added support for up to 10240 dimensions for OpenSearch, which I think will be sufficient in the future. In my testing, it worked well on my own server (the field was treated as knn_vector) using qwen3-embedding:8b as the embedding model: <img width="1338" height="790" alt="image" src="https://github.com/user-attachments/assets/a9b2d284-fcf6-4cea-859a-6aadccf36ace" /> ### Type of change - [x] New Feature (non-breaking change which adds functionality) By the way, I will continue to focus on work related to Elasticsearch/OpenSearch as search engines and vector databases. Co-authored-by: 张雨豪 <zhangyh80@chinatelecom.cn>
268 lines
5.6 KiB
JSON
268 lines
5.6 KiB
JSON
{
|
|
"settings": {
|
|
"index": {
|
|
"number_of_shards": 2,
|
|
"number_of_replicas": 0,
|
|
"refresh_interval": "1000ms",
|
|
"knn": true,
|
|
"similarity": {
|
|
"scripted_sim": {
|
|
"type": "scripted",
|
|
"script": {
|
|
"source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"mappings": {
|
|
"properties": {
|
|
"lat_lon": {
|
|
"type": "geo_point",
|
|
"store": "true"
|
|
}
|
|
},
|
|
"date_detection": "true",
|
|
"dynamic_templates": [
|
|
{
|
|
"int": {
|
|
"match": "*_int",
|
|
"mapping": {
|
|
"type": "integer",
|
|
"store": "true"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"ulong": {
|
|
"match": "*_ulong",
|
|
"mapping": {
|
|
"type": "unsigned_long",
|
|
"store": "true"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"long": {
|
|
"match": "*_long",
|
|
"mapping": {
|
|
"type": "long",
|
|
"store": "true"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"short": {
|
|
"match": "*_short",
|
|
"mapping": {
|
|
"type": "short",
|
|
"store": "true"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"numeric": {
|
|
"match": "*_flt",
|
|
"mapping": {
|
|
"type": "float",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"tks": {
|
|
"match": "*_tks",
|
|
"mapping": {
|
|
"type": "text",
|
|
"similarity": "scripted_sim",
|
|
"analyzer": "whitespace",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"ltks": {
|
|
"match": "*_ltks",
|
|
"mapping": {
|
|
"type": "text",
|
|
"analyzer": "whitespace",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"kwd": {
|
|
"match_pattern": "regex",
|
|
"match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
|
|
"mapping": {
|
|
"type": "keyword",
|
|
"similarity": "boolean",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"dt": {
|
|
"match_pattern": "regex",
|
|
"match": "^.*(_dt|_time|_at)$",
|
|
"mapping": {
|
|
"type": "date",
|
|
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"nested": {
|
|
"match": "*_nst",
|
|
"mapping": {
|
|
"type": "nested"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"object": {
|
|
"match": "*_obj",
|
|
"mapping": {
|
|
"type": "object",
|
|
"dynamic": "true"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"string": {
|
|
"match_pattern": "regex",
|
|
"match": "^.*_(with_weight|list)$",
|
|
"mapping": {
|
|
"type": "text",
|
|
"index": "false",
|
|
"store": true
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"rank_feature": {
|
|
"match": "*_fea",
|
|
"mapping": {
|
|
"type": "rank_feature"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"rank_features": {
|
|
"match": "*_feas",
|
|
"mapping": {
|
|
"type": "rank_features"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_512_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 512
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_768_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 768
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_1024_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 1024
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_1536_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 1536
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_2048_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 2048
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_4096_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 4096
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_6144_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 6144
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_8192_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 8192
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"knn_vector": {
|
|
"match": "*_10240_vec",
|
|
"mapping": {
|
|
"type": "knn_vector",
|
|
"index": true,
|
|
"space_type": "cosinesimil",
|
|
"dimension": 10240
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"binary": {
|
|
"match": "*_bin",
|
|
"mapping": {
|
|
"type": "binary"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
} |