Move api.settings to common.settings (#11036)

### What problem does this PR solve?

As the title says: consolidate settings into a single `common.settings` module. `from api import settings` becomes `from common import settings`, and values that previously lived in separate modules (e.g. `rag.settings.EMBEDDING_BATCH_SIZE` and `rag.utils.storage_factory.STORAGE_IMPL`) are now accessed as attributes of the shared `settings` module, so every component reads configuration from one place.
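
For orientation, a minimal sketch of what the consolidated `common/settings.py` could look like. Only the names `STORAGE_IMPL` and `EMBEDDING_BATCH_SIZE` come from the diffs below; the default value, `init_settings`, and the factory are illustrative assumptions, not the real file:

```python
# common/settings.py -- illustrative sketch, not RAGFlow's actual file.
import os

# Formerly rag.settings.EMBEDDING_BATCH_SIZE; the default of 16 is a placeholder.
EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE", "16"))

# Formerly rag.utils.storage_factory.STORAGE_IMPL. Bound at startup, so callers
# spell it `settings.STORAGE_IMPL` instead of importing the name directly.
STORAGE_IMPL = None


def init_settings():
    """Hypothetical startup hook that binds runtime singletons onto this module."""
    global STORAGE_IMPL
    STORAGE_IMPL = _build_storage_backend()


def _build_storage_backend():
    # Backend selection (MinIO, S3, ...) is elided in this sketch.
    raise NotImplementedError
```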

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Author: Jin Hai
Date: 2025-11-06 09:36:38 +08:00 (committed by GitHub)
Commit: f98b24c9bf
Parent: 87c9a054d3
68 changed files with 675 additions and 718 deletions

---

@@ -26,7 +26,7 @@ from deepdoc.parser.pdf_parser import RAGFlowPdfParser
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.hierarchical_merger.schema import HierarchicalMergerFromUpstream
 from rag.nlp import concat_img
-from rag.utils.storage_factory import STORAGE_IMPL
+from common import settings


 class HierarchicalMergerParam(ProcessParamBase):
@@ -166,7 +166,7 @@ class HierarchicalMerger(ProcessBase):
             img = None
             for i in path:
                 txt += lines[i] + "\n"
-                concat_img(img, id2image(section_images[i], partial(STORAGE_IMPL.get, tenant_id=self._canvas._tenant_id)))
+                concat_img(img, id2image(section_images[i], partial(settings.STORAGE_IMPL.get, tenant_id=self._canvas._tenant_id)))
             cks.append(txt)
             images.append(img)
@@ -180,7 +180,7 @@ class HierarchicalMerger(ProcessBase):
         ]
         async with trio.open_nursery() as nursery:
             for d in cks:
-                nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
+                nursery.start_soon(image2id, d, partial(settings.STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())

         self.set_output("chunks", cks)
         self.callback(1, "Done.")
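
A side effect of the new spelling worth noting: `from rag.utils.storage_factory import STORAGE_IMPL` froze whatever object was bound at import time, whereas `settings.STORAGE_IMPL` is resolved when the `partial` is built at run time, so late initialization is picked up. A self-contained toy demonstration (the module and class below are stand-ins, not RAGFlow code):

```python
import types
from functools import partial

settings = types.ModuleType("settings")   # stand-in for common.settings
settings.STORAGE_IMPL = None              # nothing bound yet at "import time"

frozen = settings.STORAGE_IMPL            # mimics `from x import STORAGE_IMPL`


class FakeStorage:                        # stand-in for the real backend
    def get(self, bucket, name, tenant_id=None):
        return f"{bucket}/{name} (tenant {tenant_id})"


settings.STORAGE_IMPL = FakeStorage()     # late initialization at startup

getter = partial(settings.STORAGE_IMPL.get, tenant_id="t1")  # resolved now
print(getter("bkt", "doc.pdf"))           # bkt/doc.pdf (tenant t1)
print(frozen)                             # None -- the early snapshot missed the init
```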

---

@@ -36,7 +36,7 @@ from rag.app.naive import Docx
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.parser.schema import ParserFromUpstream
 from rag.llm.cv_model import Base as VLM
-from rag.utils.storage_factory import STORAGE_IMPL
+from common import settings


 class ParserParam(ProcessParamBase):
@@ -588,7 +588,7 @@ class Parser(ProcessBase):
         name = from_upstream.name
         if self._canvas._doc_id:
             b, n = File2DocumentService.get_storage_address(doc_id=self._canvas._doc_id)
-            blob = STORAGE_IMPL.get(b, n)
+            blob = settings.STORAGE_IMPL.get(b, n)
         else:
             blob = FileService.get_blob(from_upstream.file["created_by"], from_upstream.file["id"])
@@ -606,4 +606,4 @@ class Parser(ProcessBase):
         outs = self.output()
         async with trio.open_nursery() as nursery:
             for d in outs.get("json", []):
-                nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
+                nursery.start_soon(image2id, d, partial(settings.STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())

---

@@ -23,7 +23,7 @@ from deepdoc.parser.pdf_parser import RAGFlowPdfParser
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.splitter.schema import SplitterFromUpstream
 from rag.nlp import naive_merge, naive_merge_with_images
-from rag.utils.storage_factory import STORAGE_IMPL
+from common import settings


 class SplitterParam(ProcessParamBase):
@@ -87,7 +87,7 @@ class Splitter(ProcessBase):
         sections, section_images = [], []
         for o in from_upstream.json_result or []:
             sections.append((o.get("text", ""), o.get("position_tag", "")))
-            section_images.append(id2image(o.get("img_id"), partial(STORAGE_IMPL.get, tenant_id=self._canvas._tenant_id)))
+            section_images.append(id2image(o.get("img_id"), partial(settings.STORAGE_IMPL.get, tenant_id=self._canvas._tenant_id)))

         chunks, images = naive_merge_with_images(
             sections,
@@ -106,6 +106,6 @@ class Splitter(ProcessBase):
         ]
         async with trio.open_nursery() as nursery:
             for d in cks:
-                nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
+                nursery.start_soon(image2id, d, partial(settings.STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
         self.set_output("chunks", cks)
         self.callback(1, "Done.")

---

@@ -21,7 +21,7 @@ from concurrent.futures import ThreadPoolExecutor
 import trio

-from api import settings
+from common import settings
 from rag.flow.pipeline import Pipeline
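
The one-line swap in this file is the mechanical pattern repeated across the commit's 68 files. A rough sketch of that rewrite as a script (purely illustrative; the actual PR may have been edited by hand or with other tooling, and the `rag.settings` constant imports additionally require prefixing call sites with `settings.`):

```python
import pathlib
import re

# Rewrites `from api import settings` to `from common import settings` in place.
OLD = re.compile(r"^from api import settings$", re.MULTILINE)

for path in pathlib.Path(".").rglob("*.py"):
    src = path.read_text(encoding="utf-8")
    if OLD.search(src):
        path.write_text(OLD.sub("from common import settings", src), encoding="utf-8")
        print(f"rewrote {path}")
```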

---

@@ -27,7 +27,7 @@ from common.connection_utils import timeout
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.tokenizer.schema import TokenizerFromUpstream
 from rag.nlp import rag_tokenizer
-from rag.settings import EMBEDDING_BATCH_SIZE
+from common import settings
 from rag.svr.task_executor import embed_limiter
 from common.token_utils import truncate
@@ -82,16 +82,16 @@ class Tokenizer(ProcessBase):
             return embedding_model.encode([truncate(c, embedding_model.max_length - 10) for c in txts])

         cnts_ = np.array([])
-        for i in range(0, len(texts), EMBEDDING_BATCH_SIZE):
+        for i in range(0, len(texts), settings.EMBEDDING_BATCH_SIZE):
             async with embed_limiter:
-                vts, c = await trio.to_thread.run_sync(lambda: batch_encode(texts[i : i + EMBEDDING_BATCH_SIZE]))
+                vts, c = await trio.to_thread.run_sync(lambda: batch_encode(texts[i : i + settings.EMBEDDING_BATCH_SIZE]))
             if len(cnts_) == 0:
                 cnts_ = vts
             else:
                 cnts_ = np.concatenate((cnts_, vts), axis=0)
             token_count += c
             if i % 33 == 32:
-                self.callback(i * 1.0 / len(texts) / parts / EMBEDDING_BATCH_SIZE + 0.5 * (parts - 1))
+                self.callback(i * 1.0 / len(texts) / parts / settings.EMBEDDING_BATCH_SIZE + 0.5 * (parts - 1))

         cnts = cnts_
         title_w = float(self._param.filename_embd_weight)
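
To make the loop above concrete: `range(0, len(texts), settings.EMBEDDING_BATCH_SIZE)` steps through the chunk list in batch-sized strides, and the trailing slice is automatically shorter. A standalone illustration with an assumed batch size of 16 (the real value is configuration-driven):

```python
EMBEDDING_BATCH_SIZE = 16                        # assumed value, for illustration

texts = [f"chunk-{n}" for n in range(40)]        # 40 chunks -> batches of 16, 16, 8

for i in range(0, len(texts), EMBEDDING_BATCH_SIZE):
    batch = texts[i : i + EMBEDDING_BATCH_SIZE]  # slicing never overruns the list
    print(i, len(batch))
# Output:
# 0 16
# 16 16
# 32 8
```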