mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-01 16:15:07 +08:00
Feat: refine dataflow and initialize dataflow app (#9952)
### What problem does this PR solve? Refine dataflow and initialize dataflow app. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -18,12 +18,14 @@ import json
|
||||
import os
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import trio
|
||||
|
||||
from api import settings
|
||||
from rag.flow.pipeline import Pipeline
|
||||
|
||||
|
||||
def print_logs(pipeline):
|
||||
def print_logs(pipeline: Pipeline):
|
||||
last_logs = "[]"
|
||||
while True:
|
||||
time.sleep(5)
|
||||
@ -34,16 +36,16 @@ def print_logs(pipeline):
|
||||
last_logs = logs_str
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
dsl_default_path = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)),
|
||||
"dsl_examples",
|
||||
"general_pdf_all.json",
|
||||
)
|
||||
parser.add_argument('-s', '--dsl', default=dsl_default_path, help="input dsl", action='store', required=True)
|
||||
parser.add_argument('-d', '--doc_id', default=False, help="Document ID", action='store', required=True)
|
||||
parser.add_argument('-t', '--tenant_id', default=False, help="Tenant ID", action='store', required=True)
|
||||
parser.add_argument("-s", "--dsl", default=dsl_default_path, help="input dsl", action="store", required=False)
|
||||
parser.add_argument("-d", "--doc_id", default=False, help="Document ID", action="store", required=True)
|
||||
parser.add_argument("-t", "--tenant_id", default=False, help="Tenant ID", action="store", required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
settings.init_settings()
|
||||
@ -53,5 +55,7 @@ if __name__ == '__main__':
|
||||
exe = ThreadPoolExecutor(max_workers=5)
|
||||
thr = exe.submit(print_logs, pipeline)
|
||||
|
||||
# queue_dataflow(dsl=open(args.dsl, "r").read(), tenant_id=args.tenant_id, doc_id=args.doc_id, task_id="xxxx", flow_id="xxx", priority=0)
|
||||
|
||||
trio.run(pipeline.run)
|
||||
thr.result()
|
||||
thr.result()
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
{
|
||||
"components": {
|
||||
"begin": {
|
||||
"File": {
|
||||
"obj":{
|
||||
"component_name": "File",
|
||||
"params": {
|
||||
}
|
||||
},
|
||||
"downstream": ["parser:0"],
|
||||
"downstream": ["Parser:0"],
|
||||
"upstream": []
|
||||
},
|
||||
"parser:0": {
|
||||
"Parser:0": {
|
||||
"obj": {
|
||||
"component_name": "Parser",
|
||||
"params": {
|
||||
@ -22,14 +22,22 @@
|
||||
"pdf"
|
||||
],
|
||||
"output_format": "json"
|
||||
},
|
||||
"excel": {
|
||||
"output_format": "html",
|
||||
"suffix": [
|
||||
"xls",
|
||||
"xlsx",
|
||||
"csv"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"downstream": ["chunker:0"],
|
||||
"upstream": ["begin"]
|
||||
"downstream": ["Chunker:0"],
|
||||
"upstream": ["Begin"]
|
||||
},
|
||||
"chunker:0": {
|
||||
"Chunker:0": {
|
||||
"obj": {
|
||||
"component_name": "Chunker",
|
||||
"params": {
|
||||
@ -37,18 +45,19 @@
|
||||
"auto_keywords": 5
|
||||
}
|
||||
},
|
||||
"downstream": ["tokenizer:0"],
|
||||
"upstream": ["chunker:0"]
|
||||
"downstream": ["Tokenizer:0"],
|
||||
"upstream": ["Parser:0"]
|
||||
},
|
||||
"tokenizer:0": {
|
||||
"Tokenizer:0": {
|
||||
"obj": {
|
||||
"component_name": "Tokenizer",
|
||||
"params": {
|
||||
}
|
||||
},
|
||||
"downstream": [],
|
||||
"upstream": ["chunker:0"]
|
||||
"upstream": ["Chunker:0"]
|
||||
}
|
||||
},
|
||||
"path": []
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user