Feat: init dataflow. (#9791)

### What problem does this PR solve?

#9790

Close #9782

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-08-28 18:40:32 +08:00
committed by GitHub
parent a246949b77
commit c27172b3bc
19 changed files with 1020 additions and 166 deletions

57
rag/flow/tests/client.py Normal file
View File

@ -0,0 +1,57 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
import trio
from api import settings
from rag.flow.pipeline import Pipeline
def print_logs(pipeline):
last_logs = "[]"
while True:
time.sleep(5)
logs = pipeline.fetch_logs()
logs_str = json.dumps(logs)
if logs_str != last_logs:
print(logs_str)
last_logs = logs_str
if __name__ == '__main__':
parser = argparse.ArgumentParser()
dsl_default_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"dsl_examples",
"general_pdf_all.json",
)
parser.add_argument('-s', '--dsl', default=dsl_default_path, help="input dsl", action='store', required=True)
parser.add_argument('-d', '--doc_id', default=False, help="Document ID", action='store', required=True)
parser.add_argument('-t', '--tenant_id', default=False, help="Tenant ID", action='store', required=True)
args = parser.parse_args()
settings.init_settings()
pipeline = Pipeline(open(args.dsl, "r").read(), tenant_id=args.tenant_id, doc_id=args.doc_id, task_id="xxxx", flow_id="xxx")
pipeline.reset()
exe = ThreadPoolExecutor(max_workers=5)
thr = exe.submit(print_logs, pipeline)
trio.run(pipeline.run)
thr.result()