support graph (#1152)

### What problem does this PR solve? #918 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-02-01 08:05:07 +08:00 · 2024-06-14 10:49:36 +08:00
parent 2023fdc13e
commit a25d32496c
24 changed files with 2344 additions and 0 deletions
--- a/graph/component/init.py
+++ b/graph/component/init.py
@ -0,0 +1,16 @@
+import importlib
+from .begin import Begin, BeginParam
+from .generate import Generate, GenerateParam
+from .retrieval import Retrieval, RetrievalParam
+from .answer import Answer, AnswerParam
+from .categorize import Categorize, CategorizeParam
+from .switch import Switch, SwitchParam
+from .relevant import Relevant, RelevantParam
+from .message import Message, MessageParam
+from .rewrite import RewriteQuestion, RewriteQuestionParam
+
+
+def component_class(class_name):
+    m = importlib.import_module("graph.component")
+    c = getattr(m, class_name)
+    return c
--- a/graph/component/answer.py
+++ b/graph/component/answer.py
@ -0,0 +1,77 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import random
+from abc import ABC
+from functools import partial
+
+import pandas as pd
+
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class AnswerParam(ComponentParamBase):
+
+    """
+    Define the Answer component parameters.
+
+    ``post_answers`` holds candidate strings; when it is non-empty the Answer
+    component picks one at random and adds it after the upstream answer.
+    """
+    def __init__(self):
+        super().__init__()
+        # Candidate closing messages; empty means nothing is appended.
+        self.post_answers = []
+
+    def check(self):
+        # No constraint is enforced on the Answer parameters.
+        return True
+
+
+class Answer(ComponentBase, ABC):
+    component_name = "Answer"
+
+    def _run(self, history, **kwargs):
+        if kwargs.get("stream"):
+            return partial(self.stream_output)
+
+        ans = self.get_input()
+        if self._param.post_answers:
+            ans = pd.concat([ans, pd.DataFrame([{"content": random.choice(self._param.post_answers)}])], ignore_index=False)
+        return ans
+
+    def stream_output(self):
+        res = None
+        if hasattr(self, "exception") and self.exception:
+            res = {"content": str(self.exception)}
+            self.exception = None
+            yield res
+            self.set_output(res)
+            return
+
+        stream = self.get_stream_input()
+        if isinstance(stream, pd.DataFrame):
+            res = stream
+            for ii, row in stream.iterrows():
+                yield row.to_dict()
+        else:
+            for st in stream():
+                res = st
+                yield st
+        if self._param.post_answers:
+            res["content"] += random.choice(self._param.post_answers)
+            yield res
+
+        self.set_output(res)
+
+    def set_exception(self, e):
+        self.exception = e
+
+
--- a/graph/component/base.py
+++ b/graph/component/base.py
@ -0,0 +1,466 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+import builtins
+import json
+import os
+from copy import deepcopy
+from functools import partial
+from typing import List, Dict
+
+import pandas as pd
+
+from graph import settings
+from graph.settings import flow_logger
+
+_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
+_DEPRECATED_PARAMS = "_deprecated_params"
+_USER_FEEDED_PARAMS = "_user_feeded_params"
+_IS_RAW_CONF = "_is_raw_conf"
+
+
+class ComponentParamBase(ABC):
+    def __init__(self):
+        self.output_var_name = "output"
+        self.message_history_window_size = 4
+
+    def set_name(self, name: str):
+        self._name = name
+        return self
+
+    def check(self):
+        raise NotImplementedError("Parameter Object should be checked.")
+
+    @classmethod
+    def _get_or_init_deprecated_params_set(cls):
+        if not hasattr(cls, _DEPRECATED_PARAMS):
+            setattr(cls, _DEPRECATED_PARAMS, set())
+        return getattr(cls, _DEPRECATED_PARAMS)
+
+    def _get_or_init_feeded_deprecated_params_set(self, conf=None):
+        if not hasattr(self, _FEEDED_DEPRECATED_PARAMS):
+            if conf is None:
+                setattr(self, _FEEDED_DEPRECATED_PARAMS, set())
+            else:
+                setattr(
+                    self,
+                    _FEEDED_DEPRECATED_PARAMS,
+                    set(conf[_FEEDED_DEPRECATED_PARAMS]),
+                )
+        return getattr(self, _FEEDED_DEPRECATED_PARAMS)
+
+    def _get_or_init_user_feeded_params_set(self, conf=None):
+        if not hasattr(self, _USER_FEEDED_PARAMS):
+            if conf is None:
+                setattr(self, _USER_FEEDED_PARAMS, set())
+            else:
+                setattr(self, _USER_FEEDED_PARAMS, set(conf[_USER_FEEDED_PARAMS]))
+        return getattr(self, _USER_FEEDED_PARAMS)
+
+    def get_user_feeded(self):
+        return self._get_or_init_user_feeded_params_set()
+
+    def get_feeded_deprecated_params(self):
+        return self._get_or_init_feeded_deprecated_params_set()
+
+    @property
+    def _deprecated_params_set(self):
+        return {name: True for name in self.get_feeded_deprecated_params()}
+
+    def __str__(self):
+
+        return json.dumps(self.as_dict(), ensure_ascii=False)
+
+    def as_dict(self):
+        def _recursive_convert_obj_to_dict(obj):
+            ret_dict = {}
+            for attr_name in list(obj.__dict__):
+                if attr_name in [_FEEDED_DEPRECATED_PARAMS, _DEPRECATED_PARAMS, _USER_FEEDED_PARAMS, _IS_RAW_CONF]:
+                    continue
+                # get attr
+                attr = getattr(obj, attr_name)
+                if isinstance(attr, pd.DataFrame):
+                    ret_dict[attr_name] = attr.to_dict()
+                    continue
+                if attr and type(attr).__name__ not in dir(builtins):
+                    ret_dict[attr_name] = _recursive_convert_obj_to_dict(attr)
+                else:
+                    ret_dict[attr_name] = attr
+
+            return ret_dict
+
+        return _recursive_convert_obj_to_dict(self)
+
+    def update(self, conf, allow_redundant=False):
+        update_from_raw_conf = conf.get(_IS_RAW_CONF, True)
+        if update_from_raw_conf:
+            deprecated_params_set = self._get_or_init_deprecated_params_set()
+            feeded_deprecated_params_set = (
+                self._get_or_init_feeded_deprecated_params_set()
+            )
+            user_feeded_params_set = self._get_or_init_user_feeded_params_set()
+            setattr(self, _IS_RAW_CONF, False)
+        else:
+            feeded_deprecated_params_set = (
+                self._get_or_init_feeded_deprecated_params_set(conf)
+            )
+            user_feeded_params_set = self._get_or_init_user_feeded_params_set(conf)
+
+        def _recursive_update_param(param, config, depth, prefix):
+            if depth > settings.PARAM_MAXDEPTH:
+                raise ValueError("Param define nesting too deep!!!, can not parse it")
+
+            inst_variables = param.__dict__
+            redundant_attrs = []
+            for config_key, config_value in config.items():
+                # redundant attr
+                if config_key not in inst_variables:
+                    if not update_from_raw_conf and config_key.startswith("_"):
+                        setattr(param, config_key, config_value)
+                    else:
+                        setattr(param, config_key, config_value)
+                        # redundant_attrs.append(config_key)
+                    continue
+
+                full_config_key = f"{prefix}{config_key}"
+
+                if update_from_raw_conf:
+                    # add user feeded params
+                    user_feeded_params_set.add(full_config_key)
+
+                    # update user feeded deprecated param set
+                    if full_config_key in deprecated_params_set:
+                        feeded_deprecated_params_set.add(full_config_key)
+
+                # supported attr
+                attr = getattr(param, config_key)
+                if type(attr).__name__ in dir(builtins) or attr is None:
+                    setattr(param, config_key, config_value)
+
+                else:
+                    # recursive set obj attr
+                    sub_params = _recursive_update_param(
+                        attr, config_value, depth + 1, prefix=f"{prefix}{config_key}."
+                    )
+                    setattr(param, config_key, sub_params)
+
+            if not allow_redundant and redundant_attrs:
+                raise ValueError(
+                    f"cpn `{getattr(self, '_name', type(self))}` has redundant parameters: `{[redundant_attrs]}`"
+                )
+
+            return param
+
+        return _recursive_update_param(param=self, config=conf, depth=0, prefix="")
+
+    def extract_not_builtin(self):
+        def _get_not_builtin_types(obj):
+            ret_dict = {}
+            for variable in obj.__dict__:
+                attr = getattr(obj, variable)
+                if attr and type(attr).__name__ not in dir(builtins):
+                    ret_dict[variable] = _get_not_builtin_types(attr)
+
+            return ret_dict
+
+        return _get_not_builtin_types(self)
+
+    def validate(self):
+        self.builtin_types = dir(builtins)
+        self.func = {
+            "ge": self._greater_equal_than,
+            "le": self._less_equal_than,
+            "in": self._in,
+            "not_in": self._not_in,
+            "range": self._range,
+        }
+        home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+        param_validation_path_prefix = home_dir + "/param_validation/"
+
+        param_name = type(self).__name__
+        param_validation_path = "/".join(
+            [param_validation_path_prefix, param_name + ".json"]
+        )
+
+        validation_json = None
+
+        try:
+            with open(param_validation_path, "r") as fin:
+                validation_json = json.loads(fin.read())
+        except BaseException:
+            return
+
+        self._validate_param(self, validation_json)
+
+    def _validate_param(self, param_obj, validation_json):
+        default_section = type(param_obj).__name__
+        var_list = param_obj.__dict__
+
+        for variable in var_list:
+            attr = getattr(param_obj, variable)
+
+            if type(attr).__name__ in self.builtin_types or attr is None:
+                if variable not in validation_json:
+                    continue
+
+                validation_dict = validation_json[default_section][variable]
+                value = getattr(param_obj, variable)
+                value_legal = False
+
+                for op_type in validation_dict:
+                    if self.func[op_type](value, validation_dict[op_type]):
+                        value_legal = True
+                        break
+
+                if not value_legal:
+                    raise ValueError(
+                        "Plase check runtime conf, {} = {} does not match user-parameter restriction".format(
+                            variable, value
+                        )
+                    )
+
+            elif variable in validation_json:
+                self._validate_param(attr, validation_json)
+
+    @staticmethod
+    def check_string(param, descr):
+        if type(param).__name__ not in ["str"]:
+            raise ValueError(
+                descr + " {} not supported, should be string type".format(param)
+            )
+
+    @staticmethod
+    def check_empty(param, descr):
+        if not param:
+            raise ValueError(
+                descr + " {} not supported empty value."
+            )
+
+    @staticmethod
+    def check_positive_integer(param, descr):
+        if type(param).__name__ not in ["int", "long"] or param <= 0:
+            raise ValueError(
+                descr + " {} not supported, should be positive integer".format(param)
+            )
+
+    @staticmethod
+    def check_positive_number(param, descr):
+        if type(param).__name__ not in ["float", "int", "long"] or param <= 0:
+            raise ValueError(
+                descr + " {} not supported, should be positive numeric".format(param)
+            )
+
+    @staticmethod
+    def check_nonnegative_number(param, descr):
+        if type(param).__name__ not in ["float", "int", "long"] or param < 0:
+            raise ValueError(
+                descr
+                + " {} not supported, should be non-negative numeric".format(param)
+            )
+
+    @staticmethod
+    def check_decimal_float(param, descr):
+        if type(param).__name__ not in ["float", "int"] or param < 0 or param > 1:
+            raise ValueError(
+                descr
+                + " {} not supported, should be a float number in range [0, 1]".format(
+                    param
+                )
+            )
+
+    @staticmethod
+    def check_boolean(param, descr):
+        if type(param).__name__ != "bool":
+            raise ValueError(
+                descr + " {} not supported, should be bool type".format(param)
+            )
+
+    @staticmethod
+    def check_open_unit_interval(param, descr):
+        if type(param).__name__ not in ["float"] or param <= 0 or param >= 1:
+            raise ValueError(
+                descr + " should be a numeric number between 0 and 1 exclusively"
+            )
+
+    @staticmethod
+    def check_valid_value(param, descr, valid_values):
+        if param not in valid_values:
+            raise ValueError(
+                descr
+                + " {} is not supported, it should be in {}".format(param, valid_values)
+            )
+
+    @staticmethod
+    def check_defined_type(param, descr, types):
+        if type(param).__name__ not in types:
+            raise ValueError(
+                descr + " {} not supported, should be one of {}".format(param, types)
+            )
+
+    @staticmethod
+    def check_and_change_lower(param, valid_list, descr=""):
+        if type(param).__name__ != "str":
+            raise ValueError(
+                descr
+                + " {} not supported, should be one of {}".format(param, valid_list)
+            )
+
+        lower_param = param.lower()
+        if lower_param in valid_list:
+            return lower_param
+        else:
+            raise ValueError(
+                descr
+                + " {} not supported, should be one of {}".format(param, valid_list)
+            )
+
+    @staticmethod
+    def _greater_equal_than(value, limit):
+        return value >= limit - settings.FLOAT_ZERO
+
+    @staticmethod
+    def _less_equal_than(value, limit):
+        return value <= limit + settings.FLOAT_ZERO
+
+    @staticmethod
+    def _range(value, ranges):
+        in_range = False
+        for left_limit, right_limit in ranges:
+            if (
+                    left_limit - settings.FLOAT_ZERO
+                    <= value
+                    <= right_limit + settings.FLOAT_ZERO
+            ):
+                in_range = True
+                break
+
+        return in_range
+
+    @staticmethod
+    def _in(value, right_value_list):
+        return value in right_value_list
+
+    @staticmethod
+    def _not_in(value, wrong_value_list):
+        return value not in wrong_value_list
+
+    def _warn_deprecated_param(self, param_name, descr):
+        if self._deprecated_params_set.get(param_name):
+            flow_logger.warning(
+                f"{descr} {param_name} is deprecated and ignored in this version."
+            )
+
+    def _warn_to_deprecate_param(self, param_name, descr, new_param):
+        if self._deprecated_params_set.get(param_name):
+            flow_logger.warning(
+                f"{descr} {param_name} will be deprecated in future release; "
+                f"please use {new_param} instead."
+            )
+            return True
+        return False
+
+
+class ComponentBase(ABC):
+    component_name: str
+
+    def __str__(self):
+        """
+        {
+            "component_name": "Begin",
+            "params": {}
+        }
+        """
+        return """{{
+            "component_name": "{}",
+            "params": {}
+        }}""".format(self.component_name,
+                     self._param
+                     )
+
+    def __init__(self, canvas, id, param: ComponentParamBase):
+        self._canvas = canvas
+        self._id = id
+        self._param = param
+        self._param.check()
+
+    def run(self, history, **kwargs):
+        flow_logger.info("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
+                                                              json.dumps(kwargs, ensure_ascii=False)))
+        try:
+            res = self._run(history, **kwargs)
+            self.set_output(res)
+        except Exception as e:
+            self.set_output(pd.DataFrame([{"content": str(e)}]))
+            raise e
+
+        return res
+
+    def _run(self, history, **kwargs):
+        raise NotImplementedError()
+
+    def output(self) -> pd.DataFrame:
+        o = getattr(self._param, self._param.output_var_name)
+        if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
+            if not isinstance(o, list): o = [o]
+            o = pd.DataFrame(o)
+        return self._param.output_var_name, o
+
+    def set_output(self, v: pd.DataFrame):
+        setattr(self._param, self._param.output_var_name, v)
+
+    def get_input(self):
+        upstream_outs = []
+        reversed_cpnts = []
+        if len(self._canvas.path) > 1:
+            reversed_cpnts.extend(self._canvas.path[-2])
+        reversed_cpnts.extend(self._canvas.path[-1])
+
+        print(self.component_name, reversed_cpnts[::-1])
+        for u in reversed_cpnts[::-1]:
+            if self.get_component_name(u) in ["switch"]: continue
+            if self.component_name.lower().find("switch") < 0 \
+                    and self.get_component_name(u) in ["relevant", "categorize"]:
+                continue
+            if u.lower().find("answer") >= 0:
+                for r, c in self._canvas.history[::-1]:
+                    if r == "user":
+                        upstream_outs.append(pd.DataFrame([{"content": c}]))
+                        break
+                break
+            if self.component_name.lower().find("answer") >= 0:
+                if self.get_component_name(u) in ["relevant"]: continue
+
+            upstream_outs.append(self._canvas.get_component(u)["obj"].output()[1])
+            break
+
+        return pd.concat(upstream_outs, ignore_index=False)
+
+    def get_stream_input(self):
+        reversed_cpnts = []
+        if len(self._canvas.path) > 1:
+            reversed_cpnts.extend(self._canvas.path[-2])
+        reversed_cpnts.extend(self._canvas.path[-1])
+
+        for u in reversed_cpnts[::-1]:
+            if self.get_component_name(u) in ["switch", "answer"]: continue
+            return self._canvas.get_component(u)["obj"].output()[1]
+
+    @staticmethod
+    def be_output(v):
+        return pd.DataFrame([{"content": v}])
+
+    def get_component_name(self, cpn_id):
+        return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
--- a/graph/component/begin.py
+++ b/graph/component/begin.py
@ -0,0 +1,49 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import json
+from functools import partial
+
+import pandas as pd
+from graph.component.base import ComponentBase, ComponentParamBase
+
+class BeginParam(ComponentParamBase):
+
+    """
+    Define the Begin component parameters.
+
+    ``prologue`` is the text the Begin component emits as its output.
+    """
+    def __init__(self):
+        super().__init__()
+        # Opening message emitted by the Begin component.
+        self.prologue = "Hi! I'm your smart assistant. What can I do for you?"
+
+    def check(self):
+        # No constraint is enforced on the Begin parameters.
+        return True
+
+
+class Begin(ComponentBase):
+    component_name = "Begin"
+
+    def _run(self, history, **kwargs):
+        if kwargs.get("stream"):
+            return partial(self.stream_output)
+        return pd.DataFrame([{"content": self._param.prologue}])
+
+    def stream_output(self):
+        res = {"content": self._param.prologue}
+        yield res
+        self.set_output(res)
+
+
+
--- a/graph/component/categorize.py
+++ b/graph/component/categorize.py
@ -0,0 +1,87 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+
+import pandas as pd
+
+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
+from graph.component import GenerateParam, Generate
+
+
+class CategorizeParam(GenerateParam):
+
+    """
+    Define the Categorize component parameters.
+    """
+    def __init__(self):
+        super().__init__()
+        self.category_description = {}
+        self.prompt = ""
+
+    def check(self):
+        super().check()
+        self.check_empty(self.category_description, "Category examples")
+
+    def get_prompt(self):
+        cate_lines = []
+        for c, desc in self.category_description.items():
+            for l in desc["examples"].split("\n"):
+                if not l: continue
+                cate_lines.append("Question: {}\tCategory: {}".format(l, c))
+        descriptions = []
+        for c, desc in self.category_description.items():
+            if desc.get("description"):
+                descriptions.append(
+                    "--------------------\nCategory: {}\nDescription: {}\n".format(c, desc["description"]))
+
+        self.prompt = """
+        You're a text classifier. You need to categorize the user’s questions into {} categories, 
+        namely: {}
+        Here's description of each category:
+        {}
+
+        You could learn from the following examples:
+        {}
+        You could learn from the above examples.
+        Just mention the category names, no need for any additional words.
+        """.format(
+            len(self.category_description.keys()),
+            "/".join(list(self.category_description.keys())),
+            "\n".join(descriptions),
+            "- ".join(cate_lines)
+        )
+        return self.prompt
+
+
+class Categorize(Generate, ABC):
+    component_name = "Categorize"
+
+    def _run(self, history, **kwargs):
+        input = self.get_input()
+        print(input, "DDDDDDDDDDDDDDDDDDDDDDDDDDDDD")
+        input = "Question: " + ("; ".join(input["content"]) if "content" in input else "") + "Category: "
+        chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
+        ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
+                            self._param.gen_conf())
+        print(ans, ":::::::::::::::::::::::::::::::::")
+        for c in self._param.category_description.keys():
+            if ans.lower().find(c.lower()) >= 0:
+                return Categorize.be_output(self._param.category_description[c]["to"])
+
+        return Categorize.be_output(self._param.category_description.items()[-1][1]["to"])
+
+
--- a/graph/component/cite.py
+++ b/graph/component/cite.py
@ -0,0 +1,75 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+
+import pandas as pd
+
+from api.db import LLMType
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import LLMBundle
+from api.settings import retrievaler
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class CiteParam(ComponentParamBase):
+
+    """
+    Define the Retrieval component parameters.
+    """
+    def __init__(self):
+        super().__init__()
+        self.cite_sources = []
+
+    def check(self):
+        self.check_empty(self.cite_source, "Please specify where you want to cite from.")
+
+
+class Cite(ComponentBase, ABC):
+    component_name = "Cite"
+
+    def _run(self, history, **kwargs):
+        input = "\n- ".join(self.get_input()["content"])
+        sources = [self._canvas.get_component(cpn_id).output()[1] for cpn_id in self._param.cite_source]
+        query = []
+        for role, cnt in history[::-1][:self._param.message_history_window_size]:
+            if role != "user":continue
+            query.append(cnt)
+        query = "\n".join(query)
+
+        kbs = KnowledgebaseService.get_by_ids(self._param.kb_ids)
+        if not kbs:
+            raise ValueError("Can't find knowledgebases by {}".format(self._param.kb_ids))
+        embd_nms = list(set([kb.embd_id for kb in kbs]))
+        assert len(embd_nms) == 1, "Knowledge bases use different embedding models."
+
+        embd_mdl = LLMBundle(kbs[0].tenant_id, LLMType.EMBEDDING, embd_nms[0])
+
+        rerank_mdl = None
+        if self._param.rerank_id:
+            rerank_mdl = LLMBundle(kbs[0].tenant_id, LLMType.RERANK, self._param.rerank_id)
+
+        kbinfos = retrievaler.retrieval(query, embd_mdl, kbs[0].tenant_id, self._param.kb_ids,
+                                        1, self._param.top_n,
+                                        self._param.similarity_threshold, 1 - self._param.keywords_similarity_weight,
+                                        aggs=False, rerank_mdl=rerank_mdl)
+
+        if not kbinfos["chunks"]: return pd.DataFrame()
+        df = pd.DataFrame(kbinfos["chunks"])
+        df["content"] = df["content_with_weight"]
+        del df["content_with_weight"]
+        return df
+
+
--- a/graph/component/generate.py
+++ b/graph/component/generate.py
@ -0,0 +1,156 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import re
+from functools import partial
+
+import pandas as pd
+
+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
+from api.settings import retrievaler
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class GenerateParam(ComponentParamBase):
+    """
+    Define the Generate component parameters.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.llm_id = ""
+        self.prompt = ""
+        self.max_tokens = 256
+        self.temperature = 0.1
+        self.top_p = 0.3
+        self.presence_penalty = 0.4
+        self.frequency_penalty = 0.7
+        self.cite = True
+        #self.parameters = []
+
+    def check(self):
+        self.check_decimal_float(self.temperature, "Temperature")
+        self.check_decimal_float(self.presence_penalty, "Presence penalty")
+        self.check_decimal_float(self.frequency_penalty, "Frequency penalty")
+        self.check_positive_number(self.max_tokens, "Max tokens")
+        self.check_decimal_float(self.top_p, "Top P")
+        self.check_empty(self.llm_id, "LLM")
+        #self.check_defined_type(self.parameters, "Parameters", ["list"])
+
+    def gen_conf(self):
+        return {
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "presence_penalty": self.presence_penalty,
+            "frequency_penalty": self.frequency_penalty,
+        }
+
+
+class Generate(ComponentBase):
+    component_name = "Generate"
+
+    def _run(self, history, **kwargs):
+        chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
+        prompt = self._param.prompt
+
+        retrieval_res = self.get_input()
+        input = "\n- ".join(retrieval_res["content"])
+
+
+        kwargs["input"] = input
+        for n, v in kwargs.items():
+            #prompt = re.sub(r"\{%s\}"%n, re.escape(str(v)), prompt)
+            prompt = re.sub(r"\{%s\}"%n, str(v), prompt)
+
+        if kwargs.get("stream"):
+            return partial(self.stream_output, chat_mdl, prompt, retrieval_res)
+
+        if "empty_response" in retrieval_res.columns:
+            return Generate.be_output(input)
+
+        ans = chat_mdl.chat(prompt, self._canvas.get_history(self._param.message_history_window_size), self._param.gen_conf())
+
+        if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
+            ans, idx = retrievaler.insert_citations(ans,
+                                                   [ck["content_ltks"]
+                                                    for _, ck in retrieval_res.iterrows()],
+                                                   [ck["vector"]
+                                                    for _,ck in retrieval_res.iterrows()],
+                                                   LLMBundle(self._canvas.get_tenant_id(), LLMType.EMBEDDING, self._canvas.get_embedding_model()),
+                                                   tkweight=0.7,
+                                                   vtweight=0.3)
+            del retrieval_res["vector"]
+            retrieval_res = retrieval_res.to_dict("records")
+            df = []
+            for i in idx:
+                df.append(retrieval_res[int(i)])
+                r = re.search(r"^((.|[\r\n])*? ##%s\$\$)"%str(i), ans)
+                assert r, f"{i} => {ans}"
+                df[-1]["content"] = r.group(1)
+                ans = re.sub(r"^((.|[\r\n])*? ##%s\$\$)" % str(i), "", ans)
+            if ans: df.append({"content": ans})
+            return pd.DataFrame(df)
+
+        return Generate.be_output(ans)
+
+    def stream_output(self, chat_mdl, prompt, retrieval_res):
+        res = None
+        if "empty_response" in retrieval_res.columns and "\n- ".join(retrieval_res["content"]):
+            res = {"content": "\n- ".join(retrieval_res["content"]), "reference": []}
+            yield res
+            self.set_output(res)
+            return
+
+        answer = ""
+        for ans in chat_mdl.chat_streamly(prompt, self._canvas.get_history(self._param.message_history_window_size), self._param.gen_conf()):
+            res = {"content": ans, "reference": []}
+            answer = ans
+            yield res
+
+        if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
+            answer, idx = retrievaler.insert_citations(answer,
+                                                   [ck["content_ltks"]
+                                                    for _, ck in retrieval_res.iterrows()],
+                                                   [ck["vector"]
+                                                    for _, ck in retrieval_res.iterrows()],
+                                                   LLMBundle(self._canvas.get_tenant_id(), LLMType.EMBEDDING, self._canvas.get_embedding_model()),
+                                                   tkweight=0.7,
+                                                   vtweight=0.3)
+            doc_ids = set([])
+            recall_docs = []
+            for i in idx:
+                did = retrieval_res.loc[int(i), "doc_id"]
+                if did in doc_ids: continue
+                doc_ids.add(did)
+                recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
+
+            del retrieval_res["vector"]
+            del retrieval_res["content_ltks"]
+
+            reference = {
+                "chunks": [ck.to_dict() for _, ck in retrieval_res.iterrows()],
+                "doc_aggs": recall_docs
+            }
+
+            if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api") >= 0:
+                answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'"
+            res = {"content": answer, "reference": reference}
+            yield res
+
+        self.set_output(res)
+
+
--- a/graph/component/message.py
+++ b/graph/component/message.py
@ -0,0 +1,52 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import random
+from abc import ABC
+from functools import partial
+
+import pandas as pd
+
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class MessageParam(ComponentParamBase):
+    """
+    Parameters for the Message component.
+
+    ``messages`` is the list of candidate replies configured for the component.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Candidate replies; starts empty and is expected to be filled by configuration.
+        self.messages = []
+
+    def check(self):
+        """Run the inherited ``check_empty`` validation on ``messages``, then return True."""
+        self.check_empty(self.messages, "Message")
+        return True
+
+
+class Message(ComponentBase, ABC):
+    """Component that answers with one of its configured messages, picked at random."""
+    component_name = "Message"
+
+    def _run(self, history, **kwargs):
+        """
+        Produce the component output.
+
+        When the ``stream`` keyword argument is truthy, return a
+        ``functools.partial`` wrapping ``stream_output`` for the caller to
+        invoke. Otherwise pick one configured message at random and wrap it
+        with ``be_output``. ``history`` is accepted but not read here.
+        """
+        wants_stream = kwargs.get("stream")
+        if not wants_stream:
+            picked = random.choice(self._param.messages)
+            return Message.be_output(picked)
+
+        return partial(self.stream_output)
+
+    def stream_output(self):
+        """
+        Yield a single ``{"content": <message>}`` dict chosen at random.
+
+        Yields nothing when no messages are configured.
+        """
+        # NOTE(review): the result is not recorded through set_output() here --
+        # confirm that is intentional for streamed Message replies.
+        candidates = self._param.messages
+        if not candidates:
+            return
+        yield {"content": random.choice(candidates)}
+
+
--- a/graph/component/relevant.py
+++ b/graph/component/relevant.py
@ -0,0 +1,78 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
+from graph.component import GenerateParam, Generate
+from rag.utils import num_tokens_from_string, encoder
+
+
+class RelevantParam(GenerateParam):
+    """
+    Parameters for the Relevant component.
+
+    Adds to the parent parameters the grading prompt and the two values the
+    component outputs for a "yes" and a "no" verdict.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.prompt = ""
+        # Values handed to be_output() for a "yes" / "no" verdict respectively.
+        self.yes = ""
+        self.no = ""
+
+    def check(self):
+        """Delegate validation to the parent class; nothing is returned."""
+        super().check()
+
+    def get_prompt(self):
+        """Store the fixed relevance-grading prompt on ``self.prompt`` and return it."""
+        grading_prompt = """
+        You are a grader assessing relevance of a retrieved document to a user question. 
+        It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
+        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. 
+        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
+        No other words needed except 'yes' or 'no'.
+        """
+        self.prompt = grading_prompt
+        return grading_prompt
+
+
+class Relevant(Generate, ABC):
+    """Component that asks the chat model whether its input is relevant to the latest user question."""
+    component_name = "Relevant"
+
+    def _run(self, history, **kwargs):
+        """
+        Grade the component input against the most recent user message.
+
+        The latest ``"user"`` entry of ``self._canvas.history`` is used as the
+        question (empty string if there is none); the ``history`` argument is
+        not read. The ``content`` column of ``get_input()`` is joined with
+        ``" - "`` to form the document text.
+
+        Returns:
+            ``be_output(self._param.no)`` when there is no document text or the
+            model's answer contains "no"; ``be_output(self._param.yes)`` when
+            the answer contains "yes" ("yes" is checked first).
+
+        Raises:
+            AssertionError: if the model's answer contains neither word.
+        """
+        import logging  # function-scope so this block needs no file-level import change
+
+        # Most recent user utterance, scanning the history from newest to oldest.
+        question = next((text for role, text in self._canvas.history[::-1] if role == "user"), "")
+
+        upstream = self.get_input()
+        documents = " - ".join(upstream["content"]) if "content" in upstream else ""
+        if not documents:
+            return Relevant.be_output(self._param.no)
+
+        user_msg = f"Question: {question}\nDocuments: \n{documents}"
+        chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
+
+        # Truncate the message to the model's max_length minus 4 tokens.
+        token_budget = chat_mdl.max_length - 4
+        if num_tokens_from_string(user_msg) >= token_budget:
+            user_msg = encoder.decode(encoder.encode(user_msg)[:token_budget])
+
+        verdict = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": user_msg}],
+                                self._param.gen_conf())
+        # Debug-level logging instead of an unconditional print to stdout.
+        logging.debug("Relevant component LLM verdict: %s", verdict)
+
+        normalized = verdict.lower()
+        if "yes" in normalized:
+            return Relevant.be_output(self._param.yes)
+        if "no" in normalized:
+            return Relevant.be_output(self._param.no)
+        # Explicit raise: an `assert False` would be stripped under `python -O`
+        # and the method would silently return None.
+        raise AssertionError(f"Relevant component got: {verdict}")
+
+
--- a/graph/component/retrieval.py
+++ b/graph/component/retrieval.py
@ -0,0 +1,88 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+
+import pandas as pd
+
+from api.db import LLMType
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import LLMBundle
+from api.settings import retrievaler
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class RetrievalParam(ComponentParamBase):
+    """
+    Parameters for the Retrieval component.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Search tuning values.
+        self.similarity_threshold = 0.2
+        self.keywords_similarity_weight = 0.5
+        self.top_n = 8
+        self.top_k = 1024
+        # Knowledge bases to search (must not be empty, see check()).
+        self.kb_ids = []
+        # Optional rerank model id; an empty string means no reranking model is built.
+        self.rerank_id = ""
+        # Content emitted by the component when no chunk is retrieved.
+        self.empty_response = ""
+
+    def check(self):
+        """Validate the numeric settings and require at least one knowledge base."""
+        decimal_fields = (
+            (self.similarity_threshold, "Similarity threshold"),
+            (self.keywords_similarity_weight, "Keywords similarity weight"),
+        )
+        for value, label in decimal_fields:
+            self.check_decimal_float(value, label)
+        self.check_positive_number(self.top_n, "Top N")
+        self.check_empty(self.kb_ids, "Knowledge bases")
+
+
+class Retrieval(ComponentBase, ABC):
+    """Component that searches the configured knowledge bases with recent user messages."""
+    component_name = "Retrieval"
+
+    def _run(self, history, **kwargs):
+        """
+        Retrieve chunks for the recent user messages.
+
+        The query is built from the ``"user"`` entries among the last
+        ``message_history_window_size`` items of ``history`` (newest first),
+        joined with newlines. The embedding model shared by the knowledge bases
+        is also registered on the canvas via ``set_embedding_model``.
+
+        Returns:
+            A DataFrame of the retrieved chunks, with ``content_with_weight``
+            renamed to ``content``. When nothing is retrieved, the
+            ``be_output`` of ``empty_response`` with an extra
+            ``empty_response`` column set to True.
+
+        Raises:
+            ValueError: if none of the configured knowledge bases can be found.
+            AssertionError: if the knowledge bases use different embedding models.
+        """
+        import logging  # function-scope so this block needs no file-level import change
+
+        recent = history[::-1][:self._param.message_history_window_size]
+        query = "\n".join(cnt for role, cnt in recent if role == "user")
+
+        kbs = KnowledgebaseService.get_by_ids(self._param.kb_ids)
+        if not kbs:
+            raise ValueError("Can't find knowledgebases by {}".format(self._param.kb_ids))
+        embd_nms = list({kb.embd_id for kb in kbs})
+        # Explicit raise instead of `assert`, which is stripped under `python -O`;
+        # exception type and message are unchanged.
+        if len(embd_nms) != 1:
+            raise AssertionError("Knowledge bases use different embedding models.")
+
+        embd_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.EMBEDDING, embd_nms[0])
+        self._canvas.set_embedding_model(embd_nms[0])
+
+        rerank_mdl = None
+        if self._param.rerank_id:
+            rerank_mdl = LLMBundle(kbs[0].tenant_id, LLMType.RERANK, self._param.rerank_id)
+
+        # NOTE(review): the positional `1, top_n` look like page / page size and
+        # `1 - keywords_similarity_weight` like the vector-similarity weight --
+        # confirm against retrievaler.retrieval's signature.
+        kbinfos = retrievaler.retrieval(query, embd_mdl, kbs[0].tenant_id, self._param.kb_ids,
+                                        1, self._param.top_n,
+                                        self._param.similarity_threshold, 1 - self._param.keywords_similarity_weight,
+                                        aggs=False, rerank_mdl=rerank_mdl)
+
+        if not kbinfos["chunks"]:
+            df = Retrieval.be_output(self._param.empty_response)
+            df["empty_response"] = True
+            return df
+
+        df = pd.DataFrame(kbinfos["chunks"])
+        df["content"] = df["content_with_weight"]
+        del df["content_with_weight"]
+        # Lazy %-formatting: the frame is only rendered when debug logging is enabled.
+        logging.debug("Retrieval query: %s\n%s", query, df)
+        return df
+
+
--- a/graph/component/rewrite.py
+++ b/graph/component/rewrite.py
@ -0,0 +1,72 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
+from graph.component import GenerateParam, Generate
+
+
+class RewriteQuestionParam(GenerateParam):
+    """
+    Parameters for the RewriteQuestion component.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Overrides whatever temperature the parent initialiser set.
+        self.temperature = 0.9
+        self.prompt = ""
+        # Maximum number of rewrite attempts before the component gives up.
+        self.loop = 1
+
+    def check(self):
+        """Delegate validation to the parent class; nothing is returned."""
+        super().check()
+
+    def get_prompt(self):
+        """Store the fixed query-expansion prompt on ``self.prompt`` and return it."""
+        expansion_prompt = """
+        You are an expert at query expansion to generate a paraphrasing of a question.
+        I can't retrieval relevant information from the knowledge base by using user's question directly.     
+        You need to expand or paraphrase user's question by multiple ways such as using synonyms words/phrase, 
+        writing the abbreviation in its entirety, adding some extra descriptions or explanations, 
+        changing the way of expression, translating the original question into another language (English/Chinese), etc. 
+        And return 5 versions of question and one is from translation.
+        Just list the question. No other words are needed.
+        """
+        self.prompt = expansion_prompt
+        return expansion_prompt
+
+
+class RewriteQuestion(Generate, ABC):
+    """Component that asks the chat model to paraphrase the latest user question."""
+    component_name = "RewriteQuestion"
+
+    def _run(self, history, **kwargs):
+        """
+        Rewrite the most recent user question with the chat model.
+
+        A per-instance counter (``self._loop``) tracks how many times this
+        component has run. Once it reaches ``self._param.loop`` the counter is
+        reset and an exception is raised instead of rewriting again. The
+        ``history`` argument is not read; the question comes from
+        ``self._canvas.history``.
+
+        Returns:
+            ``be_output`` of the model's answer.
+
+        Raises:
+            Exception: when the configured number of rewrite attempts is used up.
+        """
+        import logging  # function-scope so this block needs no file-level import change
+
+        attempts = getattr(self, "_loop", 0)
+        if attempts >= self._param.loop:
+            self._loop = 0
+            raise Exception("Can't find relevant information.")
+        self._loop = attempts + 1
+
+        # Append the newest user utterance (nothing is appended if there is none).
+        latest = next((text for role, text in self._canvas.history[::-1] if role == "user"), "")
+        q = "Question: " + latest
+
+        chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
+        ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": q}],
+                            self._param.gen_conf())
+
+        # Debug-level logging instead of an unconditional print to stdout.
+        logging.debug("RewriteQuestion LLM answer: %s", ans)
+        return RewriteQuestion.be_output(ans)
+
+
--- a/graph/component/switch.py
+++ b/graph/component/switch.py
@ -0,0 +1,80 @@
+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from abc import ABC
+
+import pandas as pd
+
+from api.db import LLMType
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import LLMBundle
+from api.settings import retrievaler
+from graph.component.base import ComponentBase, ComponentParamBase
+
+
+class SwitchParam(ComponentParamBase):
+    """
+    Parameters for the Switch component.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Each entry of `conditions` is a dict shaped like:
+        # {
+        #     "cpn_id": "categorize:0",
+        #     "not": False,
+        #     "operator": "gt/gte/lt/lte/eq/in",
+        #     "value": "",
+        #     "to": ""
+        # }
+        self.conditions = []
+        # Content emitted when no condition fires.
+        self.default = ""
+
+    def check(self):
+        """Require both a non-empty condition list and a default path."""
+        self.check_empty(self.conditions, "Switch conditions")
+        self.check_empty(self.default, "Default path")
+
+    def operators(self, field, op, value):
+        """
+        Evaluate ``field <op> value``.
+
+        ``gt``/``gte``/``lt``/``lte`` compare both sides as floats, ``eq``
+        compares their string forms, and ``in`` tests whether ``str(value)``
+        occurs inside ``str(field)``. Any other operator yields False.
+        """
+        if op in ("gt", "gte", "lt", "lte"):
+            lhs, rhs = float(field), float(value)
+            if op == "gt":
+                return lhs > rhs
+            if op == "gte":
+                return lhs >= rhs
+            if op == "lt":
+                return lhs < rhs
+            return lhs <= rhs
+
+        if op == "eq" or op == "in":
+            haystack, needle = str(field), str(value)
+            return haystack == needle if op == "eq" else needle in haystack
+
+        return False
+
+
+class Switch(ComponentBase, ABC):
+    """Component that routes to the first matching condition's target, or to the default."""
+    component_name = "Switch"
+
+    def _run(self, history, **kwargs):
+        """
+        Evaluate the configured conditions in order and pick a route.
+
+        For each condition, the first cell (``iloc[0, 0]``) of the referenced
+        component's output frame is compared with the condition's ``value``
+        using ``SwitchParam.operators``. The condition fires when the
+        comparison holds, or, if its ``"not"`` flag is truthy, when the
+        comparison does not hold. ``history`` is not read.
+
+        Returns:
+            A one-row DataFrame whose ``content`` is the ``"to"`` of the first
+            condition that fires, or ``self._param.default`` if none does.
+        """
+        for cond in self._param.conditions:
+            upstream = self._canvas.get_component(cond["cpn_id"])["obj"].output()[1]
+            matched = self._param.operators(upstream.iloc[0, 0], cond["operator"], cond["value"])
+            # Apply the "not" flag as a real negation; previously a negated
+            # condition could never fire. A missing flag means "not negated".
+            negate = bool(cond.get("not", False))
+            if matched != negate:
+                return pd.DataFrame([{"content": cond["to"]}])
+
+        return pd.DataFrame([{"content": self._param.default}])
+
+
+
+