Add auto-keywords and auto-question (#2965)

### What problem does this PR solve?

Related issue: #2687

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2024-10-22 13:12:49 +08:00
committed by GitHub
parent 5aa9d7787e
commit 226bdd6e99
8 changed files with 119 additions and 61 deletions

View File

@ -25,7 +25,7 @@ from api.db import FileType, LLMType, ParserType, FileSource
from api.db.db_models import APIToken, Task, File
from api.db.services import duplicate_name
from api.db.services.api_service import APITokenService, API4ConversationService
from api.db.services.dialog_service import DialogService, chat
from api.db.services.dialog_service import DialogService, chat, keyword_extraction
from api.db.services.document_service import DocumentService, doc_upload_and_parse
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
@ -38,7 +38,6 @@ from api.utils.api_utils import server_error_response, get_data_error_result, ge
generate_confirmation_token
from api.utils.file_utils import filename_type, thumbnail
from rag.nlp import keyword_extraction
from rag.utils.storage_factory import STORAGE_IMPL
from api.db.services.canvas_service import UserCanvasService

View File

@ -21,8 +21,9 @@ from flask import request
from flask_login import login_required, current_user
from elasticsearch_dsl import Q
from api.db.services.dialog_service import keyword_extraction
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, rag_tokenizer, keyword_extraction
from rag.nlp import search, rag_tokenizer
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from api.db import LLMType, ParserType

View File

@ -16,16 +16,15 @@
from flask import request
from api.db import StatusEnum
from api.db.db_models import TenantLLM
from api.db.services.dialog_service import DialogService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMService, TenantLLMService
from api.db.services.llm_service import TenantLLMService
from api.db.services.user_service import TenantService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import get_error_data_result, token_required
from api.utils.api_utils import get_result
@manager.route('/chat', methods=['POST'])
@token_required
def create(tenant_id):

View File

@ -1,10 +1,25 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask import request, jsonify
from db import LLMType, ParserType
from db.services.knowledgebase_service import KnowledgebaseService
from db.services.llm_service import LLMBundle
from settings import retrievaler, kg_retrievaler, RetCode
from utils.api_utils import validate_request, build_error_result, apikey_required
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.settings import retrievaler, kg_retrievaler, RetCode
from api.utils.api_utils import validate_request, build_error_result, apikey_required
@manager.route('/dify/retrieval', methods=['POST'])

View File

@ -1,48 +1,37 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pathlib
import re
import datetime
import json
import traceback
from botocore.docs.method import document_model_driven_method
from flask import request
from flask_login import login_required, current_user
from elasticsearch_dsl import Q
from pygments import highlight
from sphinx.addnodes import document
from api.db.services.dialog_service import keyword_extraction
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, rag_tokenizer, keyword_extraction
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from rag.nlp import rag_tokenizer
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import TenantLLMService
from api.db.services.user_service import UserTenantService
from api.utils.api_utils import server_error_response, get_error_data_result, validate_request
from api.db.services.document_service import DocumentService
from api.settings import RetCode, retrievaler, kg_retrievaler
from api.utils.api_utils import get_result
from api.settings import kg_retrievaler
import hashlib
import re
from api.utils.api_utils import get_result, token_required, get_error_data_result
from api.db.db_models import Task, File
from api.utils.api_utils import token_required
from api.db.db_models import Task
from api.db.services.task_service import TaskService, queue_tasks
from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import server_error_response, get_error_data_result, validate_request
from api.utils.api_utils import get_result, get_result, get_error_data_result
from functools import partial
from api.utils.api_utils import server_error_response
from api.utils.api_utils import get_result, get_error_data_result
from io import BytesIO
from elasticsearch_dsl import Q
from flask import request, send_file
from flask_login import login_required
from api.db import FileSource, TaskStatus, FileType
from api.db.db_models import File
from api.db.services.document_service import DocumentService
@ -50,8 +39,7 @@ from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.settings import RetCode, retrievaler
from api.utils.api_utils import construct_json_result, construct_error_response
from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
from api.utils.api_utils import construct_json_result
from rag.nlp import search
from rag.utils import rmSpace
from rag.utils.es_conn import ELASTICSEARCH
@ -365,7 +353,6 @@ def list_chunks(tenant_id,dataset_id,document_id):
return get_result(data=res)
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk', methods=['POST'])
@token_required
def create(tenant_id,dataset_id,document_id):
@ -454,7 +441,6 @@ def rm_chunk(tenant_id,dataset_id,document_id):
return get_result()
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
@token_required
def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
@ -512,7 +498,6 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
return get_result()
@manager.route('/retrieval', methods=['POST'])
@token_required
def retrieval_test(tenant_id):