API: Download doc api (#1354)

### What problem does this PR solve?

Adds download_document api

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
cecilia-uu
2024-07-04 16:33:55 +08:00
committed by GitHub
parent daaf6aed50
commit 306108fe0e
4 changed files with 137 additions and 17 deletions

View File

@ -12,12 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import requests
from api.db.services.document_service import DocumentService
from api.settings import RetCode
@ -126,7 +126,22 @@ class RAGFlow:
return response.json()
# ----------------------------download a file-----------------------------------------------------
def download_file(self, dataset_id, document_id):
endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
res = requests.get(endpoint, headers=self.authorization_header)
content = res.content # binary data
# decode the binary data
try:
decoded_content = content.decode("utf-8")
json_data = json.loads(decoded_content)
return json_data # message
except json.JSONDecodeError: # binary data
_, document = DocumentService.get_by_id(document_id)
file_path = os.path.join(os.getcwd(), document.name)
with open(file_path, "wb") as file:
file.write(content)
return {"code": RetCode.SUCCESS, "data": content}
# ----------------------------start parsing-----------------------------------------------------
# ----------------------------stop parsing-----------------------------------------------------
@ -144,3 +159,4 @@ class RAGFlow:
# ----------------------------get a specific chunk-----------------------------------------------------
# ----------------------------retrieval test-----------------------------------------------------