use minio to store uploaded files; build dialog server; (#16)

* format code

* use minio to store uploaded files; build dialog server;
This commit is contained in:
KevinHuSh
2023-12-25 19:05:59 +08:00
committed by GitHub
parent d4fd138954
commit 3245107dc7
13 changed files with 520 additions and 134 deletions

View File

@@ -3,6 +3,7 @@ import re
import pandas as pd
from collections import Counter
from nlp import huqie
from io import BytesIO
class HuDocxParser:
@@ -97,7 +98,7 @@ class HuDocxParser:
return ["\n".join(lines)]
def __call__(self, fnm):
# Load `fnm` with Document and return the pair (secs, tbls) built below.
# NOTE(review): this hunk appears to list both the pre-change and the post-change line
# of the diff; the first assignment is presumably the removed line and the second its
# replacement -- confirm against the repository before treating both as live code.
self.doc = Document(fnm)
# A str `fnm` is handed to Document unchanged; any other value is wrapped in BytesIO
# first (assumes non-str input is the raw bytes of the document -- TODO confirm).
self.doc = Document(fnm) if isinstance(fnm, str) else Document(BytesIO(fnm))
# One (text, style name) tuple per paragraph of the loaded document.
secs = [(p.text, p.style.name) for p in self.doc.paragraphs]
# One entry per table, produced by the private __extract_table_content helper.
tbls = [self.__extract_table_content(tb) for tb in self.doc.tables]
return secs, tbls

View File

@@ -1,10 +1,12 @@
from openpyxl import load_workbook
import sys
from io import BytesIO
class HuExcelParser:
def __call__(self, fnm):
wb = load_workbook(fnm)
if isinstance(fnm, str):wb = load_workbook(fnm)
else: wb = load_workbook(BytesIO(fnm))
res = []
for sheetname in wb.sheetnames:
ws = wb[sheetname]

View File

@ -1,4 +1,5 @@
import xgboost as xgb
from io import BytesIO
import torch
import re
import pdfplumber
@@ -1525,7 +1526,7 @@ class HuParser:
return "\n\n".join(res)
def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
self.pdf = pdfplumber.open(fnm)
self.pdf = pdfplumber.open(fnm) if isinstance(fnm, str) else pdfplumber.open(BytesIO(fnm))
self.lefted_chars = []
self.mean_height = []
self.mean_width = []