mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
add a new model for 'Laws' (#290)
### What problem does this PR solve? Issue link:#289 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -14,6 +14,8 @@ import copy
|
||||
import re
|
||||
from io import BytesIO
|
||||
from docx import Document
|
||||
|
||||
from api.db import ParserType
|
||||
from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \
|
||||
make_colon_as_title, add_positions, tokenize_chunks
|
||||
from rag.nlp import huqie
|
||||
@ -23,7 +25,8 @@ from rag.settings import cron_logger
|
||||
|
||||
class Docx(DocxParser):
|
||||
def __init__(self):
|
||||
pass
|
||||
self.model_speciess = ParserType.LAWS.value
|
||||
super().__init__()
|
||||
|
||||
def __clean(self, line):
|
||||
line = re.sub(r"\u3000", " ", line).strip()
|
||||
|
||||
Reference in New Issue
Block a user