Rework logging (#3358)

Unified all log files into one.

### What problem does this PR solve?

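Logging was previously scattered across multiple log files and ad-hoc `print`/`traceback` calls. This PR unifies all log files into one and routes output through the shared `logger` from `api.utils.log_utils`.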

### Type of change

- [x] Refactoring
This commit is contained in:
Zhichang Yu
2024-11-12 17:35:13 +08:00
committed by GitHub
parent 567a7563e7
commit a2a5631da4
75 changed files with 481 additions and 853 deletions

View File

@@ -14,7 +14,6 @@
# limitations under the License.
#
import re
import traceback
from concurrent.futures import ThreadPoolExecutor, ALL_COMPLETED, wait
from threading import Lock
from typing import Tuple
@@ -22,7 +21,8 @@ import umap
import numpy as np
from sklearn.mixture import GaussianMixture
from rag.utils import num_tokens_from_string, truncate
from rag.utils import truncate
from api.utils.log_utils import logger
class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
@@ -62,14 +62,13 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
{"temperature": 0.3, "max_tokens": self._max_token}
)
cnt = re.sub("(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)", "", cnt)
print("SUM:", cnt)
logger.info(f"SUM: {cnt}")
embds, _ = self._embd_model.encode([cnt])
with lock:
if not len(embds[0]): return
chunks.append((cnt, embds[0]))
except Exception as e:
print(e, flush=True)
traceback.print_stack(e)
logger.exception("summarize got exception")
return e
labels = []
@@ -105,7 +104,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
ck_idx = [i+start for i in range(len(lbls)) if lbls[i] == c]
threads.append(executor.submit(summarize, ck_idx, lock))
wait(threads, return_when=ALL_COMPLETED)
print([t.result() for t in threads])
logger.info(str([t.result() for t in threads]))
assert len(chunks) - end == n_clusters, "{} vs. {}".format(len(chunks) - end, n_clusters)
labels.extend(lbls)