diff --git a/api/db/db_models.py b/api/db/db_models.py
index bd3feea64..e60afbef5 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -749,7 +749,7 @@ class Knowledgebase(DataBaseModel):
 
     parser_id = CharField(max_length=32, null=False, help_text="default parser ID", default=ParserType.NAIVE.value, index=True)
     pipeline_id = CharField(max_length=32, null=True, help_text="Pipeline ID", index=True)
-    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
+    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]], "table_context_size": 0, "image_context_size": 0})
     pagerank = IntegerField(default=0, index=False)
 
     graphrag_task_id = CharField(max_length=32, null=True, help_text="Graph RAG task ID", index=True)
@@ -774,7 +774,7 @@ class Document(DataBaseModel):
     kb_id = CharField(max_length=256, null=False, index=True)
     parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
     pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True)
-    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
+    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]], "table_context_size": 0, "image_context_size": 0})
     source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
     type = CharField(max_length=32, null=False, help_text="file extension", index=True)
     created_by = CharField(max_length=32, null=False, help_text="who created it", index=True)
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 514b3fd87..7b7ef53ec 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -923,7 +923,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
         ParserType.AUDIO.value: audio,
         ParserType.EMAIL.value: email
     }
-    parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。；！？", "layout_recognize": "Plain Text"}
+    parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。；！？", "layout_recognize": "Plain Text", "table_context_size": 0, "image_context_size": 0}
     exe = ThreadPoolExecutor(max_workers=12)
     threads = []
     doc_nm = {}
diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index cbd2423f2..314211694 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -313,6 +313,10 @@ def get_parser_config(chunk_method, parser_config):
         chunk_method = "naive"
 
     # Define default configurations for each chunking method
+    base_defaults = {
+        "table_context_size": 0,
+        "image_context_size": 0,
+    }
     key_mapping = {
         "naive": {
             "layout_recognize": "DeepDOC",
@@ -365,16 +369,19 @@ def get_parser_config(chunk_method, parser_config):
 
     default_config = key_mapping[chunk_method]
 
-    # If no parser_config provided, return default
+    # If no parser_config provided, return default merged with base defaults
     if not parser_config:
-        return default_config
+        if default_config is None:
+            return deep_merge(base_defaults, {})
+        return deep_merge(base_defaults, default_config)
 
     # If parser_config is provided, merge with defaults to ensure required fields exist
     if default_config is None:
-        return parser_config
+        return deep_merge(base_defaults, parser_config)
 
     # Ensure raptor and graphrag fields have default values if not provided
-    merged_config = deep_merge(default_config, parser_config)
+    merged_config = deep_merge(base_defaults, default_config)
+    merged_config = deep_merge(merged_config, parser_config)
 
     return merged_config
 
diff --git a/rag/app/book.py b/rag/app/book.py
index 5bdaec72d..ca91be149 100644
--- a/rag/app/book.py
+++ b/rag/app/book.py
@@ -23,7 +23,7 @@ from rag.app import naive
 from rag.app.naive import by_plaintext, PARSERS
 from rag.nlp import bullets_category, is_english,remove_contents_table, \
     hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, \
-    tokenize_chunks
+    tokenize_chunks, attach_media_context
 from rag.nlp import rag_tokenizer
 from deepdoc.parser import PdfParser, HtmlParser
 from deepdoc.parser.figure_parser import vision_figure_parser_docx_wrapper
@@ -175,6 +175,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
 
     res = tokenize_table(tbls, doc, eng)
     res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
+    table_ctx = max(0, int(parser_config.get("table_context_size", 0) or 0))
+    image_ctx = max(0, int(parser_config.get("image_context_size", 0) or 0))
+    if table_ctx or image_ctx:
+        attach_media_context(res, table_ctx, image_ctx)
 
     return res
 
diff --git a/rag/app/manual.py b/rag/app/manual.py
index b3a4ae38d..1eb86a043 100644
--- a/rag/app/manual.py
+++ b/rag/app/manual.py
@@ -20,7 +20,7 @@ import re
 
 from common.constants import ParserType
 from io import BytesIO
-from rag.nlp import rag_tokenizer, tokenize, tokenize_table, bullets_category, title_frequency, tokenize_chunks, docx_question_level
+from rag.nlp import rag_tokenizer, tokenize, tokenize_table, bullets_category, title_frequency, tokenize_chunks, docx_question_level, attach_media_context
 from common.token_utils import num_tokens_from_string
 from deepdoc.parser import PdfParser, DocxParser
 from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
@@ -155,7 +155,7 @@ class Docx(DocxParser):
             sum_question = '\n'.join(question_stack)
             if sum_question:
                 ti_list.append((f'{sum_question}\n{last_answer}', last_image))
-                
+
         tbls = []
         for tb in self.doc.tables:
             html= "<table>"
@@ -231,14 +231,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
             if isinstance(poss, str):
                 poss = pdf_parser.extract_positions(poss)
                 first = poss[0]          # tuple: ([pn], x1, x2, y1, y2)
-                pn = first[0]           
+                pn = first[0]
 
                 if isinstance(pn, list):
                     pn = pn[0]           # [pn] -> pn
                     poss[0] = (pn, *first[1:])
 
             return (txt, layoutno, poss)
-        
+
 
         sections = [_normalize_section(sec) for sec in sections]
 
@@ -247,7 +247,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
 
         if name in ["tcadp", "docling", "mineru"]:
             parser_config["chunk_token_num"] = 0
-        
+
         callback(0.8, "Finish parsing.")
 
         if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.03:
@@ -310,6 +310,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
         res = tokenize_table(tbls, doc, eng)
         res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
+        table_ctx = max(0, int(parser_config.get("table_context_size", 0) or 0))
+        image_ctx = max(0, int(parser_config.get("image_context_size", 0) or 0))
+        if table_ctx or image_ctx:
+            attach_media_context(res, table_ctx, image_ctx)
         return res
 
     elif re.search(r"\.docx?$", filename, re.IGNORECASE):
@@ -325,10 +329,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
                 d["doc_type_kwd"] = "image"
             tokenize(d, text, eng)
             res.append(d)
+        table_ctx = max(0, int(parser_config.get("table_context_size", 0) or 0))
+        image_ctx = max(0, int(parser_config.get("image_context_size", 0) or 0))
+        if table_ctx or image_ctx:
+            attach_media_context(res, table_ctx, image_ctx)
         return res
     else:
         raise NotImplementedError("file type not supported yet(pdf and docx supported)")
-    
+
 
 if __name__ == "__main__":
     import sys
diff --git a/rag/app/naive.py b/rag/app/naive.py
index 0496c7507..7872ebc22 100644
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -37,7 +37,7 @@ from deepdoc.parser.pdf_parser import PlainParser, VisionParser
 from deepdoc.parser.mineru_parser import MinerUParser
 from deepdoc.parser.docling_parser import DoclingParser
 from deepdoc.parser.tcadp_parser import TCADPParser
-from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table
+from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table, attach_media_context
 
 def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs):
     callback = callback
@@ -616,6 +616,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
     parser_config = kwargs.get(
         "parser_config", {
             "chunk_token_num": 512, "delimiter": "\n!?。；！？", "layout_recognize": "DeepDOC", "analyze_hyperlink": True})
+    table_context_size = max(0, int(parser_config.get("table_context_size", 0) or 0))
+    image_context_size = max(0, int(parser_config.get("image_context_size", 0) or 0))
     final_sections = False
     doc = {
         "docnm_kwd": filename,
@@ -686,6 +688,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         logging.info("naive_merge({}): {}".format(filename, timer() - st))
         res.extend(embed_res)
         res.extend(url_res)
+        if table_context_size or image_context_size:
+            attach_media_context(res, table_context_size, image_context_size)
         return res
 
     elif re.search(r"\.pdf$", filename, re.IGNORECASE):
@@ -947,6 +951,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         res.extend(embed_res)
     if url_res:
         res.extend(url_res)
+    if table_context_size or image_context_size:
+        attach_media_context(res, table_context_size, image_context_size)
     return res
 
 
diff --git a/rag/app/paper.py b/rag/app/paper.py
index 222be0762..d84d5645d 100644
--- a/rag/app/paper.py
+++ b/rag/app/paper.py
@@ -20,7 +20,7 @@ import re
 
 from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
 from common.constants import ParserType
-from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
+from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks, attach_media_context
 from deepdoc.parser import PdfParser
 import numpy as np
 from rag.app.naive import by_plaintext, PARSERS
@@ -150,7 +150,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
             "chunk_token_num": 512, "delimiter": "\n!?。；！？", "layout_recognize": "DeepDOC"})
     if re.search(r"\.pdf$", filename, re.IGNORECASE):
         layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
-        
+
         if isinstance(layout_recognizer, bool):
             layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text"
 
@@ -234,6 +234,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         chunks.append(txt)
         last_sid = sec_id
     res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
+    table_ctx = max(0, int(parser_config.get("table_context_size", 0) or 0))
+    image_ctx = max(0, int(parser_config.get("image_context_size", 0) or 0))
+    if table_ctx or image_ctx:
+        attach_media_context(res, table_ctx, image_ctx)
     return res
 
 
diff --git a/rag/app/picture.py b/rag/app/picture.py
index f260104f7..8e7aa4bce 100644
--- a/rag/app/picture.py
+++ b/rag/app/picture.py
@@ -20,11 +20,11 @@ import re
 import numpy as np
 from PIL import Image
 
-from common.constants import LLMType
 from api.db.services.llm_service import LLMBundle
-from deepdoc.vision import OCR
-from rag.nlp import rag_tokenizer, tokenize
+from common.constants import LLMType
 from common.string_utils import clean_markdown_block
+from deepdoc.vision import OCR
+from rag.nlp import attach_media_context, rag_tokenizer, tokenize
 
 ocr = OCR()
 
@@ -39,9 +39,16 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
     }
     eng = lang.lower() == "english"
 
+    parser_config = kwargs.get("parser_config", {}) or {}
+    image_ctx = max(0, int(parser_config.get("image_context_size", 0) or 0))
+
     if any(filename.lower().endswith(ext) for ext in VIDEO_EXTS):
         try:
-            doc.update({"doc_type_kwd": "video"})
+            doc.update(
+                {
+                    "doc_type_kwd": "video",
+                }
+            )
             cv_mdl = LLMBundle(tenant_id, llm_type=LLMType.IMAGE2TEXT, lang=lang)
             ans = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=binary, filename=filename)
             callback(0.8, "CV LLM respond: %s ..." % ans[:32])
@@ -64,7 +71,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
         if (eng and len(txt.split()) > 32) or len(txt) > 32:
             tokenize(doc, txt, eng)
             callback(0.8, "OCR results is too long to use CV LLM.")
-            return [doc]
+            return attach_media_context([doc], 0, image_ctx)
 
         try:
             callback(0.4, "Use CV LLM to describe the picture.")
@@ -76,7 +83,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
             callback(0.8, "CV LLM respond: %s ..." % ans[:32])
             txt += "\n" + ans
             tokenize(doc, txt, eng)
-            return [doc]
+            return attach_media_context([doc], 0, image_ctx)
         except Exception as e:
             callback(prog=-1, msg=str(e))
 
@@ -103,7 +110,7 @@ def vision_llm_chunk(binary, vision_model, prompt=None, callback=None):
                 img_binary.seek(0)
                 img_binary.truncate()
                 img.save(img_binary, format="PNG")
-                
+
             img_binary.seek(0)
             ans = clean_markdown_block(vision_model.describe_with_prompt(img_binary.read(), prompt))
             txt += "\n" + ans
diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py
index 1a111cc3a..7747448ad 100644
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@@ -19,16 +19,16 @@ import random
 import re
 from functools import partial
 
-import trio
 import numpy as np
+import trio
 from PIL import Image
 
-from common.constants import LLMType
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.llm_service import LLMBundle
+from common import settings
+from common.constants import LLMType
 from common.misc_utils import get_uuid
-from rag.utils.base64_image import image2id
 from deepdoc.parser import ExcelParser
 from deepdoc.parser.mineru_parser import MinerUParser
 from deepdoc.parser.pdf_parser import PlainParser, RAGFlowPdfParser, VisionParser
@@ -37,7 +37,8 @@ from rag.app.naive import Docx
 from rag.flow.base import ProcessBase, ProcessParamBase
 from rag.flow.parser.schema import ParserFromUpstream
 from rag.llm.cv_model import Base as VLM
-from common import settings
+from rag.nlp import attach_media_context
+from rag.utils.base64_image import image2id
 
 
 class ParserParam(ProcessParamBase):
@@ -61,15 +62,18 @@ class ParserParam(ProcessParamBase):
                 "json",
             ],
             "image": [
-                "text"
+                "text",
+            ],
+            "email": [
+                "text",
+                "json",
             ],
-            "email": ["text", "json"],
             "text&markdown": [
                 "text",
-                "json"
+                "json",
             ],
             "audio": [
-                "json"
+                "json",
             ],
             "video": [],
         }
@@ -82,6 +86,8 @@ class ParserParam(ProcessParamBase):
                     "pdf",
                 ],
                 "output_format": "json",
+                "table_context_size": 0,
+                "image_context_size": 0,
             },
             "spreadsheet": {
                 "parse_method": "deepdoc",  # deepdoc/tcadp_parser
@@ -91,6 +97,8 @@ class ParserParam(ProcessParamBase):
                     "xlsx",
                     "csv",
                 ],
+                "table_context_size": 0,
+                "image_context_size": 0,
             },
             "word": {
                 "suffix": [
@@ -98,18 +106,24 @@ class ParserParam(ProcessParamBase):
                     "docx",
                 ],
                 "output_format": "json",
+                "table_context_size": 0,
+                "image_context_size": 0,
             },
             "text&markdown": {
                 "suffix": ["md", "markdown", "mdx", "txt"],
                 "output_format": "json",
+                "table_context_size": 0,
+                "image_context_size": 0,
             },
             "slides": {
                 "parse_method": "deepdoc",  # deepdoc/tcadp_parser
                 "suffix": [
                     "pptx",
-                    "ppt"
+                    "ppt",
                 ],
                 "output_format": "json",
+                "table_context_size": 0,
+                "image_context_size": 0,
             },
             "image": {
                 "parse_method": "ocr",
@@ -121,13 +135,14 @@ class ParserParam(ProcessParamBase):
             },
             "email": {
                 "suffix": [
-                  "eml", "msg"
+                    "eml",
+                    "msg",
                 ],
                 "fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"],
                 "output_format": "json",
             },
             "audio": {
-                "suffix":[
+                "suffix": [
                     "da",
                     "wave",
                     "wav",
@@ -142,15 +157,15 @@ class ParserParam(ProcessParamBase):
                     "realaudio",
                     "vqf",
                     "oggvorbis",
-                    "ape"
+                    "ape",
                 ],
                 "output_format": "text",
             },
             "video": {
-                "suffix":[
+                "suffix": [
                     "mp4",
                     "avi",
-                    "mkv"
+                    "mkv",
                 ],
                 "output_format": "text",
             },
@@ -253,7 +268,7 @@ class Parser(ProcessBase):
             markdown_image_response_type = conf.get("markdown_image_response_type", "1")
             tcadp_parser = TCADPParser(
                 table_result_type=table_result_type,
-                markdown_image_response_type=markdown_image_response_type
+                markdown_image_response_type=markdown_image_response_type,
             )
             sections, _ = tcadp_parser.parse_pdf(
                 filepath=name,
@@ -261,7 +276,7 @@ class Parser(ProcessBase):
                 callback=self.callback,
                 file_type="PDF",
                 file_start_page=1,
-                file_end_page=1000
+                file_end_page=1000,
             )
             bboxes = []
             for section, position_tag in sections:
@@ -269,17 +284,20 @@ class Parser(ProcessBase):
                     # Extract position information from TCADP's position tag
                     # Format: @@{page_number}\t{x0}\t{x1}\t{top}\t{bottom}##
                     import re
+
                     match = re.match(r"@@([0-9-]+)\t([0-9.]+)\t([0-9.]+)\t([0-9.]+)\t([0-9.]+)##", position_tag)
                     if match:
                         pn, x0, x1, top, bott = match.groups()
-                        bboxes.append({
-                            "page_number": int(pn.split('-')[0]),  # Take the first page number
-                            "x0": float(x0),
-                            "x1": float(x1),
-                            "top": float(top),
-                            "bottom": float(bott),
-                            "text": section
-                        })
+                        bboxes.append(
+                            {
+                                "page_number": int(pn.split("-")[0]),  # Take the first page number
+                                "x0": float(x0),
+                                "x1": float(x1),
+                                "top": float(top),
+                                "bottom": float(bott),
+                                "text": section,
+                            }
+                        )
                     else:
                         # If no position info, add as text without position
                         bboxes.append({"text": section})
@@ -291,7 +309,30 @@ class Parser(ProcessBase):
             bboxes = []
             for t, poss in lines:
                 for pn, x0, x1, top, bott in RAGFlowPdfParser.extract_positions(poss):
-                    bboxes.append({"page_number": int(pn[0]), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
+                    bboxes.append(
+                        {
+                            "page_number": int(pn[0]),
+                            "x0": float(x0),
+                            "x1": float(x1),
+                            "top": float(top),
+                            "bottom": float(bott),
+                            "text": t,
+                        }
+                    )
+
+        for b in bboxes:
+            text_val = b.get("text", "")
+            has_text = isinstance(text_val, str) and text_val.strip()
+            layout = b.get("layout_type")
+            if layout == "figure" or (b.get("image") and not has_text):
+                b["doc_type_kwd"] = "image"
+            elif layout == "table":
+                b["doc_type_kwd"] = "table"
+
+        table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+        image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+        if table_ctx or image_ctx:
+            bboxes = attach_media_context(bboxes, table_ctx, image_ctx)
 
         if conf.get("output_format") == "json":
             self.set_output("json", bboxes)
@@ -319,7 +360,7 @@ class Parser(ProcessBase):
             markdown_image_response_type = conf.get("markdown_image_response_type", "1")
             tcadp_parser = TCADPParser(
                 table_result_type=table_result_type,
-                markdown_image_response_type=markdown_image_response_type
+                markdown_image_response_type=markdown_image_response_type,
             )
             if not tcadp_parser.check_installation():
                 raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
@@ -337,7 +378,7 @@ class Parser(ProcessBase):
                 callback=self.callback,
                 file_type=file_type,
                 file_start_page=1,
-                file_end_page=1000
+                file_end_page=1000,
             )
 
             # Process TCADP parser output based on configured output_format
@@ -365,7 +406,12 @@ class Parser(ProcessBase):
                 # Add tables as text
                 for table in tables:
                     if table:
-                        result.append({"text": table})
+                        result.append({"text": table, "doc_type_kwd": "table"})
+
+                table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+                image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+                if table_ctx or image_ctx:
+                    result = attach_media_context(result, table_ctx, image_ctx)
 
                 self.set_output("json", result)
 
@@ -400,7 +446,13 @@ class Parser(ProcessBase):
         if conf.get("output_format") == "json":
             sections, tbls = docx_parser(name, binary=blob)
             sections = [{"text": section[0], "image": section[1]} for section in sections if section]
-            sections.extend([{"text": tb, "image": None} for ((_,tb), _) in tbls])
+            sections.extend([{"text": tb, "image": None, "doc_type_kwd": "table"} for ((_, tb), _) in tbls])
+
+            table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+            image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+            if table_ctx or image_ctx:
+                sections = attach_media_context(sections, table_ctx, image_ctx)
+
             self.set_output("json", sections)
         elif conf.get("output_format") == "markdown":
             markdown_text = docx_parser.to_markdown(name, binary=blob)
@@ -420,7 +472,7 @@ class Parser(ProcessBase):
             markdown_image_response_type = conf.get("markdown_image_response_type", "1")
             tcadp_parser = TCADPParser(
                 table_result_type=table_result_type,
-                markdown_image_response_type=markdown_image_response_type
+                markdown_image_response_type=markdown_image_response_type,
             )
             if not tcadp_parser.check_installation():
                 raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
@@ -439,7 +491,7 @@ class Parser(ProcessBase):
                 callback=self.callback,
                 file_type=file_type,
                 file_start_page=1,
-                file_end_page=1000
+                file_end_page=1000,
             )
 
             # Process TCADP parser output - PPT only supports json format
@@ -454,7 +506,12 @@ class Parser(ProcessBase):
                 # Add tables as text
                 for table in tables:
                     if table:
-                        result.append({"text": table})
+                        result.append({"text": table, "doc_type_kwd": "table"})
+
+                table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+                image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+                if table_ctx or image_ctx:
+                    result = attach_media_context(result, table_ctx, image_ctx)
 
                 self.set_output("json", result)
         else:
@@ -469,6 +526,10 @@ class Parser(ProcessBase):
             # json
             assert conf.get("output_format") == "json", "have to be json for ppt"
             if conf.get("output_format") == "json":
+                table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+                image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+                if table_ctx or image_ctx:
+                    sections = attach_media_context(sections, table_ctx, image_ctx)
                 self.set_output("json", sections)
 
     def _markdown(self, name, blob):
@@ -508,11 +569,15 @@ class Parser(ProcessBase):
 
                 json_results.append(json_result)
 
+            table_ctx = max(0, int(conf.get("table_context_size", 0) or 0))  # coerce to a non-negative int before the numeric checks in attach_media_context
+            image_ctx = max(0, int(conf.get("image_context_size", 0) or 0))
+            if table_ctx or image_ctx:
+                json_results = attach_media_context(json_results, table_ctx, image_ctx)
+
             self.set_output("json", json_results)
         else:
             self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
 
-
     def _image(self, name, blob):
         from deepdoc.vision import OCR
 
@@ -588,7 +653,7 @@ class Parser(ProcessBase):
             from email.parser import BytesParser
 
             msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob))
-            email_content['metadata'] = {}
+            email_content["metadata"] = {}
             # handle header info
             for header, value in msg.items():
                 # get fields like from, to, cc, bcc, date, subject
@@ -600,6 +665,7 @@ class Parser(ProcessBase):
             # get body
             if "body" in target_fields:
                 body_text, body_html = [], []
+
                 def _add_content(m, content_type):
                     def _decode_payload(payload, charset, target_list):
                         try:
@@ -641,14 +707,17 @@ class Parser(ProcessBase):
                         if dispositions[0].lower() == "attachment":
                             filename = part.get_filename()
                             payload = part.get_payload(decode=True).decode(part.get_content_charset())
-                            attachments.append({
-                                "filename": filename,
-                                "payload": payload,
-                            })
+                            attachments.append(
+                                {
+                                    "filename": filename,
+                                    "payload": payload,
+                                }
+                            )
                 email_content["attachments"] = attachments
         else:
             # handle msg file
             import extract_msg
+
             print("handle a msg file.")
             msg = extract_msg.Message(blob)
             # handle header info
@@ -662,9 +731,9 @@ class Parser(ProcessBase):
             }
             email_content.update({k: v for k, v in basic_content.items() if k in target_fields})
             # get metadata
-            email_content['metadata'] = {
-                'message_id': msg.messageId,
-                'in_reply_to': msg.inReplyTo,
+            email_content["metadata"] = {
+                "message_id": msg.messageId,
+                "in_reply_to": msg.inReplyTo,
             }
             # get body
             if "body" in target_fields:
@@ -675,29 +744,31 @@ class Parser(ProcessBase):
             if "attachments" in target_fields:
                 attachments = []
                 for t in msg.attachments:
-                    attachments.append({
-                        "filename": t.name,
-                        "payload": t.data.decode("utf-8")
-                    })
+                    attachments.append(
+                        {
+                            "filename": t.name,
+                            "payload": t.data.decode("utf-8"),
+                        }
+                    )
                 email_content["attachments"] = attachments
 
         if conf["output_format"] == "json":
             self.set_output("json", [email_content])
         else:
-            content_txt = ''
+            content_txt = ""
             for k, v in email_content.items():
                 if isinstance(v, str):
                     # basic info
-                    content_txt += f'{k}:{v}' + "\n"
+                    content_txt += f"{k}:{v}" + "\n"
                 elif isinstance(v, dict):
                     # metadata
-                    content_txt += f'{k}:{json.dumps(v)}' + "\n"
+                    content_txt += f"{k}:{json.dumps(v)}" + "\n"
                 elif isinstance(v, list):
                     # attachments or others
                     for fb in v:
                         if isinstance(fb, dict):
                             # attachments
-                            content_txt += f'{fb["filename"]}:{fb["payload"]}' + "\n"
+                            content_txt += f"{fb['filename']}:{fb['payload']}" + "\n"
                         else:
                             # str, usually plain text
                             content_txt += fb
diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py
index 0624309ee..6f36a927a 100644
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -318,6 +318,7 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
             d = copy.deepcopy(doc)
             tokenize(d, rows, eng)
             d["content_with_weight"] = rows
+            d["doc_type_kwd"] = "table"
             if img:
                 d["image"] = img
                 d["doc_type_kwd"] = "image"
@@ -330,6 +331,7 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
             d = copy.deepcopy(doc)
             r = de.join(rows[i:i + batch_size])
             tokenize(d, r, eng)
+            d["doc_type_kwd"] = "table"
             if img:
                 d["image"] = img
                 d["doc_type_kwd"] = "image"
@@ -338,6 +340,185 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
     return res
 
 
+def attach_media_context(chunks, table_context_size=0, image_context_size=0):
+    """
+    Attach surrounding text chunk content to media chunks (table/image).
+
+    Best-effort ordering: if positional info exists on any chunk, positioned
+    chunks are sorted by (page, top, left) and unpositioned chunks follow in
+    their original order; otherwise the original order is kept.
+
+    For each table/image chunk, text is gathered from the contiguous run of
+    text chunks directly before and after it (scanning stops at the first
+    non-text neighbour), up to the token budget on each side, and merged into
+    the chunk's "content_with_weight" (or "text") field. The budget is soft:
+    the sentence that crosses it is kept whole.
+
+    Args:
+        chunks: list of chunk dicts; mutated and possibly reordered in place.
+        table_context_size: per-side token budget for table chunks (<= 0: off).
+        image_context_size: per-side token budget for image chunks (<= 0: off).
+
+    Returns:
+        The same `chunks` list object.
+    """
+    if not chunks or (table_context_size <= 0 and image_context_size <= 0):
+        return chunks
+
+    sentence_end = re.compile(r"([.。！？!?；;：:\n])")
+
+    def get_text(ck):
+        if isinstance(ck.get("content_with_weight"), str):
+            return ck["content_with_weight"]
+        if isinstance(ck.get("text"), str):
+            return ck["text"]
+        return ""
+
+    def classify(ck):
+        kind = ck.get("doc_type_kwd")
+        if kind == "image":
+            return "image"
+        # A chunk that carries an image but no text counts as an image chunk.
+        if ck.get("image") and not get_text(ck).strip():
+            return "image"
+        return "table" if kind == "table" else "text"
+
+    def split_sentences(text):
+        # The capturing group makes split() yield the delimiters as well, so
+        # each one is glued back onto the sentence it terminates.
+        sentences = []
+        buf = ""
+        for part in sentence_end.split(text):
+            if not part:
+                continue
+            buf += part
+            if sentence_end.fullmatch(part):
+                sentences.append(buf)
+                buf = ""
+        if buf:
+            sentences.append(buf)
+        return sentences
+
+    def trim_to_tokens(text, token_budget, from_tail=False):
+        if token_budget <= 0 or not text:
+            return ""
+        sentences = split_sentences(text)
+        if from_tail:
+            sentences.reverse()
+
+        collected = []
+        remaining = token_budget
+        for s in sentences:
+            tks = num_tokens_from_string(s)
+            if tks <= 0:
+                continue
+            collected.append(s)
+            if tks > remaining:
+                # Soft budget: keep the overflowing sentence whole, then stop.
+                break
+            remaining -= tks
+
+        if from_tail:
+            collected.reverse()
+        return "".join(collected)
+
+    def extract_position(ck):
+        # Returns (page, top, left) as ints, or None when the chunk has no
+        # usable position; malformed values make the chunk unpositioned
+        # instead of failing later while sorting.
+        try:
+            pn = ck["page_num_int"][0] if ck.get("page_num_int") else ck.get("page_number")
+            top = ck["top_int"][0] if ck.get("top_int") else ck.get("top")
+            left = ck["position_int"][0][1] if ck.get("position_int") else ck.get("x0")
+            if pn is None or top is None:
+                return None
+            return int(pn), int(top), (int(left) if left is not None else 0)
+        except (TypeError, ValueError, IndexError, KeyError):
+            return None
+
+    positioned = []
+    unpositioned = []
+    for idx, ck in enumerate(chunks):
+        pos = extract_position(ck)
+        if pos is None:
+            unpositioned.append(idx)
+        else:
+            positioned.append((*pos, idx))
+    # The original index is the last key, so ties keep their input order.
+    positioned.sort()
+    ordered_indices = [entry[-1] for entry in positioned] + unpositioned
+
+    # Classify every chunk before any mutation: attaching context fills in a
+    # media chunk's text, which would otherwise make an untyped image chunk
+    # look like a text neighbour of the media chunk that follows it.
+    kinds = [classify(ck) for ck in chunks]
+    budgets = {"image": image_context_size, "table": table_context_size}
+
+    def collect_context(positions, token_budget, from_tail):
+        # Walk neighbours outwards from the media chunk until the budget is
+        # spent or a non-text chunk is reached.
+        ctx = []
+        remaining = token_budget
+        for pos in positions:
+            if remaining <= 0:
+                break
+            neighbor_idx = ordered_indices[pos]
+            if kinds[neighbor_idx] != "text":
+                break
+            txt = get_text(chunks[neighbor_idx])
+            if not txt:
+                continue
+            tks = num_tokens_from_string(txt)
+            if tks <= 0:
+                continue
+            if tks > remaining:
+                txt = trim_to_tokens(txt, remaining, from_tail=from_tail)
+                tks = num_tokens_from_string(txt)
+            ctx.append(txt)
+            remaining -= tks
+        return ctx
+
+    total = len(ordered_indices)
+    for sorted_pos, idx in enumerate(ordered_indices):
+        token_budget = budgets.get(kinds[idx], 0)
+        if token_budget <= 0:
+            continue
+
+        ck = chunks[idx]
+        prev_ctx = collect_context(range(sorted_pos - 1, -1, -1), token_budget, from_tail=True)
+        prev_ctx.reverse()
+        next_ctx = collect_context(range(sorted_pos + 1, total), token_budget, from_tail=False)
+        if not prev_ctx and not next_ctx:
+            continue
+
+        self_text = get_text(ck)
+        pieces = [*prev_ctx]
+        if self_text:
+            pieces.append(self_text)
+        pieces.extend(next_ctx)
+        combined = "\n".join(pieces)
+
+        original = ck.get("content_with_weight")
+        if "content_with_weight" in ck:
+            ck["content_with_weight"] = combined
+        elif "text" in ck:
+            original = ck.get("text")
+            ck["text"] = combined
+
+        if combined != original and ("content_ltks" in ck or "content_sm_ltks" in ck):
+            # Tokenize once and reuse the result for both token fields.
+            ltks = rag_tokenizer.tokenize(combined)
+            if "content_ltks" in ck:
+                ck["content_ltks"] = ltks
+            if "content_sm_ltks" in ck:
+                ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ltks)
+
+    if positioned:
+        chunks[:] = [chunks[i] for i in ordered_indices]
+
+    return chunks
+
+
 def add_positions(d, poss):
     if not poss:
         return
diff --git a/test/testcases/configs.py b/test/testcases/configs.py
index 992e98d5b..a94a627b7 100644
--- a/test/testcases/configs.py
+++ b/test/testcases/configs.py
@@ -42,6 +42,8 @@ DEFAULT_PARSER_CONFIG = {
     "auto_keywords": 0,
     "auto_questions": 0,
     "html4excel": False,
+    "image_context_size": 0,
+    "table_context_size": 0,
     "topn_tags": 3,
     "raptor": {
         "use_raptor": True,
@@ -62,4 +64,4 @@ DEFAULT_PARSER_CONFIG = {
         ],
         "method": "light",
     },
-}
\ No newline at end of file
+}