mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix potential SSRF attack vulnerability (#4334)
### What problem does this PR solve?

Fix potential SSRF attack vulnerability

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
@ -1,4 +1,7 @@
|
||||
import re
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
import ipaddress
|
||||
import json
|
||||
import base64
|
||||
|
||||
@ -76,5 +79,25 @@ def __get_pdf_from_html(
|
||||
return base64.b64decode(result["data"])
|
||||
|
||||
|
||||
def is_private_ip(ip: str) -> bool:
    """Tell whether *ip* is an IP address literal in a private range.

    The string is parsed with ``ipaddress.ip_address``; the result is that
    object's ``is_private`` flag. Text that cannot be parsed as an IPv4 or
    IPv6 address is reported as not private (``False``) rather than raising.
    """
    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        # Not an IP literal at all, so it cannot be a private one.
        return False
    return address.is_private
|
||||
|
||||
def is_valid_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL that is safe to fetch server-side.

    A URL is accepted only when all of the following hold:

    * it starts with ``http://`` or ``https://`` and matches the URL pattern;
    * ``urlparse`` can extract a non-empty hostname from it;
    * the hostname resolves, and EVERY address it resolves to (IPv4 and
      IPv6) is globally routable and not multicast.

    The address check is the SSRF guard: loopback, RFC 1918 private ranges,
    link-local addresses (e.g. 169.254.169.254), shared address space
    (100.64.0.0/10) and similar non-global ranges are all rejected.

    Malformed URLs and hostnames that cannot be resolved yield ``False``
    instead of raising.

    NOTE(review): this validates the addresses seen at check time only. If
    the caller resolves the hostname again when fetching, a DNS answer that
    changes in between is not covered here.
    """
    if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
        return False

    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse raises on malformed bracketed hosts such as "http://ab[".
        return False
    if not hostname:
        return False

    try:
        # getaddrinfo returns every address (A and AAAA); gethostbyname
        # would return a single IPv4 address and let the others through.
        addr_infos = socket.getaddrinfo(hostname, None)
    except (socket.gaierror, ValueError):
        # ValueError also covers UnicodeError raised while encoding
        # hostnames with empty or over-long labels (e.g. "a..b").
        return False
    if not addr_infos:
        return False

    for addr_info in addr_infos:
        # sockaddr[0] is the textual address; drop any IPv6 scope id ("%eth0").
        host = addr_info[4][0].split("%", 1)[0]
        try:
            address = ipaddress.ip_address(host)
        except ValueError:
            # Fail closed on anything that cannot be classified.
            return False

        # Judge IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) by the IPv4 part.
        mapped = getattr(address, "ipv4_mapped", None)
        if mapped is not None:
            address = mapped

        if not address.is_global or address.is_multicast:
            return False

    return True
|
||||
Reference in New Issue
Block a user