feat(core): Harden native Python task runner (no-changelog) (#18826)

This commit is contained in:
Iván Ovejero
2025-08-29 14:19:38 +02:00
committed by GitHub
parent e29ed1532a
commit 3b574306f3
8 changed files with 435 additions and 47 deletions

View File

@@ -3,6 +3,8 @@ import multiprocessing
import traceback
import textwrap
import json
import os
import sys
from src.errors import (
TaskResultMissingError,
@@ -11,9 +13,9 @@ from src.errors import (
TaskProcessExitError,
)
from .message_types.broker import NodeMode, Items
from .constants import EXECUTOR_CIRCULAR_REFERENCE_KEY, EXECUTOR_USER_OUTPUT_KEY
from typing import Any
from src.message_types.broker import NodeMode, Items
from src.constants import EXECUTOR_CIRCULAR_REFERENCE_KEY, EXECUTOR_USER_OUTPUT_KEY
from typing import Any, Set
from multiprocessing.context import SpawnProcess
@@ -26,7 +28,14 @@ class TaskExecutor:
"""Responsible for executing Python code tasks in isolated subprocesses."""
@staticmethod
def create_process(code: str, node_mode: NodeMode, items: Items):
def create_process(
code: str,
node_mode: NodeMode,
items: Items,
stdlib_allow: Set[str],
external_allow: Set[str],
builtins_deny: set[str],
):
"""Create a subprocess for executing a Python code task and a queue for communication."""
fn = (
@@ -36,7 +45,10 @@ class TaskExecutor:
)
queue = MULTIPROCESSING_CONTEXT.Queue()
process = MULTIPROCESSING_CONTEXT.Process(target=fn, args=(code, items, queue))
process = MULTIPROCESSING_CONTEXT.Process(
target=fn,
args=(code, items, queue, stdlib_allow, external_allow, builtins_deny),
)
return process, queue
@@ -95,16 +107,27 @@ class TaskExecutor:
process.kill()
@staticmethod
def _all_items(raw_code: str, items: Items, queue: multiprocessing.Queue):
def _all_items(
raw_code: str,
items: Items,
queue: multiprocessing.Queue,
stdlib_allow: Set[str],
external_allow: Set[str],
builtins_deny: set[str],
):
"""Execute a Python code task in all-items mode."""
os.environ.clear()
TaskExecutor._sanitize_sys_modules(stdlib_allow, external_allow)
print_args: PrintArgs = []
try:
code = TaskExecutor._wrap_code(raw_code)
globals = {
"__builtins__": __builtins__,
"__builtins__": TaskExecutor._filter_builtins(builtins_deny),
"_items": items,
"print": TaskExecutor._create_custom_print(print_args),
}
@@ -119,9 +142,20 @@ class TaskExecutor:
TaskExecutor._put_error(queue, e, print_args)
@staticmethod
def _per_item(raw_code: str, items: Items, queue: multiprocessing.Queue):
def _per_item(
raw_code: str,
items: Items,
queue: multiprocessing.Queue,
stdlib_allow: Set[str],
external_allow: Set[str],
builtins_deny: set[str],
):
"""Execute a Python code task in per-item mode."""
os.environ.clear()
TaskExecutor._sanitize_sys_modules(stdlib_allow, external_allow)
print_args: PrintArgs = []
try:
@@ -131,7 +165,7 @@ class TaskExecutor:
result = []
for index, item in enumerate(items):
globals = {
"__builtins__": __builtins__,
"__builtins__": TaskExecutor._filter_builtins(builtins_deny),
"_item": item,
"print": TaskExecutor._create_custom_print(print_args),
}
@@ -195,7 +229,7 @@ class TaskExecutor:
@staticmethod
def _format_print_args(*args) -> list[str]:
"""
Takes the arguments passed to a `print()` call in user code and converts them
Takes the args passed to a `print()` call in user code and converts them
to string representations suitable for display in a browser console.
Expects all args to be serializable.
@@ -217,3 +251,45 @@ class TaskExecutor:
formatted.append(json.dumps(arg, default=str, ensure_ascii=False))
return formatted
# ========== security ==========
@staticmethod
def _filter_builtins(builtins_deny: set[str]):
"""Get __builtins__ with denied ones removed."""
if len(builtins_deny) == 0:
return __builtins__
return {k: v for k, v in __builtins__.items() if k not in builtins_deny}
@staticmethod
def _sanitize_sys_modules(stdlib_allow: Set[str], external_allow: Set[str]):
safe_modules = {
"builtins",
"__main__",
"sys",
"traceback",
"linecache",
}
if "*" in stdlib_allow:
safe_modules.update(sys.stdlib_module_names)
else:
safe_modules.update(stdlib_allow)
if "*" in external_allow:
safe_modules.update(
name
for name in sys.modules.keys()
if name not in sys.stdlib_module_names
)
else:
safe_modules.update(external_allow)
modules_to_remove = [
name for name in sys.modules.keys() if name not in safe_modules
]
for module_name in modules_to_remove:
del sys.modules[module_name]