"""
Tensor class with reverse-mode automatic differentiation.
This is the core data structure for TensorQuantLib. Every computation
flows through Tensor objects, which track the computational graph
and enable gradient computation via backpropagation.
"""
from __future__ import annotations
from collections.abc import Callable
from typing import Any, Union
import numpy as np
[docs]
class Tensor:
    """A multi-dimensional array with automatic differentiation support.

    Stores data as a NumPy float64 array, optionally tracking gradients.
    When ``requires_grad=True``, operations on this tensor build a
    computational graph that enables reverse-mode autodiff via
    ``.backward()``.

    Attributes:
        data: The underlying NumPy array (float64).
        grad: Gradient array, populated after ``.backward()``. ``None`` until
            the first backward pass.
        requires_grad: Whether this tensor participates in gradient computation.
    """

    def __init__(
        self,
        data: Union[np.ndarray, list[Any], float, int, Tensor],
        requires_grad: bool = False,
        _children: tuple[Tensor, ...] = (),
        _op: str = "",
    ):
        """Wrap ``data`` as a float64 array and initialise graph bookkeeping.

        Args:
            data: Array-like value. Another Tensor is accepted, in which case
                its ``data`` array is used.
            requires_grad: Whether gradients should be tracked for this tensor.
            _children: Tensors this node was computed from (internal).
            _op: Label of the producing operation, for debugging (internal).
        """
        if isinstance(data, Tensor):
            data = data.data
        self.data = np.asarray(data, dtype=np.float64)
        self.requires_grad = requires_grad
        self.grad: np.ndarray | None = None
        # Closure performing this node's local backward step; a no-op until
        # an operation installs its own.
        self._backward: Callable[[], None] = lambda: None
        self._children = set(_children)
        self._op = _op  # label for debugging

    # ------------------------------------------------------------------ #
    # Properties
    # ------------------------------------------------------------------ #
    @property
    def shape(self) -> tuple[int, ...]:
        """Shape of the underlying array."""
        return self.data.shape

    @property
    def ndim(self) -> int:
        """Number of dimensions of the underlying array."""
        return self.data.ndim

    @property
    def dtype(self) -> np.dtype[Any]:
        """dtype of the underlying array."""
        return self.data.dtype

    @property
    def size(self) -> int:
        """Total number of elements."""
        return self.data.size

    @property
    def T(self) -> Tensor:
        """Transpose (creates a new node in the graph)."""
        return tensor_transpose(self)

    # ------------------------------------------------------------------ #
    # Backward (reverse-mode autodiff)
    # ------------------------------------------------------------------ #
    def backward(self) -> None:
        """Compute gradients via reverse-mode automatic differentiation.

        Builds a topological order of the computational graph, seeds this
        tensor's ``grad`` with ones (dL/dL = 1), then calls every node's
        ``_backward`` closure from this tensor back towards its ancestors.

        The graph is walked with an explicit stack rather than recursion so
        that very deep graphs do not exceed the interpreter's recursion
        limit. The visiting order is the same post-order a recursive
        depth-first search would produce.
        """
        topo: list[Tensor] = []
        visited: set[int] = {id(self)}
        # Each stack entry pairs a node with an iterator over its children,
        # so the walk resumes where it left off once a child is finished.
        stack = [(self, iter(self._children))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if id(child) not in visited:
                    visited.add(id(child))
                    stack.append((child, iter(child._children)))
                    break
            else:
                # All children handled: the node can be emitted.
                topo.append(node)
                stack.pop()

        # Seed gradient
        self.grad = np.ones_like(self.data)

        # Reverse pass
        for node in reversed(topo):
            node._backward()

    def zero_grad(self) -> None:
        """Reset gradient to None."""
        self.grad = None

    # ------------------------------------------------------------------ #
    # Dunder methods — delegate to ops module functions
    # ------------------------------------------------------------------ #
    def __add__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_add(self, other)

    def __radd__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_add(other, self)

    def __sub__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_sub(self, other)

    def __rsub__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_sub(other, self)

    def __mul__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_mul(self, other)

    def __rmul__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_mul(other, self)

    def __truediv__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_div(self, other)

    def __rtruediv__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_div(other, self)

    def __neg__(self) -> Tensor:
        return tensor_neg(self)

    def __matmul__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_matmul(self, other)

    def __rmatmul__(self, other: object) -> Tensor:
        other = _ensure_tensor(other)
        return tensor_matmul(other, self)

    def __pow__(self, exponent: Union[int, float]) -> Tensor:
        return tensor_pow(self, exponent)

    # Forbid in-place ops to protect the computational graph
    def __iadd__(self, other: object) -> Tensor:
        raise NotImplementedError(
            "In-place operations are not supported on Tensors with autograd. "
            "Use out-of-place operations instead: z = x + y"
        )

    def __isub__(self, other: object) -> Tensor:
        raise NotImplementedError("In-place sub not supported. Use z = x - y.")

    def __imul__(self, other: object) -> Tensor:
        raise NotImplementedError("In-place mul not supported. Use z = x * y.")

    def __itruediv__(self, other: object) -> Tensor:
        raise NotImplementedError("In-place div not supported. Use z = x / y.")

    # ------------------------------------------------------------------ #
    # Convenience methods that delegate to ops
    # ------------------------------------------------------------------ #
    def sum(
        self, axis: Union[int, tuple[int, ...]] | None = None, keepdims: bool = False
    ) -> Tensor:
        """Sum over ``axis`` (all elements when ``axis`` is None)."""
        return tensor_sum(self, axis=axis, keepdims=keepdims)

    def mean(
        self, axis: Union[int, tuple[int, ...]] | None = None, keepdims: bool = False
    ) -> Tensor:
        """Mean over ``axis`` (all elements when ``axis`` is None)."""
        return tensor_mean(self, axis=axis, keepdims=keepdims)

    def reshape(self, *shape: Union[int, tuple[int, ...], list[int]]) -> Tensor:
        """Reshape; accepts ``reshape(2, 3)`` as well as ``reshape((2, 3))``."""
        if len(shape) == 1 and isinstance(shape[0], (tuple, list)):
            final_shape = tuple(shape[0])
        else:
            final_shape = tuple(int(s) for s in shape)  # type: ignore[arg-type]
        return tensor_reshape(self, final_shape)

    def exp(self) -> Tensor:
        """Element-wise exponential."""
        return tensor_exp(self)

    def log(self) -> Tensor:
        """Element-wise natural logarithm."""
        return tensor_log(self)

    def sqrt(self) -> Tensor:
        """Element-wise square root."""
        return tensor_sqrt(self)

    def sin(self) -> Tensor:
        """Element-wise sine."""
        return tensor_sin(self)

    def cos(self) -> Tensor:
        """Element-wise cosine."""
        return tensor_cos(self)

    def tanh(self) -> Tensor:
        """Element-wise hyperbolic tangent."""
        return tensor_tanh(self)

    def abs(self) -> Tensor:
        """Element-wise absolute value."""
        return tensor_abs(self)

    def clip(self, a_min: float, a_max: float) -> Tensor:
        """Element-wise clip to ``[a_min, a_max]``."""
        return tensor_clip(self, a_min, a_max)

    def item(self) -> float:
        """Return scalar value (only works for size-1 tensors)."""
        return float(self.data.item())

    def detach(self) -> Tensor:
        """Return a new Tensor with the same data but detached from the graph.

        The returned tensor has ``requires_grad=False`` and no ``_children``,
        so gradients will not flow through it. Use this to treat an
        intermediate result as a constant::

            y = x * x
            y_const = y.detach()  # gradients stop here
        """
        return Tensor(self.data.copy(), requires_grad=False)

    def free_graph(self) -> None:
        """Release all references to the computational graph.

        Clears ``_children`` and the ``_backward`` closure for this node
        and all ancestors, breaking reference cycles and allowing GC to
        reclaim memory. Call after ``backward()`` in long-running loops
        to prevent unbounded memory growth.

        Uses an explicit stack instead of recursion so that deep graphs do
        not exceed the interpreter's recursion limit.
        """
        visited: set[int] = set()
        stack: list[Tensor] = [self]
        while stack:
            node = stack.pop()
            if id(node) in visited:
                continue
            visited.add(id(node))
            # Queue the children before dropping the references to them.
            stack.extend(node._children)
            node._children = set()
            node._backward = lambda: None

    # ------------------------------------------------------------------ #
    # Representation
    # ------------------------------------------------------------------ #
    def __repr__(self) -> str:
        shape = self.data.shape
        # Small tensors show their values; larger ones only their shape.
        if self.data.size <= 4:
            data_str = str(self.data.tolist())
        else:
            data_str = f"shape={shape}"
        grad_str = ", requires_grad=True" if self.requires_grad else ""
        op_str = f", op='{self._op}'" if self._op else ""
        return f"Tensor({data_str}{grad_str}{op_str})"

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: Any) -> Tensor:
        """Index into the tensor, returning a Tensor that participates in autograd.

        Supports any NumPy-compatible index (int, slice, tuple, boolean mask).
        Gradients flow back to the indexed positions of the original tensor.
        """
        return tensor_getitem(self, idx)
# ====================================================================== #
# Helper: ensure value is a Tensor
# ====================================================================== #
def _ensure_tensor(val: Union[np.ndarray, list[Any], float, int, Tensor, object]) -> Tensor:
    """Return ``val`` as a Tensor.

    A value that already is a Tensor is handed back unchanged; anything
    else is wrapped in a new Tensor created with ``requires_grad=False``.
    """
    already_tensor = isinstance(val, Tensor)
    return val if already_tensor else Tensor(val, requires_grad=False)  # type: ignore[arg-type]
# ====================================================================== #
# Unbroadcast helper (critical for correct gradients)
# ====================================================================== #
def _unbroadcast(grad: np.ndarray, target_shape: tuple[int, ...]) -> np.ndarray:
    """Sum out dimensions that were broadcast during the forward pass.

    When an operation broadcasts (e.g., shape (3,1) + (1,4) -> (3,4)),
    the backward pass must sum gradients along the broadcast dimensions
    to produce the correct gradient shape for each input.

    Args:
        grad: Gradient with the (broadcast) shape of the operation's output.
        target_shape: Shape of the input the gradient is being reduced to.

    Returns:
        ``grad`` reduced to ``target_shape``. When no reduction is needed the
        input object is returned as-is.
    """
    # Sum out leading dimensions that broadcasting prepended.
    while grad.ndim > len(target_shape):
        grad = grad.sum(axis=0)
    # Sum along axes where the target had size 1 but the gradient does not.
    # ``!= 1`` (rather than ``> 1``) also covers zero-length axes: a size-1
    # axis broadcast against a size-0 axis yields size 0, and summing that
    # empty axis gives the correct zero gradient of size 1.
    for i, dim in enumerate(target_shape):
        if dim == 1 and grad.shape[i] != 1:
            grad = grad.sum(axis=i, keepdims=True)
    return grad
# ====================================================================== #
# Core differentiable operations
# ====================================================================== #
[docs]
def tensor_add(a: Tensor, b: Tensor) -> Tensor:
    """Element-wise addition: z = a + b.

    The result requires grad when either operand does. In the backward
    pass the upstream gradient is passed to each operand that requires
    grad, reduced with ``_unbroadcast`` to that operand's shape.
    """
    track = a.requires_grad or b.requires_grad
    out = Tensor(a.data + b.data, requires_grad=track, _children=(a, b), _op="+")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        for operand in (a, b):
            if not operand.requires_grad:
                continue
            if operand.grad is None:
                operand.grad = np.zeros_like(operand.data)
            operand.grad += _unbroadcast(upstream, operand.shape)

    out._backward = _backward
    return out
[docs]
def tensor_sub(a: Tensor, b: Tensor) -> Tensor:
    """Element-wise subtraction: z = a - b.

    The result requires grad when either operand does. In the backward
    pass ``a`` receives the upstream gradient and ``b`` its negation, each
    reduced with ``_unbroadcast`` to the operand's shape.
    """
    track = a.requires_grad or b.requires_grad
    out = Tensor(a.data - b.data, requires_grad=track, _children=(a, b), _op="-")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            a.grad += _unbroadcast(upstream, a.shape)
        if b.requires_grad:
            if b.grad is None:
                b.grad = np.zeros_like(b.data)
            b.grad += _unbroadcast(-upstream, b.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_mul(a: Tensor, b: Tensor) -> Tensor:
    """Element-wise multiplication: z = a * b.

    The result requires grad when either operand does. In the backward
    pass each operand receives the upstream gradient times the other
    operand's data, reduced with ``_unbroadcast`` to its own shape.
    """
    track = a.requires_grad or b.requires_grad
    out = Tensor(a.data * b.data, requires_grad=track, _children=(a, b), _op="*")
    if not out.requires_grad:
        return out

    def _init_grad(t: Tensor) -> None:
        # Allocate the accumulation buffer on first use.
        if t.grad is None:
            t.grad = np.zeros_like(t.data)

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if a.requires_grad:
            _init_grad(a)
            a.grad += _unbroadcast(upstream * b.data, a.shape)
        if b.requires_grad:
            _init_grad(b)
            b.grad += _unbroadcast(upstream * a.data, b.shape)

    out._backward = _backward
    return out
[docs]
def tensor_div(a: Tensor, b: Tensor) -> Tensor:
    """Element-wise division: z = a / b.

    The result requires grad when either operand does. Backward pass:
    dz/da = 1 / b and dz/db = -a / b**2, each multiplied by the upstream
    gradient and reduced with ``_unbroadcast`` to the operand's shape.
    """
    track = a.requires_grad or b.requires_grad
    out = Tensor(a.data / b.data, requires_grad=track, _children=(a, b), _op="/")

    def _init_grad(t: Tensor) -> None:
        # Allocate the accumulation buffer on first use.
        if t.grad is None:
            t.grad = np.zeros_like(t.data)

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if a.requires_grad:
            _init_grad(a)
            a.grad += _unbroadcast(upstream / b.data, a.shape)
        if b.requires_grad:
            _init_grad(b)
            b.grad += _unbroadcast(-upstream * a.data / (b.data**2), b.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_neg(a: Tensor) -> Tensor:
    """Element-wise negation: z = -a.

    Args:
        a: Input tensor.

    Returns:
        New tensor holding the negated values. Gradient: dz/da = -1.

    Example:
        >>> x = Tensor(np.array([3.0, -2.0]), requires_grad=True)
        >>> y = tensor_neg(x)
        >>> y.data
        array([-3.,  2.])
    """
    out = Tensor(-a.data, requires_grad=a.requires_grad, _children=(a,), _op="neg")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += -upstream

    out._backward = _backward
    return out
[docs]
def tensor_matmul(a: Tensor, b: Tensor) -> Tensor:
    """Matrix multiplication: Z = A @ B.

    Follows ``numpy.matmul`` semantics in the forward pass: 1-D operands
    are treated as row/column vectors and leading (batch) dimensions are
    broadcast. The backward pass handles all of those cases:

    * dL/dA = dL/dZ @ B^T and dL/dB = A^T @ dL/dZ, where the transpose
      swaps only the last two axes (so batched operands work);
    * 1-D operands are temporarily promoted to 2-D and the extra axis is
      dropped again from the gradient (this also covers vector @ vector);
    * batch dimensions that were broadcast are summed out.

    Args:
        a: Left operand.
        b: Right operand.

    Returns:
        New tensor holding ``a.data @ b.data``; requires grad when either
        operand does.
    """
    out = Tensor(a.data @ b.data, _children=(a, b), _op="@")
    out.requires_grad = a.requires_grad or b.requires_grad

    def _backward() -> None:
        assert out.grad is not None
        a_is_vec = a.data.ndim == 1
        b_is_vec = b.data.ndim == 1
        # Promote vectors the same way matmul does: (n,) -> (1, n) on the
        # left, (n,) -> (n, 1) on the right.
        a_mat = a.data[np.newaxis, :] if a_is_vec else a.data
        b_mat = b.data[:, np.newaxis] if b_is_vec else b.data
        # Re-insert the axes matmul dropped from the output. The column
        # axis must go in first so that axis -2 exists for the row axis.
        g = out.grad
        if b_is_vec:
            g = np.expand_dims(g, -1)
        if a_is_vec:
            g = np.expand_dims(g, -2)

        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            # dL/dA = dL/dZ @ B^T
            grad_a = g @ np.swapaxes(b_mat, -1, -2)
            if a_is_vec:
                grad_a = grad_a[..., 0, :]
            a.grad += _unbroadcast(grad_a, a.shape)
        if b.requires_grad:
            if b.grad is None:
                b.grad = np.zeros_like(b.data)
            # dL/dB = A^T @ dL/dZ
            grad_b = np.swapaxes(a_mat, -1, -2) @ g
            if b_is_vec:
                grad_b = grad_b[..., 0]
            b.grad += _unbroadcast(grad_b, b.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_pow(a: Tensor, exponent: Union[int, float]) -> Tensor:
    """Power: z = a^exponent (exponent is a constant, not a Tensor).

    Args:
        a: Base tensor.
        exponent: Constant exponent; no gradient is computed for it.

    Returns:
        New tensor holding ``a.data ** exponent``.
        Gradient: dz/da = exponent * a^(exponent - 1).
    """
    out = Tensor(a.data**exponent, _children=(a,), _op=f"**{exponent}")
    out.requires_grad = a.requires_grad

    def _backward() -> None:
        assert out.grad is not None
        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            if isinstance(exponent, (int, float)) and exponent == 0:
                # a^0 is constant, so its derivative is zero everywhere.
                # The general formula would evaluate 0 * 0**-1 = nan at
                # a == 0, so contribute nothing instead.
                return
            a.grad += out.grad * exponent * (a.data ** (exponent - 1))

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_exp(a: Tensor) -> Tensor:
    """Element-wise exponential: z = exp(a).

    Args:
        a: Input tensor.

    Returns:
        New tensor holding exp(a). Gradient: dz/da = exp(a); the forward
        result is reused in the backward pass.

    Example:
        >>> x = Tensor(np.array([0.0, 1.0]), requires_grad=True)
        >>> y = tensor_exp(x)
        >>> np.allclose(y.data, [1.0, np.e])
        True
    """
    values = np.exp(a.data)
    out = Tensor(values, requires_grad=a.requires_grad, _children=(a,), _op="exp")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream * values

    out._backward = _backward
    return out
[docs]
def tensor_log(a: Tensor) -> Tensor:
    """Element-wise natural logarithm: z = log(a).

    The input is clamped from below at 1e-12 before taking the log, which
    avoids log(0). The same clamped values are used for the gradient.

    Args:
        a: Input tensor (values should be positive).

    Returns:
        New tensor holding log(a). Gradient: dz/da = 1/a.

    Example:
        >>> x = Tensor(np.array([1.0, np.e]), requires_grad=True)
        >>> y = tensor_log(x)
        >>> np.allclose(y.data, [0.0, 1.0])
        True
    """
    clamped = np.maximum(a.data, 1e-12)
    out = Tensor(np.log(clamped), requires_grad=a.requires_grad, _children=(a,), _op="log")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream / clamped

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_sqrt(a: Tensor) -> Tensor:
    """Element-wise square root: z = sqrt(a).

    Args:
        a: Input tensor (values should be non-negative).

    Returns:
        New tensor holding sqrt(a). Gradient: dz/da = 1 / (2 * sqrt(a)),
        where an exact zero in sqrt(a) is replaced by 1e-12 to avoid
        dividing by zero.

    Example:
        >>> x = Tensor(np.array([4.0, 9.0]), requires_grad=True)
        >>> y = tensor_sqrt(x)
        >>> np.allclose(y.data, [2.0, 3.0])
        True
    """
    roots = np.sqrt(a.data)
    out = Tensor(roots, requires_grad=a.requires_grad, _children=(a,), _op="sqrt")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        # Swap exact zeros for a tiny value so the division stays finite.
        denominator = np.where(roots == 0, 1e-12, roots)
        a.grad += upstream / (2.0 * denominator)

    out._backward = _backward
    return out
[docs]
def tensor_sum(
    a: Tensor, axis: Union[int, tuple[int, ...]] | None = None, keepdims: bool = False
) -> Tensor:
    """Sum: z = sum(a, axis).

    Args:
        a: Input tensor.
        axis: Axis or axes to reduce; ``None`` reduces over all elements.
            Negative axes and NumPy integer axes are supported.
        keepdims: Whether reduced axes are kept with size 1.

    Returns:
        New tensor holding the sum. In the backward pass the upstream
        gradient is broadcast back to the shape of ``a``.
    """
    out_data = a.data.sum(axis=axis, keepdims=keepdims)
    out = Tensor(out_data, _children=(a,), _op="sum")
    out.requires_grad = a.requires_grad

    def _backward() -> None:
        assert out.grad is not None
        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            g = out.grad
            ndim = a.data.ndim
            if axis is not None and not keepdims and ndim > 0:
                raw_axes = (axis,) if isinstance(axis, (int, np.integer)) else tuple(axis)
                # Normalise to non-negative positions first. Re-inserting
                # the reduced axes in ascending order then restores the
                # original layout, which is not the case when several
                # negative axes are inserted as given.
                for ax in sorted(int(ax) % ndim for ax in raw_axes):
                    g = np.expand_dims(g, axis=ax)
            # In-place add reads straight from the broadcast view; no copy
            # is needed.
            a.grad += np.broadcast_to(g, a.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_mean(
    a: Tensor, axis: Union[int, tuple[int, ...]] | None = None, keepdims: bool = False
) -> Tensor:
    """Mean: z = mean(a, axis).

    Args:
        a: Input tensor.
        axis: Axis or axes to reduce; ``None`` reduces over all elements.
            Negative axes and NumPy integer axes are supported.
        keepdims: Whether reduced axes are kept with size 1.

    Returns:
        New tensor holding the mean. In the backward pass the upstream
        gradient is divided by the number of averaged elements and
        broadcast back to the shape of ``a``.
    """
    out_data = a.data.mean(axis=axis, keepdims=keepdims)
    out = Tensor(out_data, _children=(a,), _op="mean")
    out.requires_grad = a.requires_grad

    ndim = a.data.ndim
    # Normalise the reduced axes to sorted, non-negative positions once, so
    # both the element count and the backward pass agree on them.
    if axis is None:
        reduced_axes: list[int] = []
        count = a.data.size
    else:
        raw_axes = (axis,) if isinstance(axis, (int, np.integer)) else tuple(axis)
        reduced_axes = sorted(int(ax) % ndim for ax in raw_axes) if ndim > 0 else []
        # Number of elements being averaged.
        count = 1
        for ax in reduced_axes:
            count *= a.data.shape[ax]

    def _backward() -> None:
        assert out.grad is not None
        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            g = out.grad
            if axis is not None and not keepdims:
                # Ascending insertion of non-negative axes restores the
                # original layout even when negative axes were given.
                for ax in reduced_axes:
                    g = np.expand_dims(g, axis=ax)
            # In-place add reads straight from the broadcast view; no copy
            # is needed.
            a.grad += np.broadcast_to(g / count, a.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_reshape(a: Tensor, shape: tuple[int, ...]) -> Tensor:
    """Reshape: z = a.reshape(shape).

    In the backward pass the upstream gradient is reshaped back to the
    shape of ``a`` and accumulated into ``a.grad``.
    """
    reshaped = a.data.reshape(shape)
    out = Tensor(reshaped, requires_grad=a.requires_grad, _children=(a,), _op="reshape")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream.reshape(a.shape)

    out._backward = _backward
    return out
[docs]
def tensor_transpose(a: Tensor) -> Tensor:
    """Transpose: z = a.T.

    In the backward pass the transposed upstream gradient is accumulated
    into ``a.grad``.
    """
    out = Tensor(a.data.T, requires_grad=a.requires_grad, _children=(a,), _op="T")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream.T

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_maximum(a: Tensor, val: float = 0.0) -> Tensor:
    """Element-wise maximum: z = max(a, val). Used for ReLU and payoff clipping.

    The backward pass uses the subgradient 1 where ``a > val`` and 0 where
    ``a <= val``.
    """
    floored = np.maximum(a.data, val)
    out = Tensor(floored, requires_grad=a.requires_grad, _children=(a,), _op=f"max({val})")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        # Gradient passes only through entries strictly above the threshold.
        passes = np.greater(a.data, val).astype(np.float64)
        a.grad += upstream * passes

    out._backward = _backward
    return out
[docs]
def tensor_norm_cdf(a: Tensor) -> Tensor:
    """Standard normal CDF: z = Phi(a).

    Uses ``scipy.stats.norm``, imported when the function is called. The
    backward pass multiplies the upstream gradient by the standard normal
    PDF evaluated at ``a``.
    """
    from scipy.stats import norm

    cdf_values = norm.cdf(a.data)
    out = Tensor(cdf_values, requires_grad=a.requires_grad, _children=(a,), _op="Φ")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        # dΦ/da = φ(a), the normal PDF
        a.grad += upstream * norm.pdf(a.data)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_sin(a: Tensor) -> Tensor:
    """Element-wise sine: z = sin(a).

    Args:
        a: Input tensor (in radians).

    Returns:
        New tensor holding sin(a). Gradient: dz/da = cos(a).

    Example:
        >>> x = Tensor(np.array([0.0, np.pi / 2]), requires_grad=True)
        >>> y = tensor_sin(x)
        >>> np.allclose(y.data, [0.0, 1.0], atol=1e-10)
        True
    """
    out = Tensor(np.sin(a.data), requires_grad=a.requires_grad, _children=(a,), _op="sin")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream * np.cos(a.data)

    out._backward = _backward
    return out
[docs]
def tensor_cos(a: Tensor) -> Tensor:
    """Element-wise cosine: z = cos(a).

    Args:
        a: Input tensor (in radians).

    Returns:
        New tensor holding cos(a). Gradient: dz/da = -sin(a).

    Example:
        >>> x = Tensor(np.array([0.0, np.pi]), requires_grad=True)
        >>> y = tensor_cos(x)
        >>> np.allclose(y.data, [1.0, -1.0], atol=1e-10)
        True
    """
    out = Tensor(np.cos(a.data), requires_grad=a.requires_grad, _children=(a,), _op="cos")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        local_slope = -np.sin(a.data)
        a.grad += upstream * local_slope

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_tanh(a: Tensor) -> Tensor:
    """Element-wise hyperbolic tangent: z = tanh(a).

    Args:
        a: Input tensor.

    Returns:
        New tensor holding tanh(a). Gradient: dz/da = 1 - tanh(a)^2; the
        forward result is reused in the backward pass.

    Example:
        >>> x = Tensor(np.array([0.0, 1.0]), requires_grad=True)
        >>> y = tensor_tanh(x)
        >>> abs(y.data[0]) < 1e-10  # tanh(0) = 0
        True
    """
    activated = np.tanh(a.data)
    out = Tensor(activated, requires_grad=a.requires_grad, _children=(a,), _op="tanh")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        # d tanh/da = 1 - tanh²(a)
        a.grad += upstream * (1.0 - activated**2)

    out._backward = _backward
    return out
[docs]
def tensor_abs(a: Tensor) -> Tensor:
    """Element-wise absolute value: z = |a|.

    The backward pass uses sign(a) as the subgradient, which is 0 at a = 0.

    Args:
        a: Input tensor.

    Returns:
        New tensor holding |a|. Gradient: dz/da = sign(a).

    Example:
        >>> x = Tensor(np.array([-3.0, 0.0, 5.0]), requires_grad=True)
        >>> y = tensor_abs(x)
        >>> y.data
        array([3., 0., 5.])
    """
    out = Tensor(np.abs(a.data), requires_grad=a.requires_grad, _children=(a,), _op="abs")

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        a.grad += upstream * np.sign(a.data)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_clip(a: Tensor, a_min: float, a_max: float) -> Tensor:
    """Element-wise clip: z = clip(a, a_min, a_max).

    Gradient is 1 where ``a`` lies within ``[a_min, a_max]`` (bounds
    included) and 0 otherwise.
    """
    clipped = np.clip(a.data, a_min, a_max)
    out = Tensor(clipped, requires_grad=a.requires_grad, _children=(a,), _op="clip")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        inside = np.logical_and(a.data >= a_min, a.data <= a_max)
        a.grad += upstream * inside.astype(np.float64)

    out._backward = _backward
    return out
[docs]
def tensor_where(condition: np.ndarray, a: Tensor, b: Tensor) -> Tensor:
    """Element-wise selection: z = a where condition else b.

    The condition is coerced to a boolean array once, so the forward
    selection and the backward gradient mask always agree. This also
    allows lists, Python scalars and non-boolean arrays (non-zero counts
    as True). If a Tensor is passed, its ``data`` is used; no gradient
    flows to the condition.

    Args:
        condition: Boolean NumPy array (or array-like).
        a: Values where condition is True.
        b: Values where condition is False.

    Returns:
        New tensor with the selected values. In the backward pass ``a``
        receives the upstream gradient where the condition is True and
        ``b`` where it is False, each reduced with ``_unbroadcast``.
    """
    if isinstance(condition, Tensor):
        condition = condition.data
    cond = np.asarray(condition, dtype=bool)
    out = Tensor(np.where(cond, a.data, b.data), _children=(a, b), _op="where")
    out.requires_grad = a.requires_grad or b.requires_grad

    def _backward() -> None:
        assert out.grad is not None
        # A strict 0/1 mask: converting a non-boolean condition to float
        # directly would scale the gradients by its raw values.
        mask = cond.astype(np.float64)
        if a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            a.grad += _unbroadcast(out.grad * mask, a.shape)
        if b.requires_grad:
            if b.grad is None:
                b.grad = np.zeros_like(b.data)
            b.grad += _unbroadcast(out.grad * (1.0 - mask), b.shape)

    if out.requires_grad:
        out._backward = _backward
    return out
[docs]
def tensor_softmax(a: Tensor, axis: int = -1) -> Tensor:
    """Softmax along an axis: z_i = exp(a_i) / sum_j(exp(a_j)).

    The maximum along ``axis`` is subtracted before exponentiating, for
    numerical stability. The backward pass applies the softmax
    Jacobian-vector product ``s * (g - sum(g * s))`` along ``axis``.
    """
    row_max = a.data.max(axis=axis, keepdims=True)
    exponentials = np.exp(a.data - row_max)
    probs = exponentials / exponentials.sum(axis=axis, keepdims=True)
    out = Tensor(probs, requires_grad=a.requires_grad, _children=(a,), _op="softmax")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        # Jacobian-vector product for softmax
        weighted_total = (upstream * probs).sum(axis=axis, keepdims=True)
        a.grad += probs * (upstream - weighted_total)

    out._backward = _backward
    return out
[docs]
def tensor_getitem(a: Tensor, idx: Any) -> Tensor:
    """Index into a Tensor, preserving gradient flow.

    Supports any NumPy-compatible index (int, slice, array, bool mask).
    In the backward pass the upstream gradient is scattered back to the
    indexed positions with ``np.add.at``.

    Example::

        x = Tensor([1.0, 2.0, 3.0], requires_grad=True)
        y = x[1] ** 2  # y = 4.0; x.grad[1] = 4.0 after backward
    """
    # np.asarray keeps the result an ndarray even when the index yields a scalar.
    selected = np.asarray(a.data[idx], dtype=np.float64)
    out = Tensor(selected, requires_grad=a.requires_grad, _children=(a,), _op="[]")
    if not out.requires_grad:
        return out

    def _backward() -> None:
        upstream = out.grad
        assert upstream is not None
        if not a.requires_grad:
            return
        if a.grad is None:
            a.grad = np.zeros_like(a.data)
        np.add.at(a.grad, idx, upstream)

    out._backward = _backward
    return out