# Source code for job_shop_lib.reinforcement_learning._utils

"""Utility functions for reinforcement learning."""

from typing import TypeVar, Any

import numpy as np
from numpy.typing import NDArray

from job_shop_lib import ScheduledOperation
from job_shop_lib.exceptions import ValidationError
from job_shop_lib.dispatching import OptimalOperationsObserver, Dispatcher

T = TypeVar("T", bound=np.number)



# [docs]
def add_padding(
    array: NDArray[Any],
    output_shape: tuple[int, ...],
    padding_value: float = -1,
    dtype: type[T] | None = None,
) -> NDArray[T]:
    """Adds padding to the array.

    Pads the input array to the specified output shape with a given padding
    value. The input is copied into the leading corner of the output (index
    0 along every axis) and the remaining positions hold ``padding_value``.
    If the ``dtype`` is not specified, the ``dtype`` of the input array
    is used.

    Args:
        array:
            The input array to be padded.
        output_shape:
            The desired shape of the output array. It must have one entry
            per dimension of ``array``, each at least as large as the
            corresponding input dimension.
        padding_value:
            The value to use for padding. Defaults to -1.
        dtype:
            The data type for the output array. Defaults to ``None``, in which
            case the dtype of the input array is used.

    Returns:
        The padded array with the specified output shape.

    Raises:
        ValidationError:
            If the output shape does not have the same number of dimensions
            as the input array, or if it is smaller than the input shape
            along any dimension.

    Examples:

    .. doctest::

        >>> array = np.array([[1, 2], [3, 4]])
        >>> add_padding(array, (3, 3))
        array([[ 1,  2, -1],
               [ 3,  4, -1],
               [-1, -1, -1]])

        >>> add_padding(array, (3, 3), padding_value=0)
        array([[1, 2, 0],
               [3, 4, 0],
               [0, 0, 0]])

        >>> bool_array = np.array([[True, False], [False, True]])
        >>> add_padding(bool_array, (3, 3), padding_value=False, dtype=int)
        array([[1, 0, 0],
               [0, 1, 0],
               [0, 0, 0]])

        >>> add_padding(bool_array, (3, 3), dtype=int)
        array([[ 1,  0, -1],
               [ 0,  1, -1],
               [-1, -1, -1]])
    """

    # The comparison below relies on NumPy broadcasting, so a rank mismatch
    # must be rejected first: otherwise it would either surface as a raw
    # NumPy ``ValueError`` or silently broadcast the input across the extra
    # dimensions instead of padding it.
    if np.size(output_shape) != array.ndim:
        raise ValidationError(
            "Output shape must have the same number of dimensions as the "
            f"input array. Got output shape: {output_shape}, "
            f"input shape: {array.shape}."
        )

    if np.any(np.less(output_shape, array.shape)):
        raise ValidationError(
            "Output shape must be greater than the input shape. "
            f"Got output shape: {output_shape}, input shape: {array.shape}."
        )

    if dtype is None:
        dtype = array.dtype.type

    padded_array = np.full(
        output_shape,
        fill_value=padding_value,
        dtype=dtype,
    )

    # Nothing to copy: the output consists solely of padding.
    if array.size == 0:
        return padded_array

    # Select the leading block of the output that has the input's shape.
    slices = tuple(slice(0, dim) for dim in array.shape)
    padded_array[slices] = array
    return padded_array




# [docs]
def create_edge_type_dict(
    edge_index: NDArray[T],
    type_ranges: dict[str, tuple[int, int]],
    relationship: str = "to",
) -> dict[tuple[str, str, str], NDArray[T]]:
    """Splits an edge index into one edge index per pair of node types.

    Every ordered pair of entries in ``type_ranges`` (including a type paired
    with itself) produces one key, even when no edge matches it.

    Args:
        edge_index:
            NumPy array of shape (2, E), where E is the number of edges. Row
            0 holds the source node of each edge and row 1 its target node.
        type_ranges:
            Dictionary mapping each type name to the half-open index range
            [start, end) of the nodes that belong to that type.
        relationship:
            A string representing the relationship type between nodes. It is
            used as the middle element of every key.

    Returns:
        A dictionary with keys (type_i, relationship, type_j) whose values
        are the columns of ``edge_index`` with a source node in the range of
        type_i and a target node in the range of type_j.
    """

    def _within(node_ids, bounds):
        # Boolean mask of the nodes that fall inside [lower, upper).
        lower, upper = bounds
        return (node_ids >= lower) & (node_ids < upper)

    # One mask per node type for each endpoint of the edges.
    source_masks = {
        type_name: _within(edge_index[0], bounds)
        for type_name, bounds in type_ranges.items()
    }
    target_masks = {
        type_name: _within(edge_index[1], bounds)
        for type_name, bounds in type_ranges.items()
    }

    # An edge belongs to (type_i, relationship, type_j) when its source is in
    # type_i and its target is in type_j.
    return {
        (source_type, relationship, target_type): edge_index[
            :, source_mask & target_mask
        ]
        for source_type, source_mask in source_masks.items()
        for target_type, target_mask in target_masks.items()
    }




# [docs]
def map_values(array: NDArray[T], mapping: dict[int, int]) -> NDArray[T]:
    """Maps values in an array using a mapping.

    Args:
        array:
            A NumPy array. An empty array is returned unchanged.
        mapping:
            Dictionary whose keys are the values expected in ``array`` and
            whose values are their replacements. It may contain keys that do
            not appear in ``array``.

    Returns:
        A NumPy array where each element has been replaced by its
        corresponding value from the mapping.

    Raises:
        ValidationError:
            If the array contains values that are not in the mapping.

    Examples:
        >>> map_values(np.array([1, 2, 3]), {1: 10, 2: 20, 3: 30})
        array([10, 20, 30])

        >>> map_values(np.array([1, 2]), {1: 10, 2: 10, 3: 30})
        array([10, 10])

    """
    if array.size == 0:
        return array
    try:
        # ``mapping.get`` would yield ``None`` for an unmapped value; when
        # that value is the first element, ``np.vectorize`` infers an object
        # dtype from it and returns an array containing ``None`` instead of
        # failing. ``__getitem__`` makes every unmapped value raise.
        vectorized_mapping = np.vectorize(mapping.__getitem__)
        return vectorized_mapping(array)
    except KeyError as e:
        raise ValidationError(
            "The array contains values that are not in the mapping."
        ) from e




# [docs]
def get_optimal_actions(
    optimal_ops_observer: OptimalOperationsObserver,
    available_operations_with_ids: list[tuple[int, int, int]],
) -> dict[tuple[int, int, int], int]:
    """Indicates if each action is optimal according to a
    :class:`~job_shop_lib.dispatching.OptimalOperationsObserver` instance.

    Args:
        optimal_ops_observer:
            The observer that provides optimal operations through its
            ``optimal_available`` attribute.
        available_operations_with_ids:
            List of available operations with their IDs
            (operation_id, machine_id, job_id).

    Returns:
        A dictionary mapping each tuple
        (operation_id, machine_id, job_id) in the available actions to a binary
        indicator (1 if optimal, 0 otherwise).
    """
    # A set keeps each membership test below constant-time instead of
    # scanning a list of optimal operations for every available action.
    optimal_ops_ids = {
        (op.operation_id, op.machine_id, op.job_id)
        for op in optimal_ops_observer.optimal_available
    }
    optimal_actions: dict[tuple[int, int, int], int] = {}
    for operation_id, machine_id, job_id in available_operations_with_ids:
        action = (operation_id, machine_id, job_id)
        optimal_actions[action] = int(action in optimal_ops_ids)
    return optimal_actions




# [docs]
def get_deadline_violation_penalty(
    scheduled_operation: ScheduledOperation,
    unused_dispatcher: Dispatcher,
    deadline_penalty_factor: float = 10_000,
) -> float:
    """Return the penalty incurred when a scheduled operation ends after its
    deadline.

    Args:
        scheduled_operation:
            The scheduled operation to evaluate. Its ``end_time`` is compared
            against the ``deadline`` of its operation.
        unused_dispatcher:
            This argument is unused but included for compatibility with the
            penalty function signature.
        deadline_penalty_factor:
            Penalty returned when the operation finishes after its deadline.
            Defaults to 10_000.
    Returns:
        ``deadline_penalty_factor`` if the operation has a deadline and ends
        strictly after it, otherwise 0.

    .. versionadded:: 1.7.0
    """
    deadline = scheduled_operation.operation.deadline
    # Operations without a deadline can never violate it.
    if deadline is None:
        return 0.0
    missed_deadline = scheduled_operation.end_time > deadline
    return deadline_penalty_factor if missed_deadline else 0.0




# [docs]
def get_due_date_violation_penalty(
    scheduled_operation: ScheduledOperation,
    unused_dispatcher: Dispatcher,
    due_date_penalty_factor: float = 100,
) -> float:
    """Return the penalty incurred when a scheduled operation ends after its
    due date.

    Args:
        scheduled_operation:
            The scheduled operation to evaluate. Its ``end_time`` is compared
            against the ``due_date`` of its operation.
        unused_dispatcher:
            This argument is unused but included for compatibility with the
            penalty function signature.
        due_date_penalty_factor:
            Penalty returned when the operation finishes after its due date.
            Defaults to 100.
    Returns:
        ``due_date_penalty_factor`` if the operation has a due date and ends
        strictly after it, otherwise 0.

    .. versionadded:: 1.7.0
    """
    due_date = scheduled_operation.operation.due_date
    # The end time is only inspected when a due date is actually set.
    finished_late = (
        due_date is not None and scheduled_operation.end_time > due_date
    )
    if not finished_late:
        return 0.0
    return due_date_penalty_factor