diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 18f3b4f2..0c7e321e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,4 +1,4 @@ -### All Submissions +### All submissions - [ ] Have you followed the guidelines in our [Contributing](../../CONTRIBUTING.md) document? - [ ] Have you checked to ensure there are no other open [Pull Requests](../../../pulls) for the same changes? diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4f6af7ea..a61bcb44 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,7 +25,6 @@ jobs: make build - name: Check docstrings - continue-on-error: true # TODO: remove when all docstring issues are fixed run: | make docstring-check diff --git a/Makefile b/Makefile index 7605111e..07d75216 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ setup-tensorboard: @echo '=== Setup TensorBoard ===' $(UV) pip install -e ".[tensorboard]" -check: lint format typecheck +check: typecheck format lint docstring-check format: @echo '=== Formatting ===' @@ -73,8 +73,6 @@ si-test: @echo '=== Running single integration test for $(T) ===' $(UV) run $(PYTEST) -n auto -s $(PROJECT_NAME)/integration_tests/$(T) -m "integration" - - # If the first argument is run... ifeq ($(firstword $(MAKECMDGOALS)),run) # use the rest as arguments for run... 
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 4cbdc772..7502350b 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -58,7 +58,7 @@ HyperBench declares compatibility ranges for direct dependencies in `pyproject.t | Dependency | Supported range | Notes | | --- | --- | --- | -| mkdocstrings[python] | `>=1.0.4,<2.0.0` | | +| mkdocstrings\[python\] | `>=1.0.4,<2.0.0` | | | pre-commit | `>=4.5.1,<5.0.0` | | | pytest | `>=9.0.3,<10.0.0` | | | pytest-cov | `>=7.1.0,<8.0.0` | | diff --git a/examples/early_stopping.py b/examples/early_stopping.py index 03e9c149..b0070fb5 100644 --- a/examples/early_stopping.py +++ b/examples/early_stopping.py @@ -81,7 +81,8 @@ train_dataset.enrich_node_features( enricher=LaplacianPositionalEncodingEnricher( num_features=num_features, - # In transductive setting, use total number of nodes to ensure consistent encoding across splits + # In transductive setting, use total number of nodes to ensure consistent encoding + # across splits # as the train dataset contain all nodes but may have no hyperedges where they appear num_nodes=train_dataset.hdata.num_nodes, ), diff --git a/examples/external_dataset.py b/examples/external_dataset.py index b5b0b305..13d6e4d0 100644 --- a/examples/external_dataset.py +++ b/examples/external_dataset.py @@ -81,7 +81,8 @@ train_dataset.enrich_node_features( enricher=LaplacianPositionalEncodingEnricher( num_features=num_features, - # In transductive setting, use total number of nodes to ensure consistent encoding across splits + # In transductive setting, use total number of nodes to ensure consistent encoding + # across splits # as the train dataset contain all nodes but may have no hyperedges where they appear num_nodes=train_dataset.hdata.num_nodes, ), diff --git a/hyperbench/__init__.py b/hyperbench/__init__.py index aaa13ee1..b13aa47e 100644 --- a/hyperbench/__init__.py +++ b/hyperbench/__init__.py @@ -10,5 +10,8 @@ 
warnings.filterwarnings( "ignore", - message="ignore:Failing to pass a value to the 'type_params' parameter of 'typing._eval_type' is deprecated.*", + message=( + "Failing to pass a value to the 'type_params' parameter of " + "'typing._eval_type' is deprecated.*" + ), ) diff --git a/hyperbench/data/dataset.py b/hyperbench/data/dataset.py index 6dbbe5a0..230f1484 100644 --- a/hyperbench/data/dataset.py +++ b/hyperbench/data/dataset.py @@ -30,7 +30,8 @@ class Dataset(TorchDataset): Args: hdata: The processed hypergraph data in HData format. - sampling_strategy: The strategy used for sampling sub-hypergraphs (e.g., by node IDs or hyperedge IDs). + sampling_strategy: The strategy used for sampling sub-hypergraphs + (e.g., by node IDs or hyperedge IDs). If not provided, defaults to ``SamplingStrategy.HYPEREDGE``. """ @@ -44,10 +45,11 @@ def __init__( Args: hdata: Optional HData object to initialize the dataset with. - If provided, the dataset will be initialized with this data instead of loading and processing from HIF. Must be provided if prepare is set to ``False``. - sampling_strategy: The sampling strategy to use for the dataset. If not provided, defaults to ``SamplingStrategy.HYPEREDGE``. + If provided, the dataset will be initialized with this data instead of loading and + processing from HIF. Must be provided if prepare is set to ``False``. + sampling_strategy: The sampling strategy to use for the dataset. If not provided, + defaults to ``SamplingStrategy.HYPEREDGE``. """ - self.__sampler = create_sampler_from_strategy(sampling_strategy) self.sampling_strategy = sampling_strategy self.hdata = hdata if hdata is not None else HData.empty() @@ -58,18 +60,23 @@ def __len__(self) -> int: def __getitem__(self, index: int | list[int]) -> HData: """ Sample a sub-hypergraph based on the sampling strategy and return it as HData.
+ If: - - Sampling by node IDs, the sub-hypergraph will contain all hyperedges incident to the sampled nodes and all nodes incident to those hyperedges. - - Sampling by hyperedge IDs, the sub-hypergraph will contain all nodes incident to the sampled hyperedges. + - Sampling by node IDs, the sub-hypergraph will contain all hyperedges incident to the + sampled nodes and all nodes incident to those hyperedges. + - Sampling by hyperedge IDs, the sub-hypergraph will contain all nodes incident to the + sampled hyperedges. Args: - index: An integer or a list of integers representing node or hyperedge IDs to sample, depending on the sampling strategy. + index: An integer or a list of integers representing node or hyperedge IDs to sample, + depending on the sampling strategy. Returns: hdata: An HData instance containing the sampled sub-hypergraph. Raises: - ValueError: If the provided index is invalid (e.g., empty list or list length exceeds number of nodes/hyperedges). + ValueError: If the provided index is invalid (e.g., empty list or list length exceeds + number of nodes/hyperedges). IndexError: If any node/hyperedge ID is out of bounds. """ return self.__sampler.sample(index, self.hdata) @@ -85,7 +92,8 @@ def from_hdata( Args: hdata: `HData` object containing the hypergraph data. - sampling_strategy: The sampling strategy to use for the dataset. If not provided, defaults to ``SamplingStrategy.HYPEREDGE``. + sampling_strategy: The sampling strategy to use for the dataset. If not provided, + defaults to ``SamplingStrategy.HYPEREDGE``. Returns: dataset: The `Dataset` instance with the provided `HData`. @@ -100,11 +108,13 @@ def from_url( save_on_disk: bool = False, ) -> Dataset: """ - Create a `Dataset` instance by loading a hypergraph from a URL pointing to a .json or .json.zst file in HIF format. + Create a `Dataset` instance by loading a hypergraph from a URL pointing to a .json or + .json.zst file in HIF format. 
Args: url: The URL to the .json or .json.zst file containing the HIF hypergraph data. - sampling_strategy: The sampling strategy to use for the dataset. If not provided, defaults to ``SamplingStrategy.HYPEREDGE``. + sampling_strategy: The sampling strategy to use for the dataset. If not provided, + defaults to ``SamplingStrategy.HYPEREDGE``. save_on_disk: Whether to save the downloaded file on disk. Returns: @@ -121,11 +131,14 @@ def from_path( sampling_strategy: SamplingStrategy = SamplingStrategy.HYPEREDGE, ) -> Dataset: """ - Create a `Dataset` instance by loading a hypergraph from a local file path pointing to a .json or .json.zst file in HIF format. + Create a `Dataset` instance by loading a hypergraph from a local file path pointing to a + .json or .json.zst file in HIF format. Args: - filepath: The local file path to the .json or .json.zst file containing the HIF hypergraph data. - sampling_strategy: The sampling strategy to use for the dataset. If not provided, defaults to ``SamplingStrategy.HYPEREDGE``. + filepath: The local file path to the .json or .json.zst file containing the + HIF hypergraph data. + sampling_strategy: The sampling strategy to use for the dataset. If not provided, + defaults to ``SamplingStrategy.HYPEREDGE``. Returns: dataset: The `Dataset` instance with the loaded hypergraph data. @@ -143,7 +156,8 @@ def enrich_node_features( Enrich node features using the provided node feature enricher. Args: - enricher: An instance of NodeEnricher to generate structural node features from hypergraph topology. + enricher: An instance of NodeEnricher to generate structural node features from + hypergraph topology. enrichment_mode: How to combine generated features with existing ``hdata.x``. ``concatenate`` appends new features to the existing ones as additional columns. ``replace`` substitutes ``hdata.x`` entirely. @@ -168,18 +182,22 @@ def enrich_node_features_from( >>> test_dataset.enrich_node_features_from( ... train_dataset, ... 
node_space_setting="inductive", - ... fill_value=0.0, # torch.tensor(0.0) also works and will be broadcast to the appropriate shape + ... fill_value=0.0, # torch.tensor(0.0) also works and will be broadcast to the + ... # appropriate shape ... ) Args: dataset_with_features: Source dataset providing node features. node_space_setting: The setting for the node space, determining how nodes are handled. ``transductive`` (default) preserves the full node space of the target dataset. - ``inductive`` allows the target dataset to have a different node space, filling missing features with ``fill_value``. - fill_value: Scalar or vector used to fill missing node features when ``node_space_setting`` is not transductive. + ``inductive`` allows the target dataset to have a different node space, filling + missing features with ``fill_value``. + fill_value: Scalar or vector used to fill missing node features when + ``node_space_setting`` is not transductive. Raises: - ValueError: If the source dataset's node features cannot be aligned with the target dataset's nodes. + ValueError: If the source dataset's node features cannot be aligned with the target + dataset's nodes. """ self.hdata = self.hdata.enrich_node_features_from( hdata_with_features=dataset_with_features.hdata, @@ -196,8 +214,10 @@ def enrich_hyperedge_attr( Enrich hyperedge attributes using the provided hyperedge feature enricher. Args: - enricher: An instance of HyperedgeEnricher to generate structural hyperedge attributes from hypergraph topology. - enrichment_mode: How to combine generated attributes with existing ``hdata.hyperedge_attr``. + enricher: An instance of HyperedgeEnricher to generate structural hyperedge + attributes from hypergraph topology. + enrichment_mode: How to combine generated attributes with existing + ``hdata.hyperedge_attr``. ``concatenate`` appends new attributes to the existing ones as additional columns. ``replace`` substitutes ``hdata.hyperedge_attr`` entirely.
Defaults to ``replace`` if not provided. @@ -213,8 +233,10 @@ def enrich_hyperedge_weights( Enrich hyperedge weights using the provided hyperedge weight enricher. Args: - enricher: An instance of HyperedgeEnricher to generate structural hyperedge weights from hypergraph topology. - enrichment_mode: How to combine generated weights with existing ``hdata.hyperedge_weights``. + enricher: An instance of HyperedgeEnricher to generate structural hyperedge weights + from hypergraph topology. + enrichment_mode: How to combine generated weights with existing + ``hdata.hyperedge_weights``. ``concatenate`` appends new weights to the existing ones as additional columns. ``replace`` substitutes ``hdata.hyperedge_weights`` entirely. Defaults to ``replace`` if not provided. @@ -242,7 +264,8 @@ def add_negative_samples( Create a new `Dataset` with sampled negative hyperedges added. Args: - negative_sampler: Sampler used to generate negative hyperedges from this dataset's ``hdata``. + negative_sampler: Sampler used to generate negative hyperedges from + this dataset's ``hdata``. seed: Optional random seed used for both negative sampling and the final shuffle. Returns: @@ -265,10 +288,11 @@ def remove_hyperedges_with_fewer_than_k_nodes( Args: k: The minimum number of nodes a hyperedge must have to be retained. - preserve_global_node_ids: Whether to preserve the global node IDs after removing hyperedges. Defaults to ``False``. - If ``False``, the global node IDs will be reindexed to be contiguous after removing hyperedges. - If ``True``, the global node IDs will be preserved, which may cause some models to raise - as they may expect contiguous global node IDs. + preserve_global_node_ids: Whether to preserve the global node IDs + after removing hyperedges. Defaults to ``False``. If ``False``, the global node IDs + will be reindexed to be contiguous after removing hyperedges. 
+ If ``True``, the global node IDs will be preserved, which may cause some models + to raise as they may expect contiguous global node IDs. """ self.hdata = self.hdata.remove_hyperedges_with_fewer_than_k_nodes( k, preserve_global_node_ids @@ -313,7 +337,9 @@ def split( Args: ratios: List of floats summing to ``1.0``, e.g., ``[0.8, 0.1, 0.1]``. - shuffle: Whether to shuffle hyperedges before splitting. Defaults to ``False`` for deterministic splits. + shuffle: Whether to shuffle hyperedges before splitting. Defaults to ``False`` + for deterministic splits. + seed: Optional random seed for reproducibility. Ignored if shuffle is set to ``False``. node_space_setting: Whether to preserve the full node space in the splits. ``transductive`` (default) preserves the full node space on the first split. ``inductive`` keeps each split's local node space. @@ -324,9 +350,11 @@ def split( train_split_idx: The index of the split to treat as the train split. Defaults to ``0``, so the first split is the train split that gets the full node space in the transductive setting and is optionally rebalanced to cover all nodes. - This is used only when ``node_space_setting=="transductive"`` and ``cover_all_nodes_in_train_split==True``, + This is used only when ``node_space_setting=="transductive"`` + and ``cover_all_nodes_in_train_split==True``, to determine which split should be rebalanced to cover all nodes. - For the 'inductive' setting, splits are always returned based on the provided ratios. + For the 'inductive' setting, splits are always returned based on + the provided ratios. seed: Optional random seed for reproducibility. Ignored if shuffle is set to ``False``. splitter: Optional dataset splitter. When provided, it owns split construction and final-ratio reporting. @@ -396,10 +424,13 @@ def split_with_ratios( train_split_idx: The index of the split to treat as the train split. 
Defaults to ``0``, so the first split is the train split that gets the full node space in the transductive setting and is optionally rebalanced to cover all nodes. - This is used only when ``node_space_setting=="transductive"`` and ``cover_all_nodes_in_train_split==True``, + This is used only when ``node_space_setting=="transductive"`` + and ``cover_all_nodes_in_train_split==True``, to determine which split should be rebalanced to cover all nodes. - For the 'inductive' setting, splits are always returned based on the provided ratios. - seed: Optional random seed for reproducibility. Ignored if ``shuffle`` is set to ``False``. + For the 'inductive' setting, splits are always returned based on + the provided ratios. + seed: Optional random seed for reproducibility. Ignored if ``shuffle`` + is set to ``False``. Returns: datasets_and_ratios: A tuple containing the split datasets and their @@ -451,27 +482,35 @@ def transform_hyperedge_attrs( def stats(self) -> dict[str, Any]: """ Compute statistics for the dataset. + This method currently delegates to the underlying HData's stats method. - The fields returned in the dictionary include: - - ``shape_x``: The shape of the node feature matrix ``x``. - - ``shape_hyperedge_attr``: The shape of the hyperedge attribute matrix, or ``None`` if hyperedge attributes are not present. - - ``num_nodes``: The number of nodes in the hypergraph. - - ``num_hyperedges``: The number of hyperedges in the hypergraph. - - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean number of hyperedges each node belongs to. - - ``avg_degree_node``: The floored node average degree. - - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as the mean number of nodes each hyperedge contains. - - ``avg_degree_hyperedge``: The floored hyperedge average size. - - ``node_degree_max``: The maximum degree of any node in the hypergraph. - - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. 
- - ``node_degree_median``: The median degree of nodes in the hypergraph. - - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. - - ``distribution_node_degree``: A list where the value at index ``i`` represents the count of nodes with degree ``i``. - - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents the count of hyperedges with size ``i``. - - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees and the values are the count of nodes with that degree. - - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge sizes and the values are the count of hyperedges with that size. + + Fields: + - ``shape_x``: The shape of the node feature matrix ``x``. + - ``shape_hyperedge_attr``: The shape of the hyperedge attribute matrix, or ``None`` + if hyperedge attributes are not present. + - ``num_nodes``: The number of nodes in the hypergraph. + - ``num_hyperedges``: The number of hyperedges in the hypergraph. + - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean number + of hyperedges each node belongs to. + - ``avg_degree_node``: The floored node average degree. + - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as the + mean number of nodes each hyperedge contains. + - ``avg_degree_hyperedge``: The floored hyperedge average size. + - ``node_degree_max``: The maximum degree of any node in the hypergraph. + - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. + - ``node_degree_median``: The median degree of nodes in the hypergraph. + - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. + - ``distribution_node_degree``: A list where the value at index ``i`` represents + the count of nodes with degree ``i``. + - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents + the count of hyperedges with size ``i``. 
+ - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees + and the values are the count of nodes with that degree. + - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge + sizes and the values are the count of hyperedges with that size. Returns: stats: A dictionary containing various statistics about the hypergraph. """ - return self.hdata.stats() diff --git a/hyperbench/data/enricher.py b/hyperbench/data/enricher.py index 5d57ea3c..8178bcfe 100644 --- a/hyperbench/data/enricher.py +++ b/hyperbench/data/enricher.py @@ -30,7 +30,8 @@ class _VilLainTrainer: Args: num_features: Dimensionality of the embeddings to generate. num_nodes: Total number of nodes, including isolated nodes missing from ``hyperedge_index``. - num_hyperedges: Total number of hyperedges, including empty hyperedges missing from ``hyperedge_index``. + num_hyperedges: Total number of hyperedges, including empty hyperedges missing + from ``hyperedge_index``. labels_per_subspace: Number of virtual labels per VilLain subspace. training_steps: Propagation steps used for VilLain self-supervised loss. generation_steps: Propagation steps averaged for final embeddings. @@ -93,7 +94,8 @@ def _num_hyperedges(self, hyperedge_index: Tensor) -> int: Return the explicit hyperedge count or infer it from ``hyperedge_index``. Args: - hyperedge_index: Hyperedge index tensor used to infer the hyperedge count when no explicit count was provided. + hyperedge_index: Hyperedge index tensor used to infer the hyperedge count when + no explicit count was provided. Returns: Total number of hyperedges to preserve during VilLain propagation. @@ -109,14 +111,15 @@ def _num_nodes(self, hyperedge_index: Tensor) -> int: Return the explicit node count or infer it from ``hyperedge_index``. Args: - hyperedge_index: Hyperedge index tensor used to infer the node count when no explicit count was provided. 
+ hyperedge_index: Hyperedge index tensor used to infer the node count when + no explicit count was provided. Returns: Total number of nodes to preserve during VilLain training and embedding generation. """ return HyperedgeIndex(hyperedge_index).num_nodes_if_isolated_exist(self.num_nodes) - def _train(self, hyperedge_index: Tensor): + def _train(self, hyperedge_index: Tensor) -> VilLain: """ Train a VilLain model on the provided hypergraph topology. @@ -247,7 +250,8 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: hyperedge_index: Hyperedge index tensor of shape ``(2, num_hyperedges)``. Returns: - hyperedge_attr: Tensor of shape ``(num_hyperedges, 1)`` containing the generated attribute for each hyperedge. + hyperedge_attr: Tensor of shape ``(num_hyperedges, 1)`` containing + the generated attribute for each hyperedge. """ num_hyperedges = HyperedgeIndex(hyperedge_index).num_hyperedges hyperedge_attrs = torch.full( @@ -265,8 +269,10 @@ class VilLainHyperedgeAttrsEnricher(_VilLainTrainer, HyperedgeAttrsEnricher): Args: num_features: Dimensionality of the hyperedge embeddings to generate. - num_nodes: Total number of nodes, including isolated nodes that do not appear in ``hyperedge_index``. - num_hyperedges: Total number of hyperedges, including empty hyperedges that do not appear in ``hyperedge_index``. + num_nodes: Total number of nodes, including isolated nodes that do not + appear in ``hyperedge_index``. + num_hyperedges: Total number of hyperedges, including empty hyperedges that + do not appear in ``hyperedge_index``. labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``. training_steps: Propagation steps used for VilLain self-supervised loss. Defaults to ``4``. generation_steps: Propagation steps averaged for final embeddings. Defaults to ``100``. @@ -320,7 +326,8 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: hyperedge_index: Hyperedge index tensor of shape ``(2, num_hyperedges)``. 
Returns: - hyperedge_embeddings: Tensor of shape ``(num_hyperedges, num_features)`` containing VilLain hyperedge embeddings. + hyperedge_embeddings: Tensor of shape ``(num_hyperedges, num_features)`` + containing VilLain hyperedge embeddings. """ num_hyperedges = self._num_hyperedges(hyperedge_index) if num_hyperedges == 0: @@ -347,8 +354,10 @@ class ABHyperedgeWeightsEnricher(HyperedgeWeightsEnricher): Args: cache_dir: Directory for saving/loading cached features. If ``None``, caching is disabled. - alpha: Scaling factor for the random component added to weights. Must be between 0.0 and 1.0. - beta: If provided, the random component is alpha * beta. If None, no random component is added. + alpha: Scaling factor for the random component added to weights. + Must be between 0.0 and 1.0. + beta: If provided, the random component is alpha * beta. + If None, no random component is added. """ def __init__( @@ -373,10 +382,13 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: hyperedge_index: Hyperedge index tensor of shape ``(2, num_hyperedges)``. Returns: - hyperedge_weight: Tensor of shape ``(num_hyperedges,)`` containing the weight of each hyperedge. + hyperedge_weight: Tensor of shape ``(num_hyperedges,)`` containing + the weight of each hyperedge. """ - # Count the number of nodes in each hyperedge by counting occurrences of each hyperedge index. - # Example: if hyperedge_index[1] = [0, 0, 1, 1, 1], then we have 2 nodes in hyperedge 0 and 3 nodes in hyperedge 1. + # Count the number of nodes in each hyperedge by counting occurrences of + # each hyperedge index. + # Example: if hyperedge_index[1] = [0, 0, 1, 1, 1], then we have 2 nodes + # in hyperedge 0 and 3 nodes in hyperedge 1. 
num_hyperedges = int(hyperedge_index[1].max().item()) + 1 weights = torch.bincount(hyperedge_index[1], minlength=num_hyperedges).float() @@ -388,40 +400,53 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: class Node2VecEnricher(NodeEnricher): """ - Enrich node features using Node2Vec embeddings computed from the clique expansion of the hypergraph. + Enrich node features using Node2Vec embeddings computed from the clique expansion of the + hypergraph. Args: num_features: Dimensionality of the node embeddings to generate. walk_length: Length of each random walk. - context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context). - For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``, - the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk. + context_size: Window size for the skip-gram model + (number of neighbors in the walk considered as context). + For example, if ``context_size=2`` and ``walk_length=5``, then for + a random walk ``[v0, v1, v2, v3, v4]``, + the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within + distance 2 in the walk. The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``. - Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). + Rule of thumb: Graphs with strong local structure (5-10), Graphs with + communities/long-range patterns (10-20). Defaults to ``10``. num_walks_per_node: Number of random walks to start at each node. p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls the probability of stepping back to the node visited in the previous step. - Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the - local neighborhood. 
Higher values of ``p`` discourage returning to the previous node, so walks + Lower values of ``p`` make immediate backtracking more likely, + which keeps walks closer to the + local neighborhood. Higher values of ``p`` discourage returning to the previous node, + so walks are less likely to bounce back and forth across the same edge. q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls whether walks stay near the source node or explore further outward. - Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and + Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS + and emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes, behaving more like BFS and emphasizing community structure and homophily. num_negative_samples: Number of negative samples to use for training the skip-gram model. - If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated, + If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, + ``X`` negative pairs ``(u, v_neg)`` will be generated, where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph. Defaults to ``1``, meaning one negative sample per positive pair. - num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. - This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing). - graph_reduction_strategy: Strategy for reducing the hyperedge graph. Defaults to ``clique_expansion``. + num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from + the hyperedge_index. + This is only needed if the hyperedge_index does not include all nodes + (e.g., some isolated nodes are missing). 
+ graph_reduction_strategy: Strategy for reducing the hyperedge graph. + Defaults to ``clique_expansion``. num_epochs: Number of epochs used to optimize Node2Vec embeddings. Defaults to ``5``. learning_rate: Learning rate for embedding optimization. Defaults to ``0.01``. batch_size: Batch size used by the random-walk loader. Defaults to ``128``. sparse: Whether Node2Vec embeddings should use sparse gradients. - cache_dir: Optional directory to cache computed embeddings. If ``None``, caching is disabled. + cache_dir: Optional directory to cache computed embeddings. If ``None``, caching + is disabled. verbose: Whether to print verbose output during training. Defaults to ``False``. """ @@ -465,15 +490,17 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: """ Compute Node2Vec embeddings from the clique expansion of the hypergraph. - The hypergraph is converted to a regular graph via clique expansion, where each hyperedge of size k - contributes a k x k block of edges between its member nodes. - The resulting ``edge_index`` is then used to train a Node2Vec model using random walks and the skip-gram objective. + The hypergraph is converted to a regular graph via clique expansion, where each hyperedge + of size k contributes a k x k block of edges between its member nodes. + The resulting ``edge_index`` is then used to train a Node2Vec model using random walks + and the skip-gram objective. Args: hyperedge_index: Hyperedge index tensor of shape ``(2, num_hyperedges)``. Returns: - x: Tensor of shape ``(num_nodes, embedding_dim)`` containing the Node2Vec embeddings for each node. + x: Tensor of shape ``(num_nodes, embedding_dim)`` containing the Node2Vec embeddings + for each node. """ device = hyperedge_index.device @@ -497,7 +524,9 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: edge_index_wrapper = EdgeIndex(reduced_edge_index).remove_selfloops() if edge_index_wrapper.num_edges == 0: warnings.warn( - "Clique expansion produced no non-self-loop edges. 
Returning zero node features.", + # Implicit concatenation keeps the emitted warning text identical to the original. + "Clique expansion produced no non-self-loop edges. " + "Returning zero node features.", category=UserWarning, stacklevel=2, ) @@ -581,13 +610,17 @@ def __validate(self) -> None: class LaplacianPositionalEncodingEnricher(NodeEnricher): """ - Enrich node features with Laplacian Positional Encodings computed from the symmetric normalized Laplacian of the clique expansion of the hypergraph. + Enrich node features with Laplacian Positional Encodings computed from the symmetric normalized + Laplacian of the clique expansion of the hypergraph. Args: num_features: Number of positional encoding features to generate for each node. - num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. - This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing). - Another instance is when the setting is transductive and the hyperedge index contains some hyperedges + num_nodes: Total number of nodes in the graph. If not provided, it will be inferred + from the hyperedge_index. + This is only needed if the hyperedge_index does not include all nodes + (e.g., some isolated nodes are missing). + Another instance is when the setting is transductive and the hyperedge index + contains some hyperedges that do not contain all the nodes in the node space. cache_dir: Optional directory to cache computed features. If ``None``, caching is disabled. """ @@ -658,7 +691,8 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: return eigenvectors[:, 1 : self.num_features + 1] # If the graph has fewer usable eigenvectors than requested - # (e.g., num_features = 5 but only 2 available), we create a zero-padded tensor and fill what we have. + # (e.g., num_features = 5 but only 2 available), we create a zero-padded tensor + # and fill what we have. # Example: num_nontrivial_eigenvectors = 2, num_features = 5 # -> shape (3, 5) # columns 0-1 filled, 2-4 are zeros.
x = torch.zeros( @@ -676,8 +710,10 @@ class VilLainEnricher(_VilLainTrainer, NodeEnricher): Args: num_features: Dimensionality of the node embeddings to generate. - num_nodes: Total number of nodes, including isolated nodes that do not appear in ``hyperedge_index``. - num_hyperedges: Total number of hyperedges, including empty hyperedges that do not appear in ``hyperedge_index``. + num_nodes: Total number of nodes, including isolated nodes that do not appear + in ``hyperedge_index``. + num_hyperedges: Total number of hyperedges, including empty hyperedges that + do not appear in ``hyperedge_index``. labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``. training_steps: Propagation steps used for VilLain self-supervised loss. Defaults to ``4``. generation_steps: Propagation steps averaged for final embeddings. Defaults to ``100``. @@ -731,7 +767,8 @@ def enrich(self, hyperedge_index: Tensor) -> Tensor: hyperedge_index: Hyperedge index tensor of shape ``(2, num_hyperedges)``. Returns: - node_embeddings: Tensor of shape ``(num_nodes, num_features)`` containing VilLain node embeddings. + node_embeddings: Tensor of shape ``(num_nodes, num_features)`` containing + VilLain node embeddings. """ num_nodes = self._num_nodes(hyperedge_index) if num_nodes == 0: diff --git a/hyperbench/data/hif.py b/hyperbench/data/hif.py index a19278d6..f4fc50df 100644 --- a/hyperbench/data/hif.py +++ b/hyperbench/data/hif.py @@ -25,7 +25,9 @@ class HIFProcessor: - """A utility class to process HIF hypergraph data into `HData` format.""" + """ + A utility class to process HIF hypergraph data into `HData` format. + """ @staticmethod def transform_attrs( @@ -34,11 +36,13 @@ def transform_attrs( ) -> Tensor: """ Extract and encode numeric attributes to tensor. + Non-numeric attributes are discarded. Missing attributes are filled with ``0.0``. Args: attrs: Dictionary of attributes - attr_keys: Optional list of attribute keys to encode. 
If provided, ensures consistent ordering and fill missing with ``0.0``. + attr_keys: Optional list of attribute keys to encode. If provided, + ensures consistent ordering and fill missing with ``0.0``. Returns: attrs: Tensor of numeric attribute values @@ -67,7 +71,6 @@ def process_hypergraph(cls, hypergraph: HIFHypergraph) -> HData: Returns: hdata: The processed hypergraph data. """ - num_nodes = len(hypergraph.nodes) x = cls.__process_x(hypergraph, num_nodes) @@ -93,7 +96,8 @@ def process_hypergraph(cls, hypergraph: HIFHypergraph) -> HData: ) if hyperedge_id not in hyperedge_id_to_idx: - # Hyperedges start from 0 and are assigned IDs in the order they are first encountered in incidences + # Hyperedges start from 0 and are assigned IDs in the order they are + # first encountered in incidences hyperedge_id_to_idx[hyperedge_id] = len(hyperedge_id_to_idx) node_ids.append(node_id_to_idx[node_id]) @@ -244,7 +248,9 @@ def __process_hyperedge_weights( class HIFLoader: - """A utility class to load hypergraphs from HIF format.""" + """ + A utility class to load hypergraphs from HIF format. + """ @classmethod def load_from_url(cls, url: str, save_on_disk: bool = False) -> HData: @@ -263,7 +269,8 @@ def load_from_url(cls, url: str, save_on_disk: bool = False) -> HData: response = requests.get(url, timeout=20) if response.status_code != 200: raise ValueError( - f"Failed to download dataset from URL {url!r} with status code {response.status_code}" + f"Failed to download dataset from URL {url!r} " + f"with status code {response.status_code}" ) if not url.endswith((".json.zst", ".json")): @@ -273,7 +280,9 @@ def load_from_url(cls, url: str, save_on_disk: bool = False) -> HData: if os.path.basename(url).count(".") > 2: raise ValueError( - f"URL {url!r} has an unexpected filename format. Expected at most one dot in the base filename before the extension (e.g., dataset.json or dataset.json.zst)." + f"URL {url!r} has an unexpected filename format. 
" + "Expected at most one dot in the base filename before the " + "extension (e.g., dataset.json or dataset.json.zst)." ) if url.endswith(".json.zst"): @@ -298,7 +307,8 @@ def load_from_url(cls, url: str, save_on_disk: bool = False) -> HData: @classmethod def load_from_path(cls, filepath: str) -> HData: """ - Load a hypergraph from a local file path pointing to a .json or .json.zst file in HIF format. + Load a hypergraph from a local file path pointing to a .json or .json.zst file in HIF + format. Args: filepath: The local file path to the .json or .json.zst file @@ -337,7 +347,10 @@ def load_by_name( hif_data = from_zst_file_to_json(zst_filename) return cls.__process_hif_data(hif_data, dataset_name) - github_url = f"https://raw.githubusercontent.com/hypernetwork-research-group/datasets/{GITHUB_COMMIT_SHA}/{dataset_name}.json.zst" + github_url = ( + f"https://raw.githubusercontent.com/hypernetwork-research-group/datasets/" + f"{GITHUB_COMMIT_SHA}/{dataset_name}.json.zst" + ) response = requests.get(github_url, timeout=20) if response.status_code == 200: dataset_bytes = response.content @@ -348,7 +361,8 @@ def load_by_name( return hdata warnings.warn( - f"GitHub raw download failed for dataset {dataset_name!r} with status code {response.status_code}\n" + f"GitHub raw download failed for dataset {dataset_name!r} " + f"with status code {response.status_code}\n" "Falling back to Hugging Face Hub download for dataset", category=UserWarning, stacklevel=2, @@ -356,7 +370,8 @@ def load_by_name( if hf_sha is None: raise ValueError( - f"Failed to download dataset {dataset_name!r} from GitHub with status code {response.status_code} " + f"Failed to download dataset {dataset_name!r} from GitHub " + f"with status code {response.status_code} " f"and no SHA provided for Hugging Face Hub fallback." 
) @@ -384,7 +399,8 @@ def load_by_name( shutil.copyfile(downloaded_path, zst_filename) except Exception as e: raise ValueError( - f"Failed to save downloaded dataset {dataset_name!r} to disk at {zst_filename!r}: {e!s}." + f"Failed to save downloaded dataset {dataset_name!r} to disk at " + f"{zst_filename!r}: {e!s}." ) from e if os.path.isdir(hf_cache_dir): @@ -394,7 +410,8 @@ def load_by_name( shutil.rmtree(os.path.join(hf_cache_dir, ".locks", path_prefix)) except Exception as e: warnings.warn( - f"Failed to clean up Hugging Face Hub cache after downloading dataset {dataset_name!r}: {e!s}.", + f"Failed to clean up Hugging Face Hub cache after downloading " + f"dataset {dataset_name!r}: {e!s}.", category=UserWarning, stacklevel=2, ) diff --git a/hyperbench/data/loader.py b/hyperbench/data/loader.py index 77e79b44..e0c60dd3 100644 --- a/hyperbench/data/loader.py +++ b/hyperbench/data/loader.py @@ -35,12 +35,14 @@ def collate(self, batch: list[HData]) -> HData: """ Collates a list of `HData` objects into a single batched `HData` object. - This function combines multiple separate samples into a single batched representation suitable for mini-batch training. - It handles: - - Concatenating node features from all samples. - - Concatenating and offsetting hyperedges from all samples. - - Concatenating hyperedge attributes from all samples, if present. - - Concatenating hyperedge weights from all samples, if present. + This function combines multiple separate samples into a single batched representation + suitable for mini-batch training. + + Handles: + - Concatenating node features from all samples. + - Concatenating and offsetting hyperedges from all samples. + - Concatenating hyperedge attributes from all samples, if present. + - Concatenating hyperedge weights from all samples, if present. 
Examples: Given ``batch = [HData_0, HData_1]``: @@ -56,7 +58,7 @@ def collate(self, batch: list[HData]) -> HData: - ``HData_0`` (3 nodes, 2 hyperedges): >>> hyperedge_index = [[0, 1, 1, 2], # Nodes 0, 1, 1, 2 - ... [0, 0, 1, 1]] # Hyperedge 0 contains {0,1}, Hyperedge 1 contains {1,2} + ... [0, 0, 1, 1]] # HE 0 contains {0,1}, HE 1 contains {1,2} - ``HData_1`` (2 nodes, 1 hyperedge): diff --git a/hyperbench/data/negative_sampler.py b/hyperbench/data/negative_sampler.py index 3cf485d0..299108a1 100644 --- a/hyperbench/data/negative_sampler.py +++ b/hyperbench/data/negative_sampler.py @@ -18,8 +18,10 @@ class NegativeSampler(ABC): Args: return_0based_negatives: - - If ``True``, the negative samples returned by the ``sample`` method will have 0-based node and hyperedge IDs. - - If ``False``, the negative samples will retain the original global node and hyperedge IDs from the input data. + - If ``True``, the negative samples returned by the ``sample`` method + will have 0-based node and hyperedge IDs. + - If ``False``, the negative samples will retain the original global node + and hyperedge IDs from the input data. """ def __init__(self, return_0based_negatives: bool = False): @@ -59,8 +61,10 @@ def _new_negative_hyperedge_index( Returns: hyperedge_index: The concatenated, sorted, and remapped hyperedge index tensor. - If ``self.return_0based_negatives`` is ``True``, the returned tensor will have 0-based node and hyperedge IDs. - Otherwise, it will retain the original global node and hyperedge IDs from the input data. + If ``self.return_0based_negatives`` is ``True``, the returned tensor will + have 0-based node and hyperedge IDs. + Otherwise, it will retain the original global node and hyperedge IDs + from the input data. """ negative_hyperedge_index = torch.cat(sampled_hyperedge_indexes, dim=1) if not self.return_0based_negatives: @@ -86,7 +90,8 @@ def _new_global_node_ids( negative_node_ids: Tensor of negative node IDs. 
Returns: - global_node_ids: The global node IDs for the negative samples, or ``None`` if the input global node IDs are ``None``. + global_node_ids: The global node IDs for the negative samples, or ``None`` if + the input global node IDs are ``None``. """ if global_node_ids is None: return None @@ -122,11 +127,13 @@ def _new_enriched_hyperedge_attr( Generate enriched hyperedge attributes for the negative samples. Args: - hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes for the new hyperedges. + hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes + for the new hyperedges. negative_hyperedge_index: The index tensor for the negative hyperedges. Returns: - hyperedge_attr: The enriched hyperedge attribute tensor for the negative samples, or ``None`` if the enricher is not provided. + hyperedge_attr: The enriched hyperedge attribute tensor for the negative samples, + or ``None`` if the enricher is not provided. """ if hyperedge_attr_enricher is None: return None @@ -145,11 +152,13 @@ def _new_enriched_hyperedge_weights( Generate enriched hyperedge weights for the negative samples. Args: - hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights for the new hyperedges. + hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to + generate weights for the new hyperedges. negative_hyperedge_index: The index tensor for the negative hyperedges. Returns: - hyperedge_weights: The enriched hyperedge weight tensor for the negative samples, or ``None`` if the enricher is not provided. + hyperedge_weights: The enriched hyperedge weight tensor for the negative samples, + or ``None`` if the enricher is not provided. """ if hyperedge_weights_enricher is None: return None @@ -168,7 +177,8 @@ def _new_x(self, x: Tensor, negative_node_ids: Tensor) -> tuple[Tensor, int]: negative_node_ids: Tensor of negative node IDs. 
Returns: - x_and_num_negative_nodes: The node feature matrix for the negative samples and the number of negative nodes. + x: The node feature matrix for the negative samples. + num_negative_nodes: The number of negative nodes. """ return x[negative_node_ids], len(negative_node_ids) @@ -245,11 +255,15 @@ class SameNodeSpaceNegativeSampler(NegativeSampler, ABC): Base class for negative samplers that sample only from existing nodes. Args: - hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes for the new hyperedges. - hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights for the new hyperedges. + hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes for + the new hyperedges. + hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights + for the new hyperedges. return_0based_negatives: - - If ``True``, the negative samples returned by the ``sample`` method will have 0-based node and hyperedge IDs. - - If ``False``, the negative samples will retain the original global node and hyperedge IDs from the input data. + - If ``True``, the negative samples returned by the ``sample`` method will have + 0-based node and hyperedge IDs. + - If ``False``, the negative samples will retain the original global node + and hyperedge IDs from the input data. """ def __init__( @@ -269,11 +283,15 @@ class GeneratedNodesNegativeSampler(NegativeSampler, ABC): Args: node_feature_enricher: A `NodeEnricher` to generate features for the new nodes. - hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes for the new hyperedges. - hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights for the new hyperedges. + hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes + for the new hyperedges. 
+ hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights + for the new hyperedges. return_0based_negatives: - - If ``True``, the negative samples returned by the ``sample`` method will have 0-based node and hyperedge IDs. - - If ``False``, the negative samples will retain the original global node and hyperedge IDs from the input data. + - If ``True``, the negative samples returned by the ``sample`` method will have + 0-based node and hyperedge IDs. + - If ``False``, the negative samples will retain the original global node and + hyperedge IDs from the input data. """ def __init__( @@ -291,23 +309,30 @@ def __init__( class RandomNegativeSampler(SameNodeSpaceNegativeSampler): """ - A random negative sampler. Negatives generated with ``return_0based_negatives = False`` aren't usable standalone - as they have global node and hyperedge IDs. They must be concatenated with the original `HData` object - that is provided as input to the ``sample`` method, as it contains the global node and hyperedge IDs and features - that can be indexed with the negative samples' IDs. + A random negative sampler. Negatives generated with ``return_0based_negatives = False`` + aren't usable standalone as they have global node and hyperedge IDs. They must be concatenated + with the original `HData` object that is provided as input to the ``sample`` method, as it + contains the global node and hyperedge IDs and features that can be indexed with + the negative samples' IDs. Args: num_negative_samples: Number of negative hyperedges to generate. num_nodes_per_sample: Number of nodes per negative hyperedge. - hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes for the new hyperedges. - If not provided, random attributes will be generated for the negative hyperedges if the input data has hyperedge attributes. - hyperedge_weights_enricher: An optional `HyperedgeEnricher` to generate weights for the new hyperedges. 
- If not provided, the negative hyperedges will not have weights. + hyperedge_attr_enricher: An optional `HyperedgeAttrsEnricher` to generate attributes + for the new hyperedges. + If not provided, random attributes will be generated for the negative hyperedges if + the input data has hyperedge attributes. + hyperedge_weights_enricher: An optional `HyperedgeWeightsEnricher` to generate weights + for the new hyperedges. If not provided, the negative hyperedges will not have weights. return_0based_negatives: - - If ``True``, the negative samples returned by the ``sample`` method will have 0-based node and hyperedge IDs. - - If ``False``, the negative samples will retain the original global node and hyperedge IDs from the input data. - max_retry: Maximum number of rejected sampling attempts allowed per requested negative hyperedge before failing. - If ``num_negative_samples`` is ``N``, the total maximum number of attempts will be ``N * max_retry``. + - If ``True``, the negative samples returned by the ``sample`` method + will have 0-based node and hyperedge IDs. + - If ``False``, the negative samples will retain the original global node and + hyperedge IDs from the input data. + max_retry: Maximum number of rejected sampling attempts allowed per requested + negative hyperedge before failing. + If ``num_negative_samples`` is ``N``, the total maximum number of attempts + will be ``N * max_retry``. Raises: ValueError: If any numeric argument is not positive. @@ -341,10 +366,12 @@ def __init__( def sample(self, hdata: HData, seed: int | None = None) -> HData: """ Generate negative hyperedges by randomly sampling unique node IDs. - Node IDs are sampled from the same node space as the input data, and the new negative hyperedge IDs - start from the original number of hyperedges in the input data to avoid ID conflicts. 
- The resulting negative samples are returned as a new `HData` object with remapped 0-based node and hyperedge IDs, if ``self.return_0based_negatives == True``. - Otherwise, the negative samples retain their original global node and hyperedge IDs from the input data. + Node IDs are sampled from the same node space as the input data, and the new negative + hyperedge IDs start from the original number of hyperedges in the input data to + avoid ID conflicts. The resulting negative samples are returned as a new `HData` object + with remapped 0-based node and hyperedge IDs, if ``self.return_0based_negatives == True``. + Otherwise, the negative samples retain their original global node and hyperedge IDs + from the input data. Examples: With ``self.return_0based_negatives = True``: @@ -383,7 +410,8 @@ def sample(self, hdata: HData, seed: int | None = None) -> HData: """ if self.num_nodes_per_sample > hdata.num_nodes: raise ValueError( - f"Asked to create samples with {self.num_nodes_per_sample} nodes, but only {hdata.num_nodes} nodes are available." + f"Asked to create samples with {self.num_nodes_per_sample} nodes," + f" but only {hdata.num_nodes} nodes are available." ) device = hdata.device @@ -439,7 +467,8 @@ def sample(self, hdata: HData, seed: int | None = None) -> HData: hyperedge_attr_enricher=self.hyperedge_attr_enricher, negative_hyperedge_index=negative_hyperedge_index, ) - # Default to the random attributes if no enricher is provided and the input data has hyperedge attributes + # Default to the random attributes if no enricher is provided and the input + # data has hyperedge attributes if negative_hyperedge_attr is None: negative_hyperedge_attr = self._new_hyperedge_attr( sampled_hyperedge_attrs=sampled_hyperedge_attrs, hyperedge_attr=hdata.hyperedge_attr @@ -471,12 +500,15 @@ def __sample_loop( Args: hdata: The input hypergraph data used as the node and hyperedge ID source. 
- positive_hyperedges_signatures: Existing positive hyperedge signatures that must not be sampled as negatives. + positive_hyperedges_signatures: Existing positive hyperedge signatures that + must not be sampled as negatives. seed: Optional random seed for reproducible sampling. Returns: - samples: A tuple containing sampled hyperedge index tensors, sampled hyperedge attribute - tensors, sampled node IDs, and the first negative hyperedge ID. + sampled_hyperedge_indexes: sampled hyperedge index tensors + sampled_hyperedge_attrs: sampled hyperedge attribute tensors. + sampled_negative_node_ids: sampled negative node IDs. + new_hyperedge_id_offset: first negative hyperedge ID. Raises: ValueError: If the sampler cannot produce the requested number of unique negative @@ -500,7 +532,8 @@ def __sample_loop( attempts += 1 # Sample with multinomial without replacement to ensure unique node ids - # and assign each node id equal probability of being selected by setting all of them to 1 + # and assign each node id equal probability of being + # selected by setting all of them to 1 # Example: num_nodes_per_sample=3, max_node_id=5 # -> possible output: [2, 0, 4] equal_probabilities = torch.ones( @@ -612,14 +645,18 @@ class CliqueNegativeSampler(SameNodeSpaceNegativeSampler): num_nodes_per_sample: Number of nodes per negative hyperedge. Must be at least 2. hyperedge_attr_enricher: Optional enricher to generate attributes for sampled negatives. hyperedge_weights_enricher: Optional enricher to generate weights for sampled negatives. - return_0based_negatives: If ``True``, returned negative node and hyperedge IDs are rebased to 0-based IDs. - max_candidates: Optional upper bound for full-size clique candidates enumerated during search + return_0based_negatives: If ``True``, returned negative node and hyperedge IDs + are rebased to 0-based IDs. + max_candidates: Optional upper bound for full-size clique candidates enumerated + during search. If ``None``, it means no explicit cap. 
The limit counts every full-size clique candidate - encountered before positive-hyperedge filtering, so positive hyperedges still consume the budget - because they still require search work. This is a safety guard for dense graphs where clique enumeration - can grow quickly. For example, ``max_candidates=10_000`` means the sampler stops if finding candidates + encountered before positive-hyperedge filtering, so positive hyperedges still consume + the budget because they still require search work. This is a safety guard for + dense graphs where clique enumeration can grow quickly. For example, + ``max_candidates=10_000`` means the sampler stops if finding candidates requires enumerating more than 10,000 cliques of size ``num_nodes_per_sample``. - It does not control how many negatives are returned, as that is controlled by ``num_negative_samples``. + It does not control how many negatives are returned, as that is controlled + by ``num_negative_samples``. Raises: ValueError: If numeric arguments are invalid. @@ -638,7 +675,8 @@ def __init__( raise ValueError(f"num_negative_samples must be positive, got {num_negative_samples}.") if num_nodes_per_sample < 2: raise ValueError( - f"num_nodes_per_sample must be at least 2 for clique negative sampling, got {num_nodes_per_sample}." + f"num_nodes_per_sample must be at least 2 for clique " + f"negative sampling, got {num_nodes_per_sample}." ) if max_candidates is not None and max_candidates <= 0: raise ValueError( @@ -670,7 +708,8 @@ def sample(self, hdata: HData, seed: int | None = None) -> HData: """ if self.num_nodes_per_sample > hdata.num_nodes: raise ValueError( - f"Asked to create samples with {self.num_nodes_per_sample} nodes, but only {hdata.num_nodes} nodes are available." + f"Asked to create samples with {self.num_nodes_per_sample} nodes, " + f"but only {hdata.num_nodes} nodes are available." 
) device = hdata.device @@ -774,7 +813,8 @@ def __expand_clique_candidates( prefix: Current partial clique, represented as sorted node IDs. candidates: Node IDs that may extend ``prefix`` while preserving clique structure. adjacency_list: Clique-expanded graph adjacency list. - positive_hyperedge_signatures: Positive hyperedge node signatures that must not be returned as negatives. + positive_hyperedge_signatures: Positive hyperedge node signatures that must not + be returned as negatives. valid_candidates: Output list mutated in place with valid negative clique candidates. enumerated_candidates: Number of full-size clique candidates visited so far. @@ -787,7 +827,8 @@ def __expand_clique_candidates( if len(prefix) == self.num_nodes_per_sample: # Found a full-size clique candidate if self.max_candidates is not None and enumerated_candidates >= self.max_candidates: raise ValueError( - f"Clique negative candidate enumeration exceeded max_candidates={self.max_candidates}." + f"Clique negative candidate enumeration exceeded " + f"max_candidates={self.max_candidates}." ) enumerated_candidates += 1 @@ -832,7 +873,8 @@ def __find_valid_clique_candidates( Args: adjacency_list: Clique-expanded graph adjacency list. - positive_hyperedge_signatures: Positive hyperedge node signatures with the requested sample size. + positive_hyperedge_signatures: Positive hyperedge node signatures with + the requested sample size. Returns: candidates: Clique node signatures that are not positive hyperedges. @@ -881,8 +923,10 @@ def __sample_loop( seed: Optional seed for reproducible candidate shuffling and random attributes. Returns: - samples: A tuple containing sampled hyperedge index tensors, sampled hyperedge - attribute tensors, sampled node IDs, and the first negative hyperedge ID. + sampled_hyperedge_indexes: sampled hyperedge index tensors + sampled_hyperedge_attrs: sampled hyperedge attribute tensors. + sampled_negative_node_ids: sampled negative node IDs. 
+ new_hyperedge_id_offset: first negative hyperedge ID. """ device = hdata.device generator = create_seeded_torch_generator(device=device, seed=seed) @@ -892,7 +936,8 @@ def __sample_loop( # (0, 2, 3), # index 1 # (1, 2, 3)], # index 2 # -> shuffled_clique_candidate_indexes = [2, 0, 1] - # -> sampled_clique_candidate_indexes = [2, 0] if num_negative_samples=2 as we only need 2 samples + # as we only need 2 samples + # -> sampled_clique_candidate_indexes = [2, 0] if num_negative_samples=2 # -> sampled_clique_candidates = [(1, 2, 3), # index 2 in clique_candidates # (0, 1, 3)] # index 0 in clique_candidates num_valid_clique_candidates = len(clique_candidates) diff --git a/hyperbench/data/negative_sampling_scheduler.py b/hyperbench/data/negative_sampling_scheduler.py index 68c0fa8d..a404d601 100644 --- a/hyperbench/data/negative_sampling_scheduler.py +++ b/hyperbench/data/negative_sampling_scheduler.py @@ -13,15 +13,20 @@ class NegativeSamplingScheduler: """ Manages when to perform negative sampling during training based on a specified schedule. - This class allows for flexible scheduling of negative sampling, enabling it to be performed at different frequencies (e.g., every epoch, every N epochs, or only at the first epoch). - The scheduler maintains a cache of the most recently sampled negatives, which can be reused across epochs if the schedule does not require resampling. This helps to optimize training - by avoiding unnecessary sampling when the schedule dictates that negatives should only be generated at certain intervals. + + This class allows for flexible scheduling of negative sampling, enabling it to be performed at + different frequencies (e.g., every epoch, every N epochs, or only at the first epoch). The + scheduler maintains a cache of the most recently sampled negatives, which can be reused across + epochs if the schedule does not require resampling. 
This helps to optimize training by avoiding + unnecessary sampling when the schedule dictates that negatives should only be generated at + certain intervals. Args: negative_sampler: An instance of a ``NegativeSampler`` that defines how to sample negatives. negative_sampling_schedule: Literal string specifying the schedule for sampling negatives. negative_sampling_every_n: An integer specifying the interval for sampling negatives - when the schedule is set to ``"every_n_epochs"``. This parameter is ignored for other schedules. + when the schedule is set to ``"every_n_epochs"``. This parameter is ignored + for other schedules. """ def __init__( @@ -38,7 +43,9 @@ def __init__( @property def config(self) -> dict[str, Any]: - """Returns the configuration of the negative sampling scheduler as a dictionary.""" + """ + Returns the configuration of the negative sampling scheduler as a dictionary. + """ return { "negative_sampler": self.negative_sampler, "negative_sampling_schedule": self.negative_sampling_schedule, @@ -50,10 +57,12 @@ def should_sample(self, epoch: int) -> bool: Whether to resample negatives for the current epoch. Args: - epoch: The current epoch number, used to determine if sampling should occur based on the schedule. + epoch: The current epoch number, used to determine if sampling should occur based + on the schedule. Returns: - should_sample: True if negatives should be resampled for the current epoch, False otherwise. + should_sample: True if negatives should be resampled for the current epoch, + False otherwise. """ if epoch < 0: raise ValueError(f"Epoch must be non-negative, got {epoch}.") @@ -62,7 +71,8 @@ def should_sample(self, epoch: int) -> bool: case "every_n_epochs": if self.negative_sampling_every_n <= 0: raise ValueError( - f"negative_sampling_every_n must be positive, got {self.negative_sampling_every_n}." + f"negative_sampling_every_n must be positive, " + f"got {self.negative_sampling_every_n}." 
) return epoch % self.negative_sampling_every_n == 0 case "first_epoch": @@ -80,7 +90,8 @@ def sample(self, batch: HData, epoch: int) -> HData: Args: batch: The current batch of data for which to sample negatives. - epoch: The current epoch number, used to determine if sampling should occur based on the schedule. + epoch: The current epoch number, used to determine if sampling should occur + based on the schedule. Returns: negatives: A batch of negative samples, either freshly sampled or from cache. diff --git a/hyperbench/data/sampler.py b/hyperbench/data/sampler.py index 0cb2178e..5d44d9b8 100644 --- a/hyperbench/data/sampler.py +++ b/hyperbench/data/sampler.py @@ -48,7 +48,8 @@ def _normalize_index(self, index: int | list[int], size: int) -> list[int]: ids: List of IDs to sample. Raises: - ValueError: If the provided index is invalid (e.g., empty list or list length exceeds number of sampleable items). + ValueError: If the provided index is invalid (e.g., empty list or list length exceeds + number of sampleable items). TypeError: If the index is not an integer or a list of integers. """ if isinstance(index, list): @@ -56,7 +57,8 @@ def _normalize_index(self, index: int | list[int], size: int) -> list[int]: raise ValueError("Index list cannot be empty.") if len(index) > size: raise ValueError( - f"Index list length ({len(index)}) cannot exceed the number of sampleable items ({size})." + f"Index list length ({len(index)}) cannot exceed the number of " + f"sampleable items ({size})." ) for id in index: if not isinstance(id, int) or isinstance(id, bool): @@ -75,14 +77,16 @@ def _sample_hyperedge_index( sampled_hyperedge_ids: Tensor, ) -> Tensor: """ - Sample the hyperedge index to keep only incidences belonging to the specified sampled hyperedge IDs. + Sample the hyperedge index to keep only incidences belonging to the specified sampled + hyperedge IDs. Args: hyperedge_index: The original hyperedge index tensor of shape ``[2, num_incidences]``. 
sampled_hyperedge_ids: A tensor containing the IDs of hyperedges to sample. Returns: - hyperedge_index: A new hyperedge index tensor containing only the incidences of the sampled hyperedges. + hyperedge_index: A new hyperedge index tensor containing only the incidences of the + sampled hyperedges. """ hyperedge_ids = hyperedge_index[1] @@ -120,27 +124,30 @@ def _validate_bounds(self, ids: list[int], size: int, label: str) -> None: class HyperedgeSampler(BaseSampler): def sample(self, index: int | list[int], hdata: HData) -> HData: """ - Sample hyperedges by their IDs and return the sub-hypergraph containing only those hyperedges and their incident nodes. + Sample hyperedges by their IDs and return the sub-hypergraph containing only those + hyperedges and their incident nodes. Examples: - >>> hyperedge_index = [[0, 0, 1, 2, 3, 4], - ... [0, 0, 0, 1, 2, 2]] - >>> hdata = HData.from_hyperedge_index(hyperedge_index) - >>> strategy = HyperedgeSampler() - >>> sampled_hdata = strategy.sample([0, 2], hdata) - >>> sampled_hdata.hyperedge_index - >>> tensor([[0, 0, 1, 3, 4], - ... [0, 0, 0, 2, 2]]) + >>> hyperedge_index = [[0, 0, 1, 2, 3, 4], + ... [0, 0, 0, 1, 2, 2]] + >>> hdata = HData.from_hyperedge_index(hyperedge_index) + >>> strategy = HyperedgeSampler() + >>> sampled_hdata = strategy.sample([0, 2], hdata) + >>> sampled_hdata.hyperedge_index + >>> tensor([[0, 0, 1, 3, 4], + ... [0, 0, 0, 2, 2]]) Args: index: An integer or a list of integers representing hyperedge IDs to sample. hdata: The original HData to sample from. Returns: - hdata: An HData instance containing only the sampled hyperedges and their incident nodes. + hdata: An HData instance containing only the sampled hyperedges and + their incident nodes. Raises: - ValueError: If the provided index is invalid (e.g., empty list or list length exceeds number of hyperedges). + ValueError: If the provided index is invalid (e.g., empty list or list length exceeds + number of hyperedges). 
IndexError: If any hyperedge ID is out of bounds. """ ids = self._normalize_index(index, self.len(hdata)) @@ -182,27 +189,30 @@ def len(self, hdata: HData) -> int: class NodeSampler(BaseSampler): def sample(self, index: int | list[int], hdata: HData) -> HData: """ - Sample nodes by their IDs and return the sub-hypergraph containing only those nodes and their incident hyperedges. + Sample nodes by their IDs and return the sub-hypergraph containing only those nodes and + their incident hyperedges. Examples: - >>> hyperedge_index = [[0, 0, 1, 2, 3, 4], - ... [0, 0, 0, 1, 2, 2]] - >>> hdata = HData.from_hyperedge_index(hyperedge_index) - >>> strategy = NodeSampler() - >>> sampled_hdata = strategy.sample([0, 3], hdata) - >>> sampled_hdata.hyperedge_index - >>> tensor([[0, 0, 1, 3, 4], - ... [0, 0, 0, 2, 2]]) + >>> hyperedge_index = [[0, 0, 1, 2, 3, 4], + ... [0, 0, 0, 1, 2, 2]] + >>> hdata = HData.from_hyperedge_index(hyperedge_index) + >>> strategy = NodeSampler() + >>> sampled_hdata = strategy.sample([0, 3], hdata) + >>> sampled_hdata.hyperedge_index + >>> tensor([[0, 0, 1, 3, 4], + ... [0, 0, 0, 2, 2]]) Args: index: An integer or a list of integers representing node IDs to sample. hdata: The original HData to sample from. Returns: - hdata: An HData instance containing only the sampled nodes and their incident hyperedges. + hdata: An HData instance containing only the sampled nodes and their + incident hyperedges. Raises: - ValueError: If the provided index is invalid (e.g., empty list or list length exceeds number of nodes). + ValueError: If the provided index is invalid (e.g., empty list or list length exceeds + number of nodes). IndexError: If any node ID is out of bounds. 
""" ids = self._normalize_index(index, self.len(hdata)) @@ -220,7 +230,8 @@ def sample(self, index: int | list[int], hdata: HData) -> HData: sampled_nodes_mask = torch.isin(node_ids, sampled_node_ids) # Get unique hyperedges that have at least one sampled node - # Example: hyperedge_ids = [0, 0, 0, 1, 2, 2], sampled_nodes_mask = [True, True, False, False, True, False] + # Example: hyperedge_ids = [0, 0, 0, 1, 2, 2], + # sampled_nodes_mask = [True, True, False, False, True, False] # -> sampled_hyperedge_ids = [0, 2] as they connect to sampled nodes sampled_hyperedge_ids = hyperedge_ids[sampled_nodes_mask].unique() diff --git a/hyperbench/data/splitter.py b/hyperbench/data/splitter.py index c7616359..81addc02 100644 --- a/hyperbench/data/splitter.py +++ b/hyperbench/data/splitter.py @@ -38,7 +38,8 @@ def split(self, to_split: _ToSplitType, **kwargs: Any) -> _SplitResultType: Args: to_split: The object to split. - **kwargs: Additional keyword arguments that may be required by specific splitter implementations. + **kwargs: Additional keyword arguments that may be required by specific splitter + implementations. Returns: The result of splitting the input object. @@ -51,17 +52,7 @@ class DefaultDatasetSplitter(Splitter["Dataset", tuple[list["Dataset"], list[flo Split a dataset by hyperedges and materialize dataset partitions. Args: - ratios: List of floats summing to ``1.0``. node_space_setting: Whether to preserve full or local node spaces. - cover_all_nodes_in_train_split: Whether transductive splits should move - hyperedges into the first split until all nodes are incident to at - least one selected training hyperedge. - train_split_idx: The index of the split to treat as the train split. Defaults to ``0``, - so the first split is the train split that gets the full node space in the - transductive setting and is optionally rebalanced to cover all nodes. 
- This is used only when ``node_space_setting=="transductive"`` and ``cover_all_nodes_in_train_split==True``, - to determine which split should be rebalanced to cover all nodes. - For the 'inductive' setting, splits are always returned based on the provided ratios. shuffle: Whether to shuffle hyperedges before splitting. seed: Optional random seed for reproducibility. """ @@ -84,11 +75,25 @@ def split(self, to_split: Dataset, **kwargs: Any) -> tuple[list[Dataset], list[f Args: to_split: The `Dataset` to split. - ratios: Desired split ratios, used for initial split construction and - as a reference during rebalancing. Expected as a keyword argument. + kwargs: + ratios: Desired split ratios, used for initial split construction and + as a reference during rebalancing. Expected as a keyword argument. + List of floats summing to ``1.0``. + cover_all_nodes_in_train_split: Whether transductive splits should move + hyperedges into the first split until all nodes are incident to at + least one selected training hyperedge. + train_split_idx: The index of the split to treat as the train split. + Defaults to ``0``, so the first split is the train split that gets the full + node space in the transductive setting and is optionally rebalanced to cover + all nodes. This is used only when ``node_space_setting=="transductive"`` + and ``cover_all_nodes_in_train_split==True``, + to determine which split should be rebalanced to cover all nodes. + For the 'inductive' setting, splits are always returned based on the + provided ratios. Returns: - datasets_and_ratios: Split datasets and final hyperedge-count ratios. + split_datasets: The list of split datasets. + final_ratios: The list of final hyperedge-count ratios. 
Raises: ValueError: If ratios do not sum to ``1.0``, a final split has zero @@ -149,7 +154,8 @@ def __validate_train_split_idx(self, train_split_idx: int, ratios: list[float]) if self.node_space_setting != "transductive" and train_split_idx != 0: raise ValueError( f"'train_split_idx' is only relevant when 'node_space_setting' is 'transductive', " - f"got 'node_space_setting={self.node_space_setting}' and 'train_split_idx={train_split_idx}'." + f"got 'node_space_setting={self.node_space_setting}' and" + f" 'train_split_idx={train_split_idx}'." "For the 'inductive' setting, splits are returned based on the provided ratios." ) validate_is_between("train_split_idx", train_split_idx, 0, len(ratios) - 1) @@ -176,7 +182,9 @@ def split(self, to_split: HData, **kwargs: Any) -> HData: Args: to_split: The original `HData` containing the full hypergraph. - split_hyperedge_ids: The hyperedge IDs that should be included in the split, expected as a keyword argument. + kwargs: + split_hyperedge_ids: The hyperedge IDs that should be included in the split, + expected as a keyword argument. Returns: hdata: The splitted instance with remapped node and hyperedge IDs. @@ -282,7 +290,8 @@ def ensure_split_covers_all_nodes( ratios: The final ratios of hyperedges in each split after rebalancing. Raises: - ValueError: If one or more nodes do not appear in any hyperedge of the source hypergraph. + ValueError: If one or more nodes do not appear in any hyperedge of + the source hypergraph. """ validate_is_non_empty("hyperedge_ids_by_split", hyperedge_ids_by_split) validate_is_between("split_idx", split_idx, 0, len(hyperedge_ids_by_split) - 1) @@ -356,7 +365,8 @@ def get_hyperedge_ids_permutation(self, shuffle: bool | None, seed: int | None) Returns: hyperedge_ids_permutation: Ordered or shuffled hyperedge IDs on the HData device.
""" - # Shuffle hyperedge IDs if shuffle is requested, otherwise keep original order for deterministic splits + # Shuffle hyperedge IDs if shuffle is requested, otherwise keep original order + # for deterministic splits if shuffle: generator = create_seeded_torch_generator(device=self.device, seed=seed) random_hyperedge_ids_permutation = torch.randperm( @@ -405,8 +415,9 @@ def split(self, to_split: Tensor, **kwargs: Any) -> tuple[list[Tensor], list[flo Args: to_split: Hyperedge IDs to partition. - ratios: Desired split ratios, used for initial split construction and - as a reference during rebalancing. Expected as a keyword argument. + kwargs: + ratios: Desired split ratios, used for initial split construction and + as a reference during rebalancing. Expected as a keyword argument. Returns: hyperedge_ids_by_split: The updated hyperedge IDs for each split. diff --git a/hyperbench/data/supported_datasets.py b/hyperbench/data/supported_datasets.py index ed5ed954..c6615e8c 100644 --- a/hyperbench/data/supported_datasets.py +++ b/hyperbench/data/supported_datasets.py @@ -8,12 +8,15 @@ class _PreloadedDataset(Dataset): """ Base class for datasets that use default loading. - Subclasses should specify the ``DATASET_NAME`` class variable. - The dataset will be saved on disk after the first load. + + Subclasses should specify the ``DATASET_NAME`` class variable. The dataset will be saved on + disk after the first load. Args: - hdata: Optional HData object. If ``None``, the dataset will be loaded using the ``DATASET_NAME``. - sampling_strategy: The sampling strategy to use for this dataset. Default is ``SamplingStrategy.HYPEREDGE``. + hdata: Optional HData object. If ``None``, the dataset will be loaded using + the ``DATASET_NAME``. + sampling_strategy: The sampling strategy to use for this dataset. + Default is ``SamplingStrategy.HYPEREDGE``. 
""" DATASET_NAME: ClassVar[str] = "" @@ -72,8 +75,9 @@ def __validate(self) -> None: def list_datasets() -> list[str]: - """Return supported preloaded dataset names in deterministic order.""" - + """ + Return supported preloaded dataset names in deterministic order. + """ return sorted(_PreloadedDataset._registry) diff --git a/hyperbench/hlp/common.py b/hyperbench/hlp/common.py index 870a222e..2e47eb25 100644 --- a/hyperbench/hlp/common.py +++ b/hyperbench/hlp/common.py @@ -14,14 +14,17 @@ class HlpModule(L.LightningModule): A LightningModule for HLP models with optional negative sampling. Args: - encoder: Optional encoder module. Defaults to ``None`` as not all HLP model will use an encoder. + encoder: Optional encoder module. Defaults to ``None`` as not + all HLP model will use an encoder. decoder: Decoder module to use to predict whether hyperedges are positive or negative. loss_fn: Loss function. metrics: Optional ``MetricCollection`` of torchmetrics to compute during evaluation. Cloned per stage (train, val, test) for independent state accumulation. negative_sampler: Optional negative sampler. If ``None``, no negative sampling is performed. - negative_sampling_schedule: When to perform negative sampling during training. Defaults to ``"every_epoch"``. - negative_sampling_every_n: If using ``"every_n_epochs"`` schedule, how many epochs between negative sampling runs. Defaults to ``1``. + negative_sampling_schedule: When to perform negative sampling during training. + Defaults to ``"every_epoch"``. + negative_sampling_every_n: If using ``"every_n_epochs"`` schedule, how many epochs between + negative sampling runs. Defaults to ``1``. """ def __init__( @@ -97,7 +100,8 @@ def _compute_metrics( Uses class-based torchmetrics with proper multi-batch accumulation: 1. ``update()`` accumulates predictions/targets across batches. - 2. Passing the MetricCollection to ``self.log_dict()`` tells Lightning to call ``compute()`` at epoch end and ``reset()`` automatically. + 2. 
Passing the MetricCollection to ``self.log_dict()`` tells Lightning to call + ``compute()`` at epoch end and ``reset()`` automatically. Args: scores: The predicted scores (logits) from the model. @@ -121,7 +125,7 @@ def _compute_metrics( stage_metrics, prog_bar=True, on_step=False, - on_epoch=True, # Compute and log metrics at epoch end, not per step, for proper accumulation + on_epoch=True, # Compute and log metrics at epoch end for proper accumulation batch_size=batch_size, ) @@ -133,7 +137,8 @@ def _get_stage_metrics(self, stage: Stage) -> MetricCollection | None: stage: The current stage (train/val/test) for which to get metrics. Returns: - metrics: The metric collection corresponding to the given stage, or ``None`` if no metrics are configured. + metrics: The metric collection corresponding to the given stage, or ``None`` + if no metrics are configured. """ match stage: case Stage.TRAIN: @@ -146,7 +151,9 @@ def _get_stage_metrics(self, stage: Stage) -> MetricCollection | None: raise ValueError(f"Unrecognized stage: {stage}") def _should_sample_negatives(self) -> bool: - """Whether to resample negatives for the current epoch.""" + """ + Whether to resample negatives for the current epoch. + """ if self.__negative_sampling_scheduler is None: raise ValueError( "Asked to check negative sampling schedule but no negative sampler is configured." diff --git a/hyperbench/hlp/common_neighbors_hlp.py b/hyperbench/hlp/common_neighbors_hlp.py index 0ff7cda3..781fd5fb 100644 --- a/hyperbench/hlp/common_neighbors_hlp.py +++ b/hyperbench/hlp/common_neighbors_hlp.py @@ -55,7 +55,9 @@ def forward(self, hyperedge_index: Tensor) -> Tensor: return self.decoder(hyperedge_index, self.node_to_neighbors) def on_fit_start(self) -> None: - """Warn users if they are running unnecessary training epochs.""" + """ + Warn users if they are running unnecessary training epochs. 
+ """ if self.trainer.max_epochs is None or self.trainer.max_epochs > 0: warnings.warn( f"{self.__class__.__name__} is a non-trainable heuristic model. " @@ -86,7 +88,8 @@ def __step(self, batch: HData, stage: Stage) -> Tensor: Args: batch: `HData` object containing the hypergraph. - stage: The current stage of evaluation (e.g., ``Stage.TRAIN``, ``Stage.VAL``, ``Stage.TEST``). + stage: The current stage of evaluation + (e.g., ``Stage.TRAIN``, ``Stage.VAL``, ``Stage.TEST``). Returns: loss: The computed loss. diff --git a/hyperbench/hlp/gcn_hlp.py b/hyperbench/hlp/gcn_hlp.py index f5b7ac62..51ba1d66 100644 --- a/hyperbench/hlp/gcn_hlp.py +++ b/hyperbench/hlp/gcn_hlp.py @@ -14,7 +14,7 @@ class GCNEncoderConfig(TypedDict): """ Configuration for the GCN encoder in GCNHlpModule. - Args: + Attributes: in_channels: Number of input features per node. out_channels: Number of output features (embedding size) per node. hidden_channels: Number of hidden units in the intermediate GCN layers. @@ -25,9 +25,11 @@ class GCNEncoderConfig(TypedDict): add_self_loops: Whether to add self-loops before convolution. Defaults to ``True``. normalize: Whether to normalize the adjacency matrix in ``GCNConv``. Defaults to ``True``. cached: Whether to cache the normalized graph in ``GCNConv``. Defaults to ``False``. - graph_reduction_strategy: Strategy for reducing the hypergraph to a graph. Defaults to ``"clique_expansion"`` - num_nodes: Total number of nodes in the hypergraph. This is useful when setting is transductive - but train dataset may not contain all hyperedges where some nodes appear, to ensure consistent encoding across splits. + graph_reduction_strategy: Strategy for reducing the hypergraph to a graph. + Defaults to ``"clique_expansion"``. + num_nodes: Total number of nodes in the hypergraph. This is useful when setting is + transductive but train dataset may not contain all hyperedges where some nodes appear, + to ensure consistent encoding across splits. 
activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``. activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. """ diff --git a/hyperbench/hlp/hgnn_hlp.py b/hyperbench/hlp/hgnn_hlp.py index fdb9f63a..b70b9add 100644 --- a/hyperbench/hlp/hgnn_hlp.py +++ b/hyperbench/hlp/hgnn_hlp.py @@ -14,7 +14,7 @@ class HGNNEncoderConfig(TypedDict): """ Configuration for the HGNN encoder in HGNNHlpModule. - Args: + Attributes: in_channels: Number of input features per node. hidden_channels: Number of hidden units in the intermediate HGNN layer. out_channels: Number of output features (embedding size) per node. @@ -83,33 +83,34 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: Run the full HGNN-based hyperedge link prediction pipeline. The pipeline has three stages: - 1. Encode: HGNN applies two rounds of ``D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` - smoothing to propagate information through the hypergraph topology (nodes -> - hyperedges -> nodes). The output is a structure-aware node embedding matrix of - shape ``(num_nodes, out_channels)``. - 2. Aggregate: For each hyperedge being scored, pool the embeddings of its member - nodes using the configured strategy (mean/max/min/sum). This produces a hyperedge - embedding that summarizes the collective representation of the hyperedge's nodes. - Shape: ``(num_hyperedges, out_channels)``. - 3. Decode: A single linear layer (SLP) projects each hyperedge embedding to a - scalar score representing the likelihood that the hyperedge is a real (positive) - hyperedge. Shape: ``(num_hyperedges,)``. + 1. Encode: HGNN applies two rounds of ``D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` + smoothing to propagate information through the hypergraph topology (nodes -> + hyperedges -> nodes). The output is a structure-aware node embedding matrix of + shape ``(num_nodes, out_channels)``. + 2. 
Aggregate: For each hyperedge being scored, pool the embeddings of its member + nodes using the configured strategy (mean/max/min/sum). This produces a hyperedge + embedding that summarizes the collective representation of the hyperedge's nodes. + Shape: ``(num_hyperedges, out_channels)``. + 3. Decode: A single linear layer (SLP) projects each hyperedge embedding to a + scalar score representing the likelihood that the hyperedge is a real (positive) + hyperedge. Shape: ``(num_hyperedges,)``. Examples: - Given 5 nodes with 8 features and 2 hyperedges:: + Given 5 nodes with 8 features and 2 hyperedges: >>> x.shape # (5, 8) - all nodes in the hypergraph >>> hyperedge_index = [[0, 1, 2, 3, 4], # node IDs ... [0, 0, 0, 1, 1]] # hyperedge IDs The forward pass: - 1. HGNN encodes all 5 nodes using the hypergraph Laplacian. - ``node_embeddings.shape = (5, out_channels)`` - 2. Aggregate per hyperedge: - - hyperedge 0: pool(emb[0], emb[1], emb[2]) - - hyperedge 1: pool(emb[3], emb[4]) - ``hyperedge_embeddings.shape = (2, out_channels)`` - 3. Decode: one scalar per hyperedge -> ``scores.shape = (2,)`` + + >>> HGNN encodes all 5 nodes using the hypergraph Laplacian. + ... ``node_embeddings.shape = (5, out_channels)`` + >>> Aggregate per hyperedge: + ... - hyperedge 0: pool(emb[0], emb[1], emb[2]) + ... - hyperedge 1: pool(emb[3], emb[4]) + ... ``hyperedge_embeddings.shape = (2, out_channels)`` + >>> Decode: one scalar per hyperedge -> ``scores.shape = (2,)`` Args: x: Node feature matrix of shape ``(num_nodes, in_channels)``. diff --git a/hyperbench/hlp/hgnnp_hlp.py b/hyperbench/hlp/hgnnp_hlp.py index 1769a41a..c97fe9e5 100644 --- a/hyperbench/hlp/hgnnp_hlp.py +++ b/hyperbench/hlp/hgnnp_hlp.py @@ -14,7 +14,7 @@ class HGNNPEncoderConfig(TypedDict): """ Configuration for the HGNN+ encoder in HGNNPHlpModule. - Args: + Attributes: in_channels: Number of input features per node. hidden_channels: Number of hidden units in the intermediate HGNN+ layer. 
out_channels: Number of output features (embedding size) per node. @@ -83,15 +83,15 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: Run the full HGNN+-based hyperedge link prediction pipeline. The pipeline has three stages: - 1. Encode: HGNN+ applies two rounds of ``D_v^{-1} H D_e^{-1} H^T`` - smoothing to propagate information through the hypergraph topology with - two-stage mean aggregation. The output is a structure-aware node - embedding matrix of shape ``(num_nodes, out_channels)``. - 2. Aggregate: For each hyperedge being scored, pool the embeddings of its member - nodes using the configured strategy (mean/max/min/sum). This produces a hyperedge - embedding of shape ``(num_hyperedges, out_channels)``. - 3. Decode: A single linear layer projects each hyperedge embedding to a - scalar score. Shape: ``(num_hyperedges,)``. + 1. Encode: HGNN+ applies two rounds of ``D_v^{-1} H D_e^{-1} H^T`` + smoothing to propagate information through the hypergraph topology with + two-stage mean aggregation. The output is a structure-aware node + embedding matrix of shape ``(num_nodes, out_channels)``. + 2. Aggregate: For each hyperedge being scored, pool the embeddings of its member + nodes using the configured strategy (mean/max/min/sum). This produces a hyperedge + embedding of shape ``(num_hyperedges, out_channels)``. + 3. Decode: A single linear layer projects each hyperedge embedding to a + scalar score. Shape: ``(num_hyperedges,)``. Args: x: Node feature matrix of shape ``(num_nodes, in_channels)``. diff --git a/hyperbench/hlp/hnhn_hlp.py b/hyperbench/hlp/hnhn_hlp.py index f4836d4c..e9f02148 100644 --- a/hyperbench/hlp/hnhn_hlp.py +++ b/hyperbench/hlp/hnhn_hlp.py @@ -14,7 +14,7 @@ class HNHNEncoderConfig(TypedDict): """ Configuration for the HNHN encoder in HNHNHlpModule. - Args: + Attributes: in_channels: Number of input features per node. hidden_channels: Number of hidden units in the intermediate HNHN layer. 
out_channels: Number of output features (embedding size) per node. diff --git a/hyperbench/hlp/hypergcn_hlp.py b/hyperbench/hlp/hypergcn_hlp.py index 3981f4a6..7f3abf34 100644 --- a/hyperbench/hlp/hypergcn_hlp.py +++ b/hyperbench/hlp/hypergcn_hlp.py @@ -14,16 +14,19 @@ class HyperGCNEncoderConfig(TypedDict): """ Configuration for the HyperGCN encoder in HyperGCNHlpModule. - Args: + Attributes: in_channels: Number of input features per node. hidden_channels: Number of hidden units in the intermediate HyperGCN layer. out_channels: Number of output features (embedding size) per node. bias: Whether to include bias terms. Defaults to ``True``. use_batch_normalization: Whether to use batch normalization. Defaults to ``False``. drop_rate: Dropout rate. Defaults to ``0.5``. - use_mediator: Whether to use mediator nodes for hyperedge-to-edge conversion. Defaults to ``False``. - fast: Whether to cache the graph structure after first computation. Defaults to ``True``. - seed: Optional random seed for the random reduction of hyperedges to edges. Defaults to ``None``. + use_mediator: Whether to use mediator nodes for hyperedge-to-edge conversion. + Defaults to ``False``. + fast: Whether to cache the graph structure after first computation. + Defaults to ``True``. + seed: Optional random seed for the random reduction of hyperedges to edges. + Defaults to ``None``. """ in_channels: int @@ -99,19 +102,20 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: 3. Decode: A linear layer scores each hyperedge embedding. Examples: - Given 5 nodes with 3 features and 2 hyperedges:: + Given 5 nodes with 3 features and 2 hyperedges: >>> x.shape # (5, 3) — all nodes in the hypergraph >>> hyperedge_index = [[0, 1, 2, 3, 4], # node IDs (global) ... [0, 0, 0, 1, 1]] # hyperedge IDs The forward pass: - 1. HyperGCN encodes all 5 nodes using the full graph Laplacian. - ``node_embeddings.shape = (5, out_channels)`` - 2. 
Aggregate per hyperedge: - - hyperedge 0: pool(emb[0], emb[1], emb[2]) - - hyperedge 1: pool(emb[3], emb[4]) - 3. Decode: one scalar score per hyperedge → ``scores.shape = (2,)`` + + >>> HyperGCN encodes all 5 nodes using the full graph Laplacian. + ... ``node_embeddings.shape = (5, out_channels)`` + >>> Aggregate per hyperedge: + ... - hyperedge 0: pool(emb[0], emb[1], emb[2]) + ... - hyperedge 1: pool(emb[3], emb[4]) + >>> Decode: one scalar score per hyperedge → ``scores.shape = (2,)`` Args: x: Node feature matrix of shape ``(num_nodes, in_channels)``. diff --git a/hyperbench/hlp/mlp_hlp.py b/hyperbench/hlp/mlp_hlp.py index c25c7b4d..1bf5a7d2 100644 --- a/hyperbench/hlp/mlp_hlp.py +++ b/hyperbench/hlp/mlp_hlp.py @@ -14,17 +14,23 @@ class MlpEncoderConfig(TypedDict): """ Configuration for the MLP encoder in MLPHlpModule. - Args: + Attributes: in_channels: Number of input features per node. out_channels: Number of output features (embedding size) per node. num_layers: Number of layers in the MLP encoder. - hidden_channels: Optional number of hidden units per layer. If ``None``, no hidden layers are used and the encoder is a simple linear layer. - activation_fn: Optional activation function class to use in the MLP encoder. If ``None``, no activation function is applied. - activation_fn_kwargs: Optional dictionary of keyword arguments to pass to the activation function constructor. - normalization_fn: Optional normalization function class to use in the MLP encoder. If ``None``, no normalization is applied. - normalization_fn_kwargs: Optional dictionary of keyword arguments to pass to the normalization function constructor. + hidden_channels: Optional number of hidden units per layer. If ``None``, no hidden layers + are used and the encoder is a simple linear layer. + activation_fn: Optional activation function class to use in the MLP encoder. + If ``None``, no activation function is applied. 
+ activation_fn_kwargs: Optional dictionary of keyword arguments to pass to the activation + function constructor. + normalization_fn: Optional normalization function class to use in the MLP encoder. + If ``None``, no normalization is applied. + normalization_fn_kwargs: Optional dictionary of keyword arguments to pass to the + normalization function constructor. bias: Whether to include bias terms in the MLP layers. Defaults to ``True``. - drop_rate: Dropout rate to apply after each MLP layer (except the last one). Defaults to ``0.0`` (no dropout). + drop_rate: Dropout rate to apply after each MLP layer (except the last one). + Defaults to ``0.0`` (no dropout). """ in_channels: int @@ -76,7 +82,8 @@ def __init__( drop_rate=encoder_config.get("drop_rate", 0.0), ) - # The decoder takes in the aggregated hyperedge embeddings of shape (num_hyperedges, encoder_config.out_channels) + # The decoder takes in the aggregated hyperedge embeddings of shape + # (num_hyperedges, encoder_config.out_channels) # and produces a score for each hyperedge of shape (num_hyperedges, 1). decoder = SLP(in_channels=encoder_config.get("out_channels", 1), out_channels=1) @@ -107,14 +114,15 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: ... [0, 0, 0, 1, 1]] # hyperedge ids The forward pass: - 1. Encoder maps each node to an embedding vector. - 2. Aggregate embeddings by summing them per hyperedge: - - hyperedge 0: emb[0] + emb[1] + emb[2] - - hyperedge 1: emb[2] + emb[3] - 3. Sums are divided by the number of nodes per hyperedge (mean pooling): - - hyperedge 0: (emb[0] + emb[1] + emb[2]) / 3 - - hyperedge 1: (emb[2] + emb[3]) / 2 - 4. Decoder scores each hyperedge embedding, producing one scalar per hyperedge. + + >>> Encoder maps each node to an embedding vector. + >>> Aggregate embeddings by summing them per hyperedge: + ... - hyperedge 0: emb[0] + emb[1] + emb[2] + ... 
- hyperedge 1: emb[2] + emb[3] + >>> Sums are divided by the number of nodes per hyperedge (mean pooling): + ... - hyperedge 0: (emb[0] + emb[1] + emb[2]) / 3 + ... - hyperedge 1: (emb[2] + emb[3]) / 2 + >>> Decoder scores each hyperedge embedding, producing one scalar per hyperedge. Args: x: Node feature matrix of shape ``(num_nodes, in_channels)``. @@ -137,8 +145,10 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: # Aggregate: for each hyperedge, aggregate the embeddings of its member nodes. # Example:: - # - hyperedge 0 contains node 0, 1, 2 -> aggregate([e00, e01], [e10, e11], [e20, e21]) -> [pooled_0, pooled_1] - # - hyperedge 1 contains node 2, 3 -> aggregate([e20, e21], [e30, e31]) -> [pooled_0, pooled_1] + # - hyperedge 0 contains node 0, 1, 2 -> aggregate([e00, e01], [e10, e11], [e20, e21]) + # -> [pooled_0, pooled_1] + # - hyperedge 1 contains node 2, 3 -> aggregate([e20, e21], [e30, e31]) + # -> [pooled_0, pooled_1] # shape: (num_hyperedges, out_channels) hyperedge_embeddings = HyperedgeAggregator(hyperedge_index, node_embeddings).pool( self.aggregation, diff --git a/hyperbench/hlp/nhp_hlp.py b/hyperbench/hlp/nhp_hlp.py index d6912d89..625d7c37 100644 --- a/hyperbench/hlp/nhp_hlp.py +++ b/hyperbench/hlp/nhp_hlp.py @@ -14,7 +14,7 @@ class NHPEncoderConfig(TypedDict): """ Configuration for the NHP encoder/scorer to be used for hyperedge link prediction. - Args: + Attributes: in_channels: Number of input features per node. hidden_channels: Number of hidden channels for incidence embeddings. Defaults to ``512``. aggregation: Hyperedge scoring aggregation. ``"maxmin"`` uses the paper's diff --git a/hyperbench/hlp/node2vec_common.py b/hyperbench/hlp/node2vec_common.py index 3d9c9905..d1cba1dd 100644 --- a/hyperbench/hlp/node2vec_common.py +++ b/hyperbench/hlp/node2vec_common.py @@ -18,22 +18,29 @@ class Node2VecGCNHlpConfig(TypedDict): """ Configuration for the GCN model. 
- Args: + Attributes: out_channels: Dimension of the output node embeddings from the GCN layers. hidden_channels: Dimension of the hidden node embeddings in the GCN layers. num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``. - drop_rate: Dropout rate applied after each GCN layer (except the last one). Defaults to ``0.0`` (no dropout). + drop_rate: Dropout rate applied after each GCN layer (except the last one). + Defaults to ``0.0`` (no dropout). bias: Whether to include a bias term in the GCN layers. Defaults to ``True``. improved: Whether to use the improved version of GCNConv. Defaults to ``False``. add_self_loops: Whether to add self-loops to the input graph. Defaults to ``True``. - normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to ``True``. + normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. + Defaults to ``True``. cached: Whether to cache the normalized adjacency matrix in GCNConv. - Only applicable if the graph structure does not change between epochs. Defaults to ``False``. - graph_reduction_strategy: Strategy for reducing the hyperedge graph. Defaults to ``clique_expansion``. - num_nodes: Total number of nodes in the hypergraph. This is useful when setting is transductive - but train dataset may not contain all hyperedges where some nodes appear, to ensure consistent encoding across splits. - activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``. - activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. + Only applicable if the graph structure does not change between epochs. + Defaults to ``False``. + graph_reduction_strategy: Strategy for reducing the hyperedge graph. + Defaults to ``clique_expansion``. + num_nodes: Total number of nodes in the hypergraph. 
This is useful when setting is + transductive but train dataset may not contain all hyperedges where some nodes appear, + to ensure consistent encoding across splits. + activation_fn: Activation function to use after each hidden layer. + Defaults to ``nn.ReLU``. + activation_fn_kwargs: Keyword arguments for the activation function. + Defaults to empty dict. """ out_channels: int @@ -55,12 +62,15 @@ class Node2VecHlpConfig(TypedDict): """ Configuration for the Node2Vec encoder. - Args: + Attributes: context_size: Skip-gram context size for Node2Vec. - For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``, - the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk. + For example, if ``context_size=2`` and ``walk_length=5``, then for a + random walk ``[v0, v1, v2, v3, v4]``, + the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within + distance 2 in the walk. The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``. - Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). + Rule of thumb: Graphs with strong local structure (5-10), Graphs with + communities/long-range patterns (10-20). Defaults to ``10``. walk_length: Length of each random walk. num_walks_per_node: Number of walks sampled per node. @@ -76,14 +86,19 @@ class Node2VecHlpConfig(TypedDict): ``X`` negative pairs ``(u, v_neg)`` will be generated, where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph. Defaults to ``1``, meaning one negative sample per positive pair. - num_nodes: Number of nodes in the stable node space. Defaults to the number of nodes in the ``hyperedge_index`` if not provided. - train_hyperedge_index: Training hypereddge index used to build the Node2Vec walk graph. Required in ``joint`` mode. 
- graph_reduction_strategy: Strategy for reducing the hyperedge graph. Defaults to ``clique_expansion``. + num_nodes: Number of nodes in the stable node space. Defaults to the number of nodes + in the ``hyperedge_index`` if not provided. + train_hyperedge_index: Training hyperedge index used to build the Node2Vec walk graph. + Required in ``joint`` mode. + graph_reduction_strategy: Strategy for reducing the hyperedge graph. + Defaults to ``clique_expansion``. random_walk_batch_size: Batch size used by the walk sampler in joint mode. node2vec_loss_weight: Weight applied to the Node2Vec walk loss in joint mode. - This is to decide how much the loss of Node2Vec contributes to the overall loss in joint training, relative to the HLP loss. - Defaults to ``1.0`` (equal weighting). Set to a higher value to prioritize learning good node embeddings, - or a lower value to prioritize the HLP loss. Ignored in precomputed mode. + This is to decide how much the loss of Node2Vec contributes to the overall loss in + joint training, relative to the HLP loss. + Defaults to ``1.0`` (equal weighting). Set to a higher value to prioritize learning + good node embeddings, or a lower value to prioritize the HLP loss. + Ignored in precomputed mode. sparse: Whether to use sparse gradients in the Node2Vec encoder. Defaults to ``False``. """ @@ -105,9 +120,9 @@ class Node2VecWalkLoaderState: """ State object to hold the walk loader and its iterator for joint Node2Vec training. - Args: - walk_loader: The DataLoader that provides batches of random walks from the Node2Vec encoder during joint training. - Initialized lazily when first needed. + Attributes: + walk_loader: The DataLoader that provides batches of random walks from the Node2Vec encoder + during joint training. Initialized lazily when first needed. cached_walk_loader_iterator: An iterator over the walk_loader, cached to allow fetching the next batch of walks at each training step without reinitializing.
""" diff --git a/hyperbench/hlp/node2vecgcn_hlp.py b/hyperbench/hlp/node2vecgcn_hlp.py index 1cfe0554..94b700cd 100644 --- a/hyperbench/hlp/node2vecgcn_hlp.py +++ b/hyperbench/hlp/node2vecgcn_hlp.py @@ -28,10 +28,12 @@ class Node2VecGCNEncoderConfig(TypedDict): """ Configuration for the Node2Vec encoder in ``Node2VecGCNHlpModule``. - Args: - mode: Whether to use precomputed node embeddings from ``x`` or train a Node2Vec encoder jointly inside the module. + Attributes: + mode: Whether to use precomputed node embeddings from ``x`` or train a Node2Vec encoder + jointly inside the module. num_features: Dimension of the node embeddings consumed by the decoder. - node2vec_config: Shared Node2Vec configuration used in joint mode, or metadata for validating precomputed embeddings. + node2vec_config: Shared Node2Vec configuration used in joint mode, or metadata for + validating precomputed embeddings. gcn_config: Configuration for the GCN layers. """ @@ -46,15 +48,16 @@ class Node2VecGCNHlpModule(HlpModule): A LightningModule for Node2Vec-based Hyperedge Link Prediction with GCN encoder. Supports two modes: - - ``precomputed``: use node embeddings already stored in ``batch.x``. - - ``joint``: train a Node2Vec encoder jointly with the GCN layers and hyperedge decoder. + - ``precomputed``: use node embeddings already stored in ``batch.x``. + - ``joint``: train a Node2Vec encoder jointly with the GCN layers and hyperedge decoder. Args: encoder_config: Configuration for the Node2Vec encoder and GCN layers. aggregation: Method to aggregate node embeddings per hyperedge. loss_fn: Loss function. Defaults to ``BCEWithLogitsLoss``. lr: Learning rate for the optimizer. Defaults to ``0.001``. - weight_decay: Weight decay (L2 regularization) for the optimizer. Defaults to ``0.0`` (no weight decay). + weight_decay: Weight decay (L2 regularization) for the optimizer. + Defaults to ``0.0`` (no weight decay). metrics: Optional dictionary of metric functions. 
""" diff --git a/hyperbench/hlp/node2vecslp_hlp.py b/hyperbench/hlp/node2vecslp_hlp.py index 4999d052..355ced2f 100644 --- a/hyperbench/hlp/node2vecslp_hlp.py +++ b/hyperbench/hlp/node2vecslp_hlp.py @@ -25,10 +25,12 @@ class Node2VecSLPEncoderConfig(TypedDict): """ Configuration for the Node2Vec encoder in ``Node2VecSLPHlpModule``. - Args: - mode: Whether to use precomputed node embeddings from ``x`` or train a Node2Vec encoder jointly inside the module. + Attributes: + mode: Whether to use precomputed node embeddings from ``x`` or train a Node2Vec encoder + jointly inside the module. num_features: Dimension of the node embeddings consumed by the decoder. - node2vec_config: Shared Node2Vec configuration used in joint mode, or metadata for validating precomputed embeddings. + node2vec_config: Shared Node2Vec configuration used in joint mode, or metadata for + validating precomputed embeddings. """ mode: NotRequired[Node2VecMode] @@ -41,15 +43,16 @@ class Node2VecSLPHlpModule(HlpModule): A LightningModule for Node2Vec-based Hyperedge Link Prediction. Supports two modes: - - ``precomputed``: use node embeddings already stored in ``batch.x``. - - ``joint``: train a Node2Vec encoder jointly with the hyperedge decoder. + - ``precomputed``: use node embeddings already stored in ``batch.x``. + - ``joint``: train a Node2Vec encoder jointly with the hyperedge decoder. Args: encoder_config: Configuration for the Node2Vec encoder. aggregation: Method to aggregate node embeddings per hyperedge. loss_fn: Loss function. Defaults to ``BCEWithLogitsLoss``. lr: Learning rate for the optimizer. Defaults to ``0.001``. - weight_decay: Weight decay (L2 regularization) for the optimizer. Defaults to ``0.0`` (no weight decay). + weight_decay: Weight decay (L2 regularization) for the optimizer. + Defaults to ``0.0`` (no weight decay). metrics: Optional dictionary of metric functions. 
""" diff --git a/hyperbench/hlp/villain_hlp.py b/hyperbench/hlp/villain_hlp.py index 3e08c6f3..6edca6ba 100644 --- a/hyperbench/hlp/villain_hlp.py +++ b/hyperbench/hlp/villain_hlp.py @@ -14,7 +14,7 @@ class VilLainEncoderConfig(TypedDict): """ Configuration for ``VilLainHlpModule``. - Args: + Attributes: num_nodes: Total number of trainable nodes. embedding_dim: Returned node and hyperedge embedding dimension. Defaults to ``128``. labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``. @@ -42,7 +42,8 @@ class VilLainHlpModule(HlpModule): Args: encoder_config: Configuration for the VilLain encoder. embedding_mode: Whether to return node or hyperedge embeddings from the VilLain encoder. - aggregation: Aggregation method to pool node embeddings into hyperedge embeddings when ``embedding_mode="node"``. + aggregation: Aggregation method to pool node embeddings into hyperedge embeddings + when ``embedding_mode="node"``. Ignored when ``embedding_mode="hyperedge"``. Defaults to ``maxmin``. loss_fn: Loss function for the HLP task. Defaults to ``nn.BCEWithLogitsLoss()``. lr: Learning rate for the optimizer. Defaults to ``0.01``. diff --git a/hyperbench/integration_tests/data/enricher_integration_test.py b/hyperbench/integration_tests/data/enricher_integration_test.py index 34f42cd8..b7c24896 100644 --- a/hyperbench/integration_tests/data/enricher_integration_test.py +++ b/hyperbench/integration_tests/data/enricher_integration_test.py @@ -33,9 +33,10 @@ # reasonable amount of time, we limit the number of nodes and hyperedges to 75000 # for the enrichment tests. This allows us to test the functionality of the # enrichers without running into excessively long test times, while still providing -# a meaningful test of their behavior on reasonably sized datasets. +# a meaningful test of their behavior on reasonably sized datasets. # With the threshold of 75000 nodes and hyperedges, we cover ~75% of the datasets. 
-# The datasets.py in the scripts folder contains a function that calculates the node count cutoff to cover 75% of the datasets. +# The datasets.py in the scripts folder contains a function that calculates the node count +# cutoff to cover 75% of the datasets. @pytest.mark.flaky(reruns=3, reruns_delay=10, rerun_show_tracebacks=True) diff --git a/hyperbench/models/gcn.py b/hyperbench/models/gcn.py index e889e836..815fde6f 100644 --- a/hyperbench/models/gcn.py +++ b/hyperbench/models/gcn.py @@ -9,20 +9,25 @@ class GCNConfig(TypedDict): """ Configuration for the GCN model. - Args: + Attributes: in_channels: Dimension of the input node embeddings to the GCN layers. out_channels: Dimension of the output node embeddings from the GCN layers. hidden_channels: Dimension of the hidden node embeddings in the GCN layers. num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``. - drop_rate: Dropout rate applied after each GCN layer (except the last one). Defaults to ``0.0`` (no dropout). - activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``. - activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. + drop_rate: Dropout rate applied after each GCN layer (except the last one). + Defaults to ``0.0`` (no dropout). + activation_fn: Activation function to use after each hidden layer. + Defaults to ``nn.ReLU``. + activation_fn_kwargs: Keyword arguments for the activation function. + Defaults to empty dict. bias: Whether to include a bias term in the GCN layers. Defaults to ``True``. improved: Whether to use the improved version of GCNConv. Defaults to ``False``. add_self_loops: Whether to add self-loops to the input graph. Defaults to ``True``. - normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to ``True``. + normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. + Defaults to ``True``. 
cached: Whether to cache the normalized adjacency matrix in GCNConv. - Only applicable if the graph structure does not change between epochs. Defaults to ``False``. + Only applicable if the graph structure does not change between epochs. + Defaults to ``False``. """ in_channels: int @@ -52,7 +57,8 @@ class GCN(nn.Module): drop_rate: Dropout rate applied after each GCN layer except the last one. bias: Whether to include a bias term in the GCN layers. activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``. - activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. + activation_fn_kwargs: Keyword arguments for the activation function. + Defaults to empty dict. improved: Whether to use the improved version of ``GCNConv``. add_self_loops: Whether to add self-loops to the input graph. normalize: Whether to symmetrically normalize the adjacency matrix in ``GCNConv``. @@ -122,7 +128,8 @@ def __build_layers( hidden_channels = hidden_channels if hidden_channels is not None else 0 if num_layers > 1 and hidden_channels <= 0: raise ValueError( - f"Expected positive hidden_channels for GCN with multiple layers, got {hidden_channels}." + f"Expected positive hidden_channels for GCN with multiple layers, " + f"got {hidden_channels}." ) common_kwargs: dict[str, bool] = { diff --git a/hyperbench/models/hgnn.py b/hyperbench/models/hgnn.py index 6d2255e2..21fd6b4e 100644 --- a/hyperbench/models/hgnn.py +++ b/hyperbench/models/hgnn.py @@ -9,15 +9,19 @@ class HGNN(nn.Module): Unlike HyperGCN (which approximates each hyperedge by selecting representative pairwise edges via random projection), HGNN preserves all higher-order relationships by passing messages through the full incidence structure: nodes -> hyperedges -> nodes. - - Proposed in `Hypergraph Neural Networks `_ paper (AAAI 2019). - - Reference implementation: `source `_. 
+ + References: + - Proposed in [Hypergraph Neural Networks](https://arxiv.org/pdf/1809.09401) (AAAI 2019). + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnn.html#HGNN). Args: in_channels: The number of input channels. hidden_channels: The number of hidden channels. num_classes: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, layers will use batch normalization. + Defaults to ``False``. drop_rate: Dropout ratio. Defaults to ``0.5``. """ diff --git a/hyperbench/models/hgnnp.py b/hyperbench/models/hgnnp.py index d52332a5..7c73dce7 100644 --- a/hyperbench/models/hgnnp.py +++ b/hyperbench/models/hgnnp.py @@ -6,17 +6,21 @@ class HGNNP(nn.Module): """ HGNN+ performs hypergraph convolution with two-stage mean aggregation using the incidence structure directly: nodes -> hyperedges -> nodes. - - Proposed in `HGNN+: General Hypergraph Neural Networks `_ paper (IEEE T-PAMI 2022). - - Reference implementation: `source `_. + + References: + - Proposed in [HGNN+: General Hypergraph Neural Networks](https://ieeexplore.ieee.org/document/9795251) paper (IEEE T-PAMI 2022). + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnnp.html#HGNNP). Args: in_channels: The number of input channels. hidden_channels: The number of hidden channels. num_classes: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``. 
+ bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, layers will use batch normalization. + Defaults to ``False``. drop_rate: Dropout ratio. Defaults to ``0.5``. - """ + """ # noqa: E501 def __init__( self, diff --git a/hyperbench/models/hnhn.py b/hyperbench/models/hnhn.py index 350ae906..9ca8b5ea 100644 --- a/hyperbench/models/hnhn.py +++ b/hyperbench/models/hnhn.py @@ -5,19 +5,23 @@ class HNHN(nn.Module): """ - HNHN performs incidence-based hypergraph convolution with explicit hyperedge - embeddings between the node -> hyperedge -> node propagation steps. - - Proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons `_ paper. - - Reference implementation: `source `_. + HNHN performs incidence-based hypergraph convolution with explicit hyperedge embeddings between + the node -> hyperedge -> node propagation steps. + + References: + - Proposed in [HNHN: Hypergraph Networks with Hyperedge Neurons](https://arxiv.org/abs/2006.12278) paper. + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hnhn.html#HNHN). Args: in_channels: The number of input channels. hidden_channels: The number of hidden channels. num_classes: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, layers will use batch normalization. + Defaults to ``False``. drop_rate: Dropout ratio. Defaults to ``0.5``. 
- """ + """ # noqa: E501 def __init__( self, diff --git a/hyperbench/models/hypergcn.py b/hyperbench/models/hypergcn.py index e6a4b9c9..e10d5d5d 100644 --- a/hyperbench/models/hypergcn.py +++ b/hyperbench/models/hypergcn.py @@ -5,24 +5,33 @@ class HyperGCN(nn.Module): """ - HyperGCN approximates each hyperedge of the hypergraph by a set of pairwise edges connecting the vertices of the hyperedge - and treats the learning problem as a graph learning problem on the approximation. - - Proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs `_ paper (NeurIPS 2019). - - Code of the paper: `source `_. - - Reference implementation: `source `_. + HyperGCN approximates each hyperedge of the hypergraph by a set of pairwise edges connecting the + vertices of the hyperedge and treats the learning problem as a graph learning problem on the + approximation. + + References: + - Proposed in [HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs](https://dl.acm.org/doi/10.5555/3454287.3454422) paper (NeurIPS 2019). + - Code of the paper: [source](https://github.com/malllabiisc/HyperGCN). + - Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hypergcn.html#HyperGCN). Args: in_channels: The number of input channels. hidden_channels: The number of hidden channels. - num_classes: The number of classes of the classification task as HyperGCB is a node classification model. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``. + num_classes: The number of classes of the classification task as HyperGCN is a + node classification model. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, layers will use batch normalization.
+ Defaults to ``False``. drop_rate: Dropout ratio. Defaults to ``0.5``. - use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``. - fast: If set to ``True``, the transformed graph structure will be computed once from the input hypergraph - and vertex features, and cached for future use. Defaults to ``True``. - seed: Optional random seed for the random reduction of hyperedges to edges. Defaults to ``None``. - """ + use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. + Defaults to ``False``. + fast: If set to ``True``, the transformed graph structure will be computed once from + the input hypergraph and vertex features, and cached for future use. + Defaults to ``True``. + seed: Optional random seed for the random reduction of hyperedges to edges. + Defaults to ``None``. + """ # noqa: E501 def __init__( self, @@ -82,7 +91,8 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: return x # If the GCN Laplacian is cached, we need to check if the node feature size has changed - # with cached_gcn_laplacian_matrix.size(0) != x.size(0), this can happen, for example, due to: + # with cached_gcn_laplacian_matrix.size(0) != x.size(0), this can happen, + # for example, due to: # adding new negative samples or having validation/test sets with different node features should_not_use_cached_gcn_laplacian_matrix = ( self.cached_gcn_laplacian_matrix is None # Not cached yet diff --git a/hyperbench/models/mlp.py b/hyperbench/models/mlp.py index c27eb01a..0a191888 100644 --- a/hyperbench/models/mlp.py +++ b/hyperbench/models/mlp.py @@ -9,7 +9,8 @@ class MLP(nn.Module): """ - A simple multi-layer perceptron (MLP) with configurable number of layers, hidden channels, activation functions, normalization, and dropout. + A simple multi-layer perceptron (MLP) with configurable number of layers, hidden channels, + activation functions, normalization, and dropout. 
Examples: >>> mlp = MLP(in_channels=16, out_channels=1, hidden_channels=32, num_layers=3) @@ -39,13 +40,19 @@ class MLP(nn.Module): in_channels: Number of input features. out_channels: Number of output features. hidden_channels: Number of hidden units in each hidden layer. Required if num_layers > 1. - num_layers: Total number of layers (including output layer). Must be at least 1. Defaults to 1. + num_layers: Total number of layers (including output layer). Must be at least 1. + Defaults to 1. activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``. - activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. - normalization_fn: Normalization function to use after each hidden layer (before activation). If ``None``, no normalization is applied. Defaults to ``None``. - normalization_fn_kwargs: Keyword arguments for the normalization function. Defaults to empty dict. + activation_fn_kwargs: Keyword arguments for the activation function. + Defaults to empty dict. + normalization_fn: Normalization function to use after each + hidden layer (before activation). + If ``None``, no normalization is applied. Defaults to ``None``. + normalization_fn_kwargs: Keyword arguments for the normalization function. + Defaults to empty dict. bias: Whether to include bias terms in the linear layers. Defaults to ``True``. - drop_rate: Dropout rate to apply after each hidden layer (after activation). If 0.0, no dropout is applied. Defaults to 0.0. + drop_rate: Dropout rate to apply after each hidden layer (after activation). If 0.0, no + dropout is applied. Defaults to 0.0. """ def __init__( @@ -105,7 +112,8 @@ def __validate_num_layers(self, num_layers: int, hidden_channels: int | None) -> class SLP(MLP): """ - A single-layer perceptron (SLP) which is a special case of MLP with exactly one layer and no hidden units. 
+ A single-layer perceptron (SLP) which is a special case of MLP with exactly + one layer and no hidden units. Examples: >>> slp = SLP(in_channels=16, out_channels=1) diff --git a/hyperbench/models/nhp.py b/hyperbench/models/nhp.py index 4be38118..774127d5 100644 --- a/hyperbench/models/nhp.py +++ b/hyperbench/models/nhp.py @@ -9,11 +9,14 @@ class NHP(nn.Module): """ Neural Hyperlink Predictor (NHP) for undirected hyperedge link prediction. - - Proposed in `NHP: Neural Hypergraph Link Prediction `_ paper (CIKM 2020). - - Reference implementation: `source `_. + + References: + - Proposed in [NHP: Neural Hypergraph Link Prediction](https://dl.acm.org/doi/10.1145/3340531.3411870) paper (CIKM 2020). + - Reference implementation: [Code](https://github.com/cyixiao/NHP-reproduce/). NHP scores each candidate hyperedge by building candidate-specific node embeddings. - A node that appears in multiple candidate hyperedges can receive a different incidence embedding in each one, + A node that appears in multiple candidate hyperedges can receive a different incidence + embedding in each one, because its update depends on the other nodes in that candidate hyperedge. Examples: @@ -37,11 +40,13 @@ class NHP(nn.Module): Args: in_channels: Number of input features per node. hidden_channels: Number of hidden units in the node embeddings. - activation_fn: Activation function to use after the linear transformations. Defaults to ``nn.ReLU``. + activation_fn: Activation function to use after the linear transformations. + Defaults to ``nn.ReLU``. activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict. - aggregation: Method to aggregate the incidence embeddings into a hyperedge embedding. Must be either "maxmin" or "mean". Defaults to "maxmin". + aggregation: Method to aggregate the incidence embeddings into a hyperedge embedding. + Must be either "maxmin" or "mean". Defaults to "maxmin". bias: Whether to include bias terms in the linear layers. 
Defaults to ``True``. - """ + """ # noqa: E501 def __init__( self, @@ -100,7 +105,8 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: # shape: (num_incidences, in_channels) incidence_node_features = x[node_ids] - # Do one local message-passing step to sum original node features per hyperedge to get hyperedge features. + # Do one local message-passing step to sum original node features per hyperedge + # to get hyperedge features # that are aware of all nodes in the candidate hyperedge. # Example: hyperedge 0 contains nodes (0, 1) -> [1, 0] + [0, 1] = [1, 1] # hyperedge 1 contains nodes (1, 2, 3) -> [0, 1] + [1, 1] + [1, 0] = [2, 2] @@ -146,7 +152,8 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: # shape (num_incidences, hidden_channels) selfloop_embeddings = self.self_loop(incidence_node_features) - # incidence_embeddings[0] = activation_fn(selfloop_embeddings[0] + neighbor_aware_hyperedge_embeddings[0]) + # incidence_embeddings[0] = + # activation_fn(selfloop_embeddings[0] + neighbor_aware_hyperedge_embeddings[0]) # is the embedding of the first incidence (i.e., node 0 in hyperedge 0) # after one local message-passing step inside that candidate hyperedge. incidence_embeddings = self.activation_fn( @@ -155,7 +162,8 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: # Treat each incidence embedding as a separately aggregatable set of features. # This is required because incidence embeddings are not global node embeddings: - # node 1 may appear twice with two different embeddings as it participates in two different candidate hyperedges. + # node 1 may appear twice with two different embeddings as it participates in + # two different candidate hyperedges.
# Example: incidence_ids = [0, 1, 2, 3, 4], # hyperedge_ids = [0, 0, 1, 1, 1] # -> incidence_hyperedge_index = [[0, 1, 2, 3, 4], @@ -173,11 +181,13 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: # [5, 6], # features 2, node 1 in hyperedge 1 # [7, 8], # features 3, node 2 in hyperedge 1 # [9, 10]] # features 4, node 3 in hyperedge 1 - # -> incidence_aggregator pools features (0, 1) for hyperedge 0 and features (2, 3, 4) for hyperedge 1 + # -> incidence_aggregator pools features (0, 1) for hyperedge 0 and + # features (2, 3, 4) for hyperedge 1 # if aggregation == "maxmin": - # -> hyperedge_embeddings = [[max(1, 3) - min(1, 3), max(2, 4) - min(2, 4)], # hyperedge 0 - # [max(5, 7, 9) - min(5, 7, 9), max(6, 8, 10) - min(6, 8, 10)]] # hyperedge 1 - # shape: (num_hyperedges, hidden_channels) + # -> hyperedge_embeddings = + # [[max(1, 3) - min(1, 3), max(2, 4) - min(2, 4)], # hyperedge 0 + # [max(5, 7, 9) - min(5, 7, 9), max(6, 8, 10) - min(6, 8, 10)]] # hyperedge 1 + # shape: (num_hyperedges, hidden_channels) # if aggregation == "mean": # -> hyperedge_embeddings = [[mean(1, 3), mean(2, 4)], # hyperedge 0 # [mean(5, 7, 9), mean(6, 8, 10)]] # hyperedge 1 diff --git a/hyperbench/models/node2vec.py b/hyperbench/models/node2vec.py index b7d40d88..85a8ce96 100644 --- a/hyperbench/models/node2vec.py +++ b/hyperbench/models/node2vec.py @@ -14,29 +14,35 @@ class Node2Vec(nn.Module): edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``. embedding_dim: Dimension of the node embeddings to learn. walk_length: Length of each random walk. - context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context). - For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``, - the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk. 
+ context_size: Window size for the skip-gram model (number of neighbors in the walk + considered as context). + For example, if ``context_size=2`` and ``walk_length=5``, then for + a random walk ``[v0, v1, v2, v3, v4]``, + the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within + distance 2 in the walk. The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``. - Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). + Rule of thumb: Graphs with strong local structure (5-10), Graphs with + communities/long-range patterns (10-20). Defaults to ``10``. num_walks_per_node: Number of random walks to start at each node. p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls the probability of stepping back to the node visited in the previous step. - Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the - local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks - are less likely to bounce back and forth across the same edge. + Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer + to the local neighborhood. Higher values of ``p`` discourage returning to the + previous node, so walks are less likely to bounce back and forth across the same edge. q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls whether walks stay near the source node or explore further outward. - Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and - emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes, - behaving more like BFS and emphasizing community structure and homophily. + Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS + and emphasizing structural roles. 
Higher values of ``q`` bias the walk toward + nearby nodes, behaving more like BFS and emphasizing community structure and homophily. num_negative_samples: Number of negative samples to use for training the skip-gram model. - If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated, - where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph. + If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, + ``X`` negative pairs ``(u, v_neg)`` will be generated, where ``v_neg`` is a node + sampled uniformly at random from all nodes in the graph. Defaults to ``1``, meaning one negative sample per positive pair. - num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. - This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing). + num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from + the hyperedge_index. This is only needed if the hyperedge_index does not include all + nodes (e.g., some isolated nodes are missing). sparse: Whether Node2Vec embeddings should use sparse gradients. """ @@ -91,33 +97,40 @@ class Node2VecConfig(TypedDict): """ Configuration for the Node2Vec model. - Args: + Attributes: edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``. embedding_dim: Dimension of the node embeddings to learn. walk_length: Length of each random walk. - context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context). - For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``, - the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk. 
+ context_size: Window size for the skip-gram model (number of neighbors in the walk + considered as context). + For example, if ``context_size=2`` and ``walk_length=5``, then for a + random walk ``[v0, v1, v2, v3, v4]``, + the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within + distance 2 in the walk. The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``. - Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). + Rule of thumb: Graphs with strong local structure (5-10), Graphs with + communities/long-range patterns (10-20). Defaults to ``10``. num_walks_per_node: Number of random walks to start at each node. p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls the probability of stepping back to the node visited in the previous step. - Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the - local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks - are less likely to bounce back and forth across the same edge. + Lower values of ``p`` make immediate backtracking more likely, which keeps walks + closer to the local neighborhood. Higher values of ``p`` discourage returning to the + previous node, so walks are less likely to bounce back and forth across the same edge. q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased). This controls whether walks stay near the source node or explore further outward. - Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and - emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes, - behaving more like BFS and emphasizing community structure and homophily. + Lower values of ``q`` bias the walk toward outward exploration, behaving more like + DFS and emphasizing structural roles. 
Higher values of ``q`` bias the walk toward + nearby nodes, behaving more like BFS and emphasizing community structure and homophily. num_negative_samples: Number of negative samples to use for training the skip-gram model. - If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated, - where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph. + If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, + ``X`` negative pairs ``(u, v_neg)`` will be generated, where ``v_neg`` is a node sampled + uniformly at random from all nodes in the graph. Defaults to ``1``, meaning one negative sample per positive pair. - num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. - This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing). + num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from + the hyperedge_index. + This is only needed if the hyperedge_index does not include all nodes + (e.g., some isolated nodes are missing). sparse: Whether Node2Vec embeddings should use sparse gradients. """ diff --git a/hyperbench/models/villain.py b/hyperbench/models/villain.py index 8ff27f59..721d0c82 100644 --- a/hyperbench/models/villain.py +++ b/hyperbench/models/villain.py @@ -11,14 +11,17 @@ class VilLain(nn.Module): """ VilLain learns node-specific virtual-label logits instead of consuming existing node features. - The model is transductive: rows in ``node_embedding`` correspond to the fixed global node space used during training. - - Proposed in `VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation `_ paper (WWW 2024). - - Reference implementation: `source `_. 
+ The model is transductive: rows in ``node_embedding`` correspond to the fixed global node space + used during training. + + References: + - Proposed in [VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation](https://dl.acm.org/doi/pdf/10.1145/3589334.3645454) paper (WWW 2024). + - Reference implementation: [Code](https://github.com/geon0325/VilLain/). Each forward pass: - 1. Samples differentiable virtual-label assignments with Gumbel-Softmax. - 2. Propagates them over the incidence structure. - 3. Returns averaged propagated node embeddings. + 1. Samples differentiable virtual-label assignments with Gumbel-Softmax. + 2. Propagates them over the incidence structure. + 3. Returns averaged propagated node embeddings. Args: num_nodes: Total number of trainable nodes. @@ -28,7 +31,7 @@ class VilLain(nn.Module): generation_steps: Propagation steps averaged for final embeddings. Defaults to ``100``. tau: Gumbel-Softmax temperature. Defaults to ``1.0``. eps: Numerical stability constant. Defaults to ``1e-10``. - """ + """ # noqa: E501 def __init__( self, @@ -81,18 +84,27 @@ def forward( ) -> tuple[Tensor, VilLainLossParts]: """ Compute the self-supervised VilLain objective. - Use ``hyperedge_embeddings`` or ``node_embeddings`` to generate final embeddings for inference after training. + + Use ``hyperedge_embeddings`` or ``node_embeddings`` to generate final embeddings for + inference after training. Args: hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``. - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. - Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. - This is needed as the model keeps an internal embedding table with a row for every node in the global node space. 
- num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. + node_ids: Optional global node ids matching local node ids the embedding table in the + transductive setting. + Use this when a batch has rebased local node ids but the learned logits live in the + full transductive node table. + This is needed as the model keeps an internal embedding table with a row for every + node in the global node space. + num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling + to preserve empty hyperedges. If not provided, the hyperedge count is inferred from ``hyperedge_index``. Returns: - node_embeddings: Node embeddings of shape ``(num_local_nodes, embedding_dim)``. + total_loss: The combined loss scalar tensor to optimize. + loss_parts: A dictionary containing the individual loss components. It contains + ``local_loss`` and ``global_loss`` scalar tensors. + """ return self.loss( hyperedge_index=hyperedge_index, @@ -111,14 +123,19 @@ def loss( Args: hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``. - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. - Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. - This is needed as the model keeps an internal embedding table with a row for every node in the global node space. - num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. + node_ids: Optional global node ids matching local node ids the embedding table in the + transductive setting. Use this when a batch has rebased local node ids but the + learned logits live in the full transductive node table. + This is needed as the model keeps an internal embedding table with a row for every + node in the global node space. 
+ num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling + to preserve empty hyperedges. If not provided, the hyperedge count is inferred from ``hyperedge_index``. Returns: - loss: A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors. + total_loss: The combined loss scalar tensor to optimize. + loss_parts: A dictionary containing the individual loss components. It contains + ``local_loss`` and ``global_loss`` scalar tensors. """ node_embeddings = self.__get_initial_virtual_node_features(node_ids=node_ids) actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges) @@ -149,15 +166,20 @@ def hyperedge_embeddings( ) -> Tensor: """ Generate hyperedge embeddings by averaging propagated hyperedge states. - Every generation step computes hyperedge states from the current node states, then updates node states for the next step. + + Every generation step computes hyperedge states from the current node states, then updates + node states for the next step. Args: hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``. - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. - Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. - This is needed as the model keeps an internal embedding table with a row for every node in the global node space. - num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. - If not provided, the hyperedge count is inferred from ``hyperedge_index``. + node_ids: Optional global node ids matching local node ids the embedding table in the + transductive setting. Use this when a batch has rebased local node ids but the + learned logits live in the full transductive node table. 
+ This is needed as the model keeps an internal embedding table with a row for every + node in the global node space. + num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling + to preserve empty hyperedges. If not provided, the hyperedge count is inferred from + ``hyperedge_index``. Returns: hyperedge_embeddings: Hyperedge embeddings of shape ``(num_hyperedges, embedding_dim)``. @@ -180,11 +202,14 @@ def node_embeddings( Args: hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``. - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. - Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. - This is needed as the model keeps an internal embedding table with a row for every node in the global node space. - num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. - If not provided, the hyperedge count is inferred from ``hyperedge_index``. + node_ids: Optional global node ids matching local node ids the embedding table in the + transductive setting. Use this when a batch has rebased local node ids but the + learned logits live in the full transductive node table. + This is needed as the model keeps an internal embedding table with a row for every + node in the global node space. + num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling + to preserve empty hyperedges. If not provided, the hyperedge count is inferred from + ``hyperedge_index``. Returns: node_embeddings: Node embeddings of shape ``(num_local_nodes, embedding_dim)``. @@ -197,7 +222,9 @@ def node_embeddings( ) def reset_parameters(self) -> None: - """Initialize trainable virtual-label logits near zero.""" + """ + Initialize trainable virtual-label logits near zero. 
+ """ nn.init.normal_(self.node_embedding, mean=0.0, std=0.1) def __embeddings( @@ -212,8 +239,10 @@ def __embeddings( Args: hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``. - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. - num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges during propagation. + node_ids: Optional global node ids matching local node ids the embedding table in the + transductive setting. + num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges + during propagation. mode: Selects whether to accumulate propagated node states or hyperedge states. Returns: @@ -237,16 +266,20 @@ def __embeddings( ) # Suppose generation_steps = 100. - # Average 100 propagated embeddings for each node/hyperedge to get more stable final embeddings. - # Sum here and divide by generation_steps later to avoid storing all 100 embeddings in memory at once. + # Average 100 propagated embeddings for each node/hyperedge to get more + # stable final embeddings. + # Sum here and divide by generation_steps later to avoid storing all 100 embeddings + # in memory at once. final_embeddings = final_embeddings + ( x if mode == "node" else hyperedge_embeddings ) final_embeddings = final_embeddings / self.generation_steps - # Example: final_embeddings.shape = (num_nodes/num_hyperedges, 8) with raw_embedding_dim=8 + # Example: final_embeddings.shape = (num_nodes/num_hyperedges, 8) + # with raw_embedding_dim=8 # -> returned shape = (num_nodes/num_hyperedges, 4) with embedding_dim=4 - # as it takes the first 4 channels of the raw embedding as the final embedding. + # as it takes the first 4 channels of the raw embedding + # as the final embedding. 
return final_embeddings[:, : self.embedding_dim] def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Tensor: @@ -254,7 +287,8 @@ def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Convert trainable node logits into flattened virtual-label probabilities. Args: - node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting. + node_ids: Optional global node ids matching local node ids the embedding table + in the transductive setting. If ``None``, all node rows are used. Returns: @@ -291,7 +325,8 @@ def __message_passing( num_hyperedges: int, ) -> tuple[Tensor, Tensor]: """ - One round of message passing, where nodes send messages to hyperedges and then hyperedges send messages back to nodes. + One round of message passing, where nodes send messages to hyperedges and then hyperedges + send messages back to nodes. Args: x: Virtual node features of shape (num_nodes, raw_embedding_dim). @@ -299,7 +334,9 @@ def __message_passing( num_hyperedges: Total number of hyperedges. Returns: - embeddings: The updated node and hyperedge embeddings after one round of message passing. + node_embeddings: The updated node embeddings after one round of message passing. + hyperedge_embeddings: The updated hyperedge embeddings after one round + of message passing. """ hyperedge_embeddings = HyperedgeAggregator( hyperedge_index=hyperedge_index, @@ -321,7 +358,9 @@ def __num_hyperedges( num_hyperedges: int | None, ) -> int: """ - Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not provided. + Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not + provided. + Explicit counts are required when empty hyperedges must remain in the hypergraph. 
""" if num_hyperedges is not None: diff --git a/hyperbench/nn/aggregator.py b/hyperbench/nn/aggregator.py index 8be38081..446e7ca7 100644 --- a/hyperbench/nn/aggregator.py +++ b/hyperbench/nn/aggregator.py @@ -16,7 +16,8 @@ class HyperedgeAggregator: hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``. node_embeddings: Node embedding matrix of size ``(num_nodes, num_channels)``. num_hyperedges: Optional explicit hyperedge count. - When provided, the pooled output preserves empty hyperedges that do not appear in ``hyperedge_index``. + When provided, the pooled output preserves empty hyperedges that do not appear + in ``hyperedge_index``. """ def __init__( @@ -34,16 +35,24 @@ def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum" Aggregate node embeddings for each hyperedge. ``hyperedge_index`` is the COO encoding of the nonzero entries of ``H``, - so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``. + so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` + for incidence ``k``. Let ``H`` be the binary incidence matrix of shape ``(num_nodes, num_hyperedges)`` and let ``X`` be the node embedding matrix of shape ``(num_nodes, num_channels)``. - This method pools node features into hyperedge features using the incidence pattern in ``H``: - - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H^T X``. - - ``aggregation="mean"`` computes ``D_e^{-1} H^T X``, where ``D_e[e, e] = sum_v H[v, e]`` is the hyperedge cardinality matrix. - - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H^T X``, - but replaces the summation over incident nodes with a channel-wise ``max``, ``min``, or product reduction. - - ``aggregation="maxmin"`` computes the channel-wise range ``max - min`` for each hyperedge. 
+ This method pools node features into hyperedge features using the incidence pattern in + ``H``. + + Aggregations: + - ``aggregation="sum"`` computes the equivalent of the standard + sparse matrix product ``H^T X``. + - ``aggregation="mean"`` computes ``D_e^{-1} H^T X``, where + ``D_e[e, e] = sum_v H[v, e]`` is the hyperedge cardinality matrix. + - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H^T X``, + but replaces the summation over incident nodes with a channel-wise ``max``, ``min``, + or product reduction. + - ``aggregation="maxmin"`` computes the channel-wise range ``max - min`` + for each hyperedge. Examples: >>> hyperedge_index = [[0, 1, 2, 2, 3], @@ -62,7 +71,8 @@ def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum" aggregation: Reduction applied across the nodes belonging to each hyperedge. Returns: - hyperedge_embeddings: A hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``. + hyperedge_embeddings: A hyperedge embedding matrix of + shape ``(num_hyperedges, num_channels)``. """ # Gather the embeddings for each incidence. # A node appearing in multiple hyperedges is repeated, once per incidence. @@ -121,7 +131,8 @@ class NodeAggregator: Args: hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``. hyperedge_embeddings: Hyperedge embedding matrix of size ``(num_hyperedges, num_channels)``. - num_nodes: Optional explicit node count. When provided, the pooled output preserves isolated nodes that do not appear in ``hyperedge_index``. + num_nodes: Optional explicit node count. When provided, the pooled output preserves + isolated nodes that do not appear in ``hyperedge_index``. """ def __init__( @@ -139,15 +150,22 @@ def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum" Aggregate hyperedge embeddings for each node. 
``hyperedge_index`` is the COO encoding of the nonzero entries of ``H``, - so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``. + so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` + for incidence ``k``. Let ``H`` be the incidence matrix of shape ``(num_nodes, num_hyperedges)`` and let ``E`` be the hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``. - This method pools hyperedge features into node features using the incidence pattern in ``H``: - - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H E``. - - ``aggregation="mean"`` computes ``D_v^{-1} H E``, where ``D_v[v, v] = sum_e H[v, e]`` is the node degree matrix. - - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H E``, - but replaces the summation over incident hyperedges with a channel-wise ``max``, ``min``, or product reduction. + This method pools hyperedge features into node features using the incidence pattern + in ``H``. + + Aggregations: + - ``aggregation="sum"`` computes the equivalent of the standard + sparse matrix product ``H E``. + - ``aggregation="mean"`` computes ``D_v^{-1} H E``, where ``D_v[v, v] = sum_e H[v, e]`` + is the node degree matrix. + - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H E``, + but replaces the summation over incident hyperedges with a channel-wise + ``max``, ``min``, or product reduction. Examples: >>> hyperedge_index = [[0, 1, 1, 2], diff --git a/hyperbench/nn/conv.py b/hyperbench/nn/conv.py index 6f415acd..992c9a5c 100644 --- a/hyperbench/nn/conv.py +++ b/hyperbench/nn/conv.py @@ -6,19 +6,25 @@ class HyperGCNConv(nn.Module): """ - The HyperGCNConv layer proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs `_ paper (NeurIPS 2019). - Reference implementation: `source `_. 
+ References: + - The HyperGCNConv layer proposed in [HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs](https://dl.acm.org/doi/10.5555/3454287.3454422) paper (NeurIPS 2019). + - Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hypergcn_conv.html#HyperGCNConv). Args: in_channels: The number of input channels. out_channels: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, the layer will use batch normalization. + Defaults to ``False``. drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``. - use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``. - is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``. - seed: Optional random seed for the random reduction of hyperedges to edges. Defaults to ``None``. - """ + use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. + Defaults to ``False``. + is_last: If set to ``True``, the layer will not apply the final activation and + dropout functions. Defaults to ``False``. + seed: Optional random seed for the random reduction of hyperedges to edges. + Defaults to ``None``. 
+ """ # noqa: E501 def __init__( self, @@ -39,7 +45,8 @@ def __init__( self.dropout = nn.Dropout(drop_rate) # θ is the learnable weight matrix (as in the HyperGCN paper), - # it projects node features from in_channels to out_channels and learns how to mix feature channels + # it projects node features from in_channels to out_channels and + # learns how to mix feature channels self.theta = nn.Linear(in_channels, out_channels, bias=bias) self.seed = seed @@ -55,9 +62,12 @@ def forward( Args: x: Input node feature matrix. Size ``(num_nodes, in_channels)``. - hyperedge_index: Hyperedge indices representing the hypergraph structure. Size ``(2, num_hyperedges)``. - gcn_laplacian_matrix: Optional precomputed normalized GCN Laplacian matrix. Size ``(num_nodes, num_nodes)``. Defaults to ``None``. - If provided, it will be used directly for smoothing, so we can skip computing it from edge_index. + hyperedge_index: Hyperedge indices representing the hypergraph structure. + Size ``(2, num_hyperedges)``. + gcn_laplacian_matrix: Optional precomputed normalized GCN Laplacian matrix. + Size ``(num_nodes, num_nodes)``. Defaults to ``None``. + If provided, it will be used directly for smoothing, so we can skip computing + it from edge_index. Returns: x: The output node feature matrix. Size ``(num_nodes, out_channels)``. @@ -94,12 +104,15 @@ def forward( class HGNNConv(nn.Module): """ - The HGNNConv layer proposed in `Hypergraph Neural Networks `_ paper (AAAI 2019). - Reference implementation: `source `_. + References: + - The HGNNConv layer proposed in [Hypergraph Neural Networks](https://arxiv.org/pdf/1809.09401) paper (AAAI 2019). + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnn_conv.html#HGNNConv). - Each layer performs: ``X' = sigma(L_HGNN X Theta)`` where ``L_HGNN = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` - is the hypergraph Laplacian computed from the incidence matrix H. 
This smooths node features through - the hypergraph structure (nodes -> hyperedges -> nodes) without reducing to a pairwise graph. + Each layer performs: ``X' = sigma(L_HGNN X Theta)`` + where ``L_HGNN = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` + is the hypergraph Laplacian computed from the incidence matrix H. + This smooths node features through the hypergraph structure (nodes -> hyperedges -> nodes) + without reducing to a pairwise graph. Unlike ``HyperGCNConv``, which uses a GCN Laplacian on a graph reduced from the hypergraph, ``HGNNConv`` operates entirely in hypergraph space and preserves all higher-order relationships. @@ -107,11 +120,14 @@ class HGNNConv(nn.Module): Args: in_channels: The number of input channels. out_channels: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, the layer will use batch normalization. + Defaults to ``False``. drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``. - is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``. - """ + is_last: If set to ``True``, the layer will not apply the final activation and + dropout functions. Defaults to ``False``. + """ # noqa: E501 def __init__( self, @@ -166,8 +182,9 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: class HGNNPConv(nn.Module): """ - The HGNNPConv layer proposed in `HGNN+: General Hypergraph Neural Networks `_ paper (IEEE T-PAMI 2022). - Reference implementation: `source `_. 
+ References: + - The HGNNPConv layer proposed in [HGNN+: General Hypergraph Neural Networks](https://ieeexplore.ieee.org/document/9795251) paper (IEEE T-PAMI 2022). + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnnp_conv.html#HGNNPConv). Each layer performs: ``X' = sigma(M_HGNN+ X Theta)`` where ``M_HGNN+ = D_v^{-1} H D_e^{-1} H^T`` is the HGNN+ smoothing matrix. @@ -179,11 +196,14 @@ class HGNNPConv(nn.Module): Args: in_channels: The number of input channels. out_channels: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, the layer will use batch normalization. + Defaults to ``False``. drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``. - is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``. - """ + is_last: If set to ``True``, the layer will not apply the final activation and dropout + functions. Defaults to ``False``. + """ # noqa: E501 def __init__( self, @@ -233,17 +253,21 @@ def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: class HNHNConv(nn.Module): """ - The HNHNConv layer proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons `_ paper. - Reference implementation: `source `_. + References: + - The HNHNConv layer proposed in [HNHN: Hypergraph Networks with Hyperedge Neurons](https://arxiv.org/abs/2006.12278) paper. + - Reference implementation: [Code](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hnhn_conv.html#HNHNConv). Args: in_channels: The number of input channels. 
out_channels: The number of output channels. - bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``. - use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``. + bias: If set to ``False``, the layer will not learn the bias parameter. + Defaults to ``True``. + use_batch_normalization: If set to ``True``, the layer will use batch normalization. + Defaults to ``False``. drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``. - is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``. - """ + is_last: If set to ``True``, the layer will not apply the final activation and + dropout functions. Defaults to ``False``. + """ # noqa: E501 __AGGREGATION: Literal["mean"] = "mean" @@ -266,8 +290,8 @@ def __init__( def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor: """ - Apply one HNHN convolution layer using two learned projections around - node-to-hyperedge and hyperedge-to-node mean aggregation. + Apply one HNHN convolution layer using two learned projections around node-to-hyperedge and + hyperedge-to-node mean aggregation. Args: x: Input node feature matrix of size ``(num_nodes, in_channels)``. diff --git a/hyperbench/nn/loss.py b/hyperbench/nn/loss.py index 261be18f..adb84a84 100644 --- a/hyperbench/nn/loss.py +++ b/hyperbench/nn/loss.py @@ -24,7 +24,8 @@ def forward(self, logits: Tensor, labels: Tensor) -> Tensor: Args: logits: Logit scores for each candidate hyperedge, of shape ``(num_hyperedges,)``. - labels: Binary labels indicating positive (1) and negative (0) hyperedges, of shape ``(num_hyperedges,)``. + labels: Binary labels indicating positive (1) and negative (0) hyperedges, of shape + ``(num_hyperedges,)``. Returns: loss: Scalar loss value. 
@@ -90,8 +91,10 @@ def local_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> T to become confident within each virtual-label subspace. Args: - node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``. - hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``. + node_embeddings: Propagated node states of shape + ``(num_nodes, num_subspaces * labels_per_subspace)``. + hyperedge_embeddings: Propagated hyperedge states with the same channel dimension + as ``node_embeddings``. Returns: loss: Scalar tensor containing node plus hyperedge entropy losses. @@ -106,8 +109,10 @@ def global_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> with a distinctiveness term that separates label columns inside each subspace. Args: - node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``. - hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``. + node_embeddings: Propagated node states of shape + ``(num_nodes, num_subspaces * labels_per_subspace)``. + hyperedge_embeddings: Propagated hyperedge states with the same channel dimension + as ``node_embeddings``. Returns: loss: Scalar tensor containing node plus hyperedge global losses. @@ -137,7 +142,8 @@ def entropy_loss(self, x: Tensor) -> Tensor: Compute mean entropy within each virtual-label subspace. Args: - x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``. + x: Flattened virtual-label probabilities of shape + ``(num_items, num_subspaces * labels_per_subspace)``. Returns: loss: Scalar entropy loss. @@ -151,7 +157,8 @@ def entropy_loss(self, x: Tensor) -> Tensor: # virtual-label distribution in subspace 0. 
probs = x.view(-1, self.num_subspaces, self.labels_per_subspace) - # With this, we induce structurally close nodes (or hyperedges) to be assigned to the same label. + # With this, we induce structurally close nodes (or hyperedges) + # to be assigned to the same label. # Example: probs.shape = (num_nodes, 4, 2) # -> entropy.shape = (num_nodes, 4), one entropy per item and subspace entropy = -(probs * torch.log(probs + self.eps)).sum(dim=2, dtype=torch.float) @@ -161,11 +168,12 @@ def balance_loss(self, x: Tensor) -> Tensor: """ Compute negative entropy of global virtual-label usage. - This term is minimized, so the negative sign makes optimization maximize entropy of average label usage - and reduces collapse to one virtual label. + This term is minimized, so the negative sign makes optimization maximize entropy + of average label usage and reduces collapse to one virtual label. Args: - x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``. + x: Flattened virtual-label probabilities of shape + ``(num_items, num_subspaces * labels_per_subspace)``. Returns: loss: Scalar balance loss. @@ -183,7 +191,8 @@ def balance_loss(self, x: Tensor) -> Tensor: mean_probs = probs.mean(dim=0, dtype=torch.float) # Negative entropy to maximize global label diversity and prevents collapse. - # Example: mean_probs[0] = [0.50, 0.50] has higher entropy than mean_probs[0] = [0.99, 0.01]. + # Example: mean_probs[0] = [0.50, 0.50] has higher entropy + # than mean_probs[0] = [0.99, 0.01]. entropy = -(mean_probs * torch.log(mean_probs + self.eps)).sum(dim=1, dtype=torch.float) return -entropy.mean(dtype=torch.float) @@ -191,11 +200,14 @@ def distinctiveness_loss(self, x: Tensor) -> Tensor: """ Penalize similar virtual-label columns inside each subspace. - For every subspace, this compares all label columns across items with cosine similarity and applies a diagonal classification objective. 
- The diagonal target encourages each label column to be most similar to itself and less similar to other labels. + For every subspace, this compares all label columns across items with cosine similarity + and applies a diagonal classification objective. + The diagonal target encourages each label column to be most similar to itself + and less similar to other labels. Args: - x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``. + x: Flattened virtual-label probabilities of shape + ``(num_items, num_subspaces * labels_per_subspace)``. Returns: loss: Scalar distinctiveness loss. @@ -222,22 +234,33 @@ def distinctiveness_loss(self, x: Tensor) -> Tensor: ).repeat_interleave(self.labels_per_subspace) # Compare every virtual-label column against every other column. - # Two different labels in the same subspace should not describe the same pattern of nodes/hyperedges. + # Two different labels in the same subspace should not describe + # the same pattern of nodes/hyperedges. # Example: with num_subspaces=4: # probs[:, :, idx_i] and probs[:, :, idx_j] both have shape (4, 4, 4), # where the last dimension enumerates the four ordered label pairs above - # probs[:, :, idx_i] == [[[p00, p01, p00, p01], # node/hyperedge 0's label probabilities for the four pairs - # [p10, p11, p10, p11], # node/hyperedge 1's label probabilities for the four pairs - # [p20, p21, p20, p21], # node/hyperedge 2's label probabilities for the four pairs - # [p30, p31, p30, p31]], # node/hyperedge 3's label probabilities for the four pairs - # ...] - # probs[:, :, idx_j] == [[[p00, p00, p01, p01], # node/hyperedge 0's label probabilities for the four pairs - # [p10, p10, p11, p11], # node/hyperedge 1's label probabilities for the four pairs - # [p20, p20, p21, p21], # node/hyperedge 2's label probabilities for the four pairs - # [p30, p30, p31, p31]], # node/hyperedge 3's label probabilities for the four pairs - # ...] 
- # F.cosine_similarity(..., dim=0) compares each pair across the 4 items, producing shape (4, 4) - # view(-1, 2, 2) restores one 2x2 similarity matrix per subspace, so shape becomes (4, 2, 2) + # # node/hyperedge 0's label probabilities for the four pairs + # probs[:, :, idx_i] == [[[p00, p01, p00, p01], + # # node/hyperedge 1's label probabilities for the four pairs + # [p10, p11, p10, p11], + # # node/hyperedge 2's label probabilities for the four pairs + # [p20, p21, p20, p21], + # # node/hyperedge 3's label probabilities for the four pairs + # [p30, p31, p30, p31]], + # ...] + # # node/hyperedge 0's label probabilities for the four pairs + # probs[:, :, idx_j] == [[[p00, p00, p01, p01], + # # node/hyperedge 1's label probabilities for the four pairs + # [p10, p10, p11, p11], + # # node/hyperedge 2's label probabilities for the four pairs + # [p20, p20, p21, p21], + # # node/hyperedge 3's label probabilities for the four pairs + # [p30, p30, p31, p31]], + # ...] + # F.cosine_similarity(..., dim=0) compares each pair across the 4 items, + # producing shape (4, 4) + # view(-1, 2, 2) restores one 2x2 similarity matrix per subspace, + # so shape becomes (4, 2, 2) similarity = F.cosine_similarity( probs[:, :, idx_i], probs[:, :, idx_j], @@ -245,7 +268,8 @@ def distinctiveness_loss(self, x: Tensor) -> Tensor: eps=self.eps, ).view(-1, self.labels_per_subspace, self.labels_per_subspace) - # Turn each similarity row into a classification distribution and keep the diagonal self-match probabilities. + # Turn each similarity row into a classification distribution and keep the + # diagonal self-match probabilities. # Example: similarity[subspace 0].shape = (2, 2) # - row 0 scores how label 0 matches labels [0, 1] # - row 1 scores how label 1 matches labels [0, 1] @@ -263,9 +287,11 @@ class VilLainLossParts(TypedDict): """ Named VilLain self-supervised loss parts returned by ``VilLain.loss``. 
- Args: - local_loss: Sum of node and hyperedge local entropy losses over all training propagation steps. - global_loss: Sum of balance and distinctiveness losses over all training propagation steps. + Attributes: + local_loss: Sum of node and hyperedge local entropy losses over all training + propagation steps. + global_loss: Sum of balance and distinctiveness losses over all training + propagation steps. """ local_loss: Tensor diff --git a/hyperbench/nn/scorer.py b/hyperbench/nn/scorer.py index d54809ae..e5f65392 100644 --- a/hyperbench/nn/scorer.py +++ b/hyperbench/nn/scorer.py @@ -73,10 +73,12 @@ def score_batch( Args: hyperedge_index: Tensor of shape ``(2, |E|)``. - node_to_neighbors: Optional precomputed node to neighborhood mapping. If None, it will be computed from ``hyperedge_index``. + node_to_neighbors: Optional precomputed node to neighborhood mapping. If None, it will + be computed from ``hyperedge_index``. Returns: - scores: A 1-D tensor of shape ``(num_hyperedges,)`` with the CN score for each hyperedge. + scores: A 1-D tensor of shape ``(num_hyperedges,)`` with the CN score + for each hyperedge. 
""" if node_to_neighbors is None: node_to_neighbors = Hypergraph.from_hyperedge_index(hyperedge_index).neighbors_of_all() diff --git a/hyperbench/tests/data/dataset_test.py b/hyperbench/tests/data/dataset_test.py index 6074fd48..4f54d3e0 100644 --- a/hyperbench/tests/data/dataset_test.py +++ b/hyperbench/tests/data/dataset_test.py @@ -219,13 +219,13 @@ def test_getitem_index_list_empty(mock_hdata, strategy): pytest.param( SamplingStrategy.NODE, [0, 1, 2, 3, 4], - r"Index list length \(5\) cannot exceed the number of sampleable items \(4\)\.", + re.escape("Index list length (5) cannot exceed the number of sampleable items (4)."), id="node_strategy", ), pytest.param( SamplingStrategy.HYPEREDGE, [0, 1, 2], - r"Index list length \(3\) cannot exceed the number of sampleable items \(2\)\.", + re.escape("Index list length (3) cannot exceed the number of sampleable items (2)."), id="hyperedge_strategy", ), ], @@ -244,12 +244,15 @@ def test_getitem_raises_when_index_list_larger_than_max( "strategy, index, expected_message", [ pytest.param( - SamplingStrategy.NODE, 4, r"Node ID 4 is out of bounds \(0, 3\)\.", id="node_strategy" + SamplingStrategy.NODE, + 4, + re.escape("Node ID 4 is out of bounds (0, 3)."), + id="node_strategy", ), pytest.param( SamplingStrategy.HYPEREDGE, 2, - r"Hyperedge ID 2 is out of bounds \(0, 1\)\.", + re.escape("Hyperedge ID 2 is out of bounds (0, 1)."), id="hyperedge_strategy", ), ], @@ -267,7 +270,8 @@ def test_getitem_raises_when_index_out_of_bounds( @pytest.mark.parametrize( "strategy, index, expected_shape, expected_num_hyperedges", [ - # When node 1 is selected, we get hyperedge 0 with nodes 0 and 1 -> 2 incidences, 1 hyperedge + # When node 1 is selected, we get hyperedge 0 with nodes 0 + # and 1 -> 2 incidences, 1 hyperedge pytest.param(SamplingStrategy.NODE, 1, (2, 1), 1, id="node_strategy"), # When hyperedge 0 is selected, we get nodes 0 and 1 -> 2 incidences, 1 hyperedge pytest.param(SamplingStrategy.HYPEREDGE, 0, (2, 1), 1, 
id="hyperedge_strategy"), @@ -288,7 +292,8 @@ def test_getitem_single_index( @pytest.mark.parametrize( "strategy, index, expected_shape, expected_num_hyperedges", [ - # When nodes (0, 2, 3) -> hyperedge 0 (nodes 0, 1) + hyperedge 1 (nodes 2, 3) -> 4 incidences, 2 hyperedges + # When nodes (0, 2, 3) -> hyperedge 0 (nodes 0, 1) + hyperedge 1 (nodes 2, 3) + # -> 4 incidences, 2 hyperedges pytest.param(SamplingStrategy.NODE, [0, 2, 3], (2, 4), 2, id="node_strategy"), # When hyperedge 0 (nodes 0, 1) + hyperedge 1 (nodes 2, 3) -> 4 incidences, 2 hyperedges pytest.param(SamplingStrategy.HYPEREDGE, [0, 1], (2, 4), 2, id="hyperedge_strategy"), @@ -322,7 +327,8 @@ def test_getitem_with_hyperedge_attr(mock_hdata_with_hyperedge_attr, strategy): assert data.hyperedge_index.shape == (2, 2) assert data.num_hyperedges == 1 - # Even though the original hypergraph has edge attributes, __getitem__ should return hyperedge_attr as None + # Even though the original hypergraph has edge attributes, __getitem__ should + # return hyperedge_attr as None # as the hyperedge attributes are handled by the loader's collate function during batching assert data.hyperedge_attr is None @@ -362,7 +368,8 @@ def test_getitem_with_multiple_hyperedge_attr( data = dataset[index] assert data.num_hyperedges == 2 - # Even though the original hypergraph has edge attributes, __getitem__ should return hyperedge_attr as None + # Even though the original hypergraph has edge attributes, __getitem__ should + # return hyperedge_attr as None # as the hyperedge attributes are handled by the loader's collate function during batching assert data.hyperedge_attr is None @@ -383,7 +390,8 @@ def test_getitem_with_hyperedge_weights(mock_hdata_with_hyperedge_weights, strat assert data.hyperedge_index.shape == (2, 2) assert data.num_hyperedges == 1 - # Even though the original hypergraph has edge attributes, __getitem__ should return hyperedge_weights as None + # Even though the original hypergraph has edge attributes, 
__getitem__ should + # return hyperedge_weights as None # as the hyperedge weights are handled by the loader's collate function during batching assert data.hyperedge_weights is None @@ -1392,7 +1400,8 @@ def test_split_with_ratios_raises_when_train_split_idx_provided_but_not_transduc # 3/5 and 2/5 as we ensure splits don't get more then requested, # in this way, all later splits get at least what they requested, # except the last one that might get slightly more due to rounding. - # This effect is mitigated the more hyperedges we have, as the ratios get closer to the requested ones. + # This effect is mitigated the more hyperedges we have, as the ratios get closer to the + # requested ones. [0.6, 0.4], id="five_hyperedges_rounds_train_up", ), @@ -1403,7 +1412,7 @@ def test_split_with_ratios_raises_when_train_split_idx_provided_but_not_transduc torch.arange( 500, dtype=torch.long, - ), # 500 hyperedges, 125 per node, so we can split exactly according to the ratios + ), # 500 hyperedges, 125 per node, so we can split according to the ratios ] ), [375, 125], @@ -1453,7 +1462,8 @@ def test_split_transductive_raises_when_node_is_missing_from_all_hyperedges(): with pytest.raises( ValueError, match=re.escape( - "Cannot create a transductive first split covering all nodes because these node ids do not appear in any hyperedge: [3]." + "Cannot create a transductive first split covering all nodes because " + "these node ids do not appear in any hyperedge: [3]." ), ): dataset.split( diff --git a/hyperbench/tests/data/enricher_test.py b/hyperbench/tests/data/enricher_test.py index 8f3f0674..2af87de3 100644 --- a/hyperbench/tests/data/enricher_test.py +++ b/hyperbench/tests/data/enricher_test.py @@ -230,7 +230,9 @@ def test_node2vec_enricher_returns_zero_features_when_clique_has_no_non_selfloop with pytest.warns( UserWarning, - match="Clique expansion produced no non-self-loop edges. 
Returning zero node features.", + match=re.escape( + "Clique expansion produced no non-self-loop edges. Returning zero node features." + ), ): result = enricher.enrich(hyperedge_index) diff --git a/hyperbench/tests/data/hif_test.py b/hyperbench/tests/data/hif_test.py index 2ad70bc3..d57330f2 100644 --- a/hyperbench/tests/data/hif_test.py +++ b/hyperbench/tests/data/hif_test.py @@ -246,12 +246,19 @@ def test_load_from_url_raises_when_status_is_not_200(): [ pytest.param( "https://example.com/algebra.json.zst.zst", - r"Unsupported file format for URL 'https://example.com/algebra.json.zst.zst'\. Expected \.json or \.json\.zst", + re.escape( + "Unsupported file format for URL 'https://example.com/algebra.json.zst.zst'" + ". Expected .json or .json.zst" + ), id="json-zst-zst", ), pytest.param( "https://example.com/algebra.zst.json.zst", - r"URL 'https://example.com/algebra.zst.json.zst' has an unexpected filename format\. Expected at most one dot in the base filename before the extension \(e\.g\., dataset\.json or dataset\.json\.zst\)\.", + re.escape( + "URL 'https://example.com/algebra.zst.json.zst' has an unexpected filename " + "format. Expected at most one dot in the base filename before the extension (e.g., " + "dataset.json or dataset.json.zst)." + ), id="zst-json-zst", ), ], @@ -567,7 +574,8 @@ def test_hifloader_falls_back_to_hf_hub_download_when_github_raw_download_fails( pytest.raises( ValueError, match=re.escape( - "Failed to download dataset 'algebra' from GitHub with status code 404 and no SHA provided for Hugging Face Hub fallback." + "Failed to download dataset 'algebra' from GitHub with " + "status code 404 and no SHA provided for Hugging Face Hub fallback." 
), ), ): @@ -586,7 +594,7 @@ def test_hifloader_from_url_raise_error_on_wrong_extension(): with pytest.raises( ValueError, - match=r"Unsupported file format for URL 'https://example.com/algebra.txt'", + match=re.escape("Unsupported file format for URL 'https://example.com/algebra.txt'"), ): HIFLoader.load_from_url("https://example.com/algebra.txt") @@ -821,7 +829,8 @@ def test_load_by_name_raises_warn_when_fail_to_cleanup_hf_cache(tmp_path, mock_h patch( "hyperbench.data.hif.shutil.rmtree", side_effect=FileNotFoundError( - f"[Errno 2] No such file or directory: '{tmp_path / 'hf_cache' / 'datasets--HypernetworkRG--algebra'}'" + f"[Errno 2] No such file or directory: '" + f"{tmp_path / 'hf_cache' / 'datasets--HypernetworkRG--algebra'}'" ), ), pytest.warns(UserWarning, match="Failed to clean up Hugging Face Hub cache"), @@ -855,7 +864,9 @@ def test_load_by_name_raises_when_downloaded_hf_file_cannot_be_read(tmp_path): pytest.warns(UserWarning, match="GitHub raw download failed"), pytest.raises( ValueError, - match=r"Failed to read compressed JSON file 'downloaded\.json\.zst': missing file\.", + match=re.escape( + "Failed to read compressed JSON file 'downloaded.json.zst': missing file." 
+ ), ), patch("hyperbench.data.hif.os.getenv", return_value=None), ): @@ -897,7 +908,8 @@ def test_load_by_name_raises_when_downloaded_hf_content_cannot_be_written(tmp_pa pytest.warns(UserWarning, match="GitHub raw download failed"), pytest.raises( ValueError, - match=r"Failed to save downloaded dataset 'algebra' to disk at '.*algebra\.json\.zst': disk full\.", + match=r"Failed to save downloaded dataset 'algebra' to disk at '.*algebra\.json\.zst': " + r"disk full\.", ), ): HIFLoader.load_by_name("algebra", hf_sha=hf_sha, save_on_disk=True) @@ -929,7 +941,7 @@ def test_load_by_name_raises_when_saving_downloaded_dataset_fails(tmp_path): ), pytest.raises( ValueError, - match=r"Failed to save downloaded 'algebra\.json\.zst'", + match=re.escape("Failed to save downloaded 'algebra.json.zst'"), ), ): HIFLoader.load_by_name("algebra", save_on_disk=True) @@ -963,9 +975,9 @@ def test_hifloader_download_failure_when_hf_fallback_fails(tmp_path): pytest.warns(UserWarning, match="GitHub raw download failed"), pytest.raises( ValueError, - match=( - r"Failed to download dataset 'algebra' from GitHub and Hugging Face Hub\. " - r"GitHub error: 404 \| Hugging Face error: HFHub failed" + match=re.escape( + "Failed to download dataset 'algebra' from GitHub and Hugging Face Hub. " + "GitHub error: 404 | Hugging Face error: HFHub failed" ), ), patch("hyperbench.data.hif.os.getenv", return_value=None), @@ -999,8 +1011,10 @@ def test_hifloader_download_failure_when_hf_token_is_invalid(tmp_path): pytest.raises( ValueError, match=( - r"Failed to download dataset 'algebra' from GitHub and Hugging Face Hub\. " - r"GitHub error: 404 \| Hugging Face error: HFHub failed" + re.escape( + "Failed to download dataset 'algebra' from GitHub and Hugging Face Hub. 
" + "GitHub error: 404 | Hugging Face error: HFHub failed" + ) ), ), patch("hyperbench.data.hif.os.getenv", return_value="invalid_token"), diff --git a/hyperbench/tests/data/negative_sampler_test.py b/hyperbench/tests/data/negative_sampler_test.py index 48acdde2..9540fd2d 100644 --- a/hyperbench/tests/data/negative_sampler_test.py +++ b/hyperbench/tests/data/negative_sampler_test.py @@ -574,7 +574,8 @@ def test_clique_negative_sampler_fails_when_positive_clique_is_only_candidate(): with pytest.raises( ValueError, - match="Asked to create 1 clique negative samples with 3 nodes each, but only 0 are available", + match="Asked to create 1 clique negative samples with 3 nodes each, " + "but only 0 are available", ): sampler.sample(hdata) diff --git a/hyperbench/tests/data/splitter_test.py b/hyperbench/tests/data/splitter_test.py index 84b6d7ff..fca637a3 100644 --- a/hyperbench/tests/data/splitter_test.py +++ b/hyperbench/tests/data/splitter_test.py @@ -187,7 +187,7 @@ def test_default_dataset_splitter_rebalances_first_split_to_cover_all_nodes(): assert torch.equal(split_labels.sort().values, hdata.y) -def test_default_dataset_splitter_returns_final_transductive_ratios_when_train_coverage_is_enabled(): +def test_default_dataset_splitter_returns_final_transductive_ratios_when_train_cov_is_enabled(): hdata = HData( x=torch.arange(4, dtype=torch.float).unsqueeze(1), hyperedge_index=torch.tensor([[0, 1, 2, 3, 0], [0, 1, 2, 3, 4]], dtype=torch.long), @@ -253,7 +253,8 @@ def test_default_dataset_splitter_raises_when_node_is_missing_from_all_hyperedge with pytest.raises( ValueError, match=re.escape( - "Cannot create a transductive first split covering all nodes because these node ids do not appear in any hyperedge: [3]." + "Cannot create a transductive first split covering all nodes because these " + "node ids do not appear in any hyperedge: [3]." 
), ): DefaultDatasetSplitter().split( @@ -306,7 +307,7 @@ def test_hyperedge_id_splitter_get_hyperedge_ids_permutation_is_deterministic_wi assert torch.equal(permutation_a.sort().values, torch.arange(5, dtype=torch.long)) -def test_hyperedge_id_splitter_split_uses_cumulative_floor_boundaries_and_last_split_absorbs_remainder( +def test_hyperedge_id_splitter_split_cumulative_floor_boundaries_and_last_split_absorbs_remainder( mock_hdata_five_hyperedges, ): hyperedge_ids = torch.arange(5, dtype=torch.long) @@ -357,7 +358,8 @@ def test_split_validates_ratio_values( # 3/5 and 2/5 as we ensure splits don't get more then requested, # in this way, all later splits get at least what they requested, # except the last one that might get slightly more due to rounding. - # This effect is mitigated the more hyperedges we have, as the ratios get closer to the requested ones. + # This effect is mitigated the more hyperedges we have, as the ratios get closer to the + # requested ones. [0.6, 0.4], id="five_hyperedges_rounds_train_up", ), @@ -368,7 +370,7 @@ def test_split_validates_ratio_values( torch.arange( 500, dtype=torch.long, - ), # 500 hyperedges, 125 per node, so we can split exactly according to the ratios + ), # 500 hyperedges, 125 per node, so we can split according to the ratios ] ), [375, 125], @@ -398,7 +400,7 @@ def test_hyperedge_id_splitter_split_returns_expected_cumulative_ratios( assert final_ratios == pytest.approx(expected_final_ratios) -def test_hyperedge_id_splitter_ensure_split_covers_all_nodes_moves_best_covering_hyperedge_into_first_split(): +def test_hyperedge_id_splitter_split_covers_all_nodes_moves_best_covering_he_in_first_split(): x = torch.ones((4, 1), dtype=torch.float32) hyperedge_index = torch.tensor( [ @@ -459,7 +461,7 @@ def test_hyperedge_id_splitter_ensure_split_covers_all_nodes_rejects_invalid_spl ) -def test_hyperedge_id_splitter_ensure_split_covers_all_nodes_raises_when_node_is_missing_from_hypergraph(): +def 
test_hyperedge_id_splitter_ensure_split_covers_all_nodes_raises_when_node_is_missing(): x = torch.ones((4, 1), dtype=torch.float32) hyperedge_index = torch.tensor([[0, 1, 2], [0, 0, 1]], dtype=torch.long) hdata = HData(x=x, hyperedge_index=hyperedge_index) @@ -472,7 +474,8 @@ def test_hyperedge_id_splitter_ensure_split_covers_all_nodes_raises_when_node_is with pytest.raises( ValueError, match=re.escape( - "Cannot create a transductive first split covering all nodes because these node ids do not appear in any hyperedge: [3]." + "Cannot create a transductive first split covering all nodes because these " + "node ids do not appear in any hyperedge: [3]." ), ): splitter.ensure_split_covers_all_nodes( diff --git a/hyperbench/tests/train/latex_logger_test.py b/hyperbench/tests/train/latex_logger_test.py index 2ad6a5bf..aa302a42 100644 --- a/hyperbench/tests/train/latex_logger_test.py +++ b/hyperbench/tests/train/latex_logger_test.py @@ -254,7 +254,8 @@ def test_finalize_writes_comprehensive_overall_table_trail(tmp_path, mock_option \multicolumn{3}{c}{\textbf{Test Results}} \\ \midrule Model & test\_auc & test\_loss \\ - model\_a & \cellcolor[HTML]{59FF59}\underline{0.9123} & \cellcolor[HTML]{59FF59}\underline{0.1230} \\ + model\_a & \cellcolor[HTML]{59FF59}\underline{0.9123} & """ + r"""\cellcolor[HTML]{59FF59}\underline{0.1230} \\ model\_b & \cellcolor[HTML]{FF5959}0.8821 & - \\ \hline \addlinespace[3pt] @@ -310,7 +311,8 @@ def test_finalize_writes_comprehensive_test_table_trail(tmp_path, mock_option_co \multicolumn{3}{c}{\textbf{Test Results}} \\ \midrule Model & test\_auc & test\_loss \\ - model\_a & \cellcolor[HTML]{59FF59}\underline{0.9123} & \cellcolor[HTML]{59FF59}\underline{0.1234} \\ + model\_a & \cellcolor[HTML]{59FF59}\underline{0.9123} & """ + r"""\cellcolor[HTML]{59FF59}\underline{0.1234} \\ model\_b & \cellcolor[HTML]{FF5959}0.8821 & - \\ \hline \end{tabular} diff --git a/hyperbench/tests/types/graph_test.py b/hyperbench/tests/types/graph_test.py index 
4179f6f7..f629bac6 100644 --- a/hyperbench/tests/types/graph_test.py +++ b/hyperbench/tests/types/graph_test.py @@ -232,7 +232,8 @@ def test_to_edge_index_is_contiguous(mock_single_edge_graph): Examples: If edges = [[0, 1]], then edge_index = [[0], [1]] should be contiguous. - If edges = [[0, 1], [1, 2], [2, 3]], then edge_index = [[0, 1, 2], [1, 2, 3]] should be contiguous. + If edges = [[0, 1], [1, 2], [2, 3]], then edge_index = [[0, 1, 2], [1, 2, 3]] + should be contiguous. """ edge_index = mock_single_edge_graph.to_edge_index() assert edge_index.is_contiguous() @@ -281,7 +282,9 @@ def test_bidirectional_edges(): def test_star_graph(): - """Test star graph (all edges connected to central node).""" + """ + Test star graph (all edges connected to central node). + """ graph = Graph([[0, 1], [0, 2], [0, 3], [0, 4]]) assert graph.num_nodes == 5 assert graph.num_edges == 4 @@ -292,7 +295,9 @@ def test_star_graph(): def test_cyclic_graph(): - """Test cyclic graph (a closed loop).""" + """ + Test cyclic graph (a closed loop). + """ graph = Graph([[0, 1], [1, 2], [2, 3], [3, 0]]) assert graph.num_nodes == 4 assert graph.num_edges == 4 @@ -326,7 +331,9 @@ def test_smoothing_with_laplacian_output_shape_matches_x_shape(num_nodes, num_fe def test_smoothing_with_laplacian_with_identity_laplacian_returns_original_x(): - """Smoothing with identity laplacian should return the original features.""" + """ + Smoothing with identity laplacian should return the original features. + """ num_nodes = 3 x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=torch.float) @@ -475,6 +482,7 @@ def test_smoothing_with_laplacian_drop_rate_stochastic(): def test_smoothing_with_laplacian_influences_connected_nodes(): """ Features of connected nodes should be aggregated. + For a connected graph with GCN normalization, smoothing should mix features from neighbors. 
""" # Two connected nodes with distinct features @@ -718,7 +726,9 @@ def test_get_sparse_adjacency_matrix_shape(edge_index, num_nodes): def test_get_sparse_adjacency_matrix_empty_edge_index(): - """Empty edge_index produces all-zero adjacency matrix when converted to dense.""" + """ + Empty edge_index produces all-zero adjacency matrix when converted to dense. + """ edge_index = torch.tensor([[], []], dtype=torch.long) adj_matrix = EdgeIndex(edge_index).get_sparse_adjacency_matrix(num_nodes=3) dense_adj_matrix = adj_matrix.to_dense() @@ -898,7 +908,9 @@ def test_get_sparse_adjacency_matrix_ignores_stored_edge_weights_by_default(): ], ) def test_get_sparse_adjacency_matrix_isolated_nodes(edge_index, num_nodes, isolated_nodes): - """Nodes not in edge_index have zero rows and columns.""" + """ + Nodes not in edge_index have zero rows and columns. + """ adj_matrix = EdgeIndex(edge_index).get_sparse_adjacency_matrix(num_nodes=num_nodes) dense_adj_matrix = adj_matrix.to_dense() @@ -997,7 +1009,9 @@ def test_get_sparse_normalized_degree_matrix_isolated_nodes_are_zero(): def test_get_sparse_normalized_degree_matrix_empty_edge_index(): - """Empty edge_index produces all-zero matrix (all nodes isolated).""" + """ + Empty edge_index produces all-zero matrix (all nodes isolated). 
+ """ edge_index = torch.tensor([[], []], dtype=torch.long) degree_matrix = EdgeIndex(edge_index).get_sparse_normalized_degree_matrix(num_nodes=3) @@ -1144,7 +1158,8 @@ def test_get_sparse_normalized_laplacian_has_features_for_isolated_nodes(): # isolated nodes are not in the edge_index edge_index = torch.tensor([[0], [1]], dtype=torch.long) - # we want all nodes in the gcn laplacian, so we specify num_nodes=4 to include nodes 2 and 3 which are isolated + # we want all nodes in the gcn laplacian, so we specify num_nodes=4 to include nodes 2 and 3 + # which are isolated gcn_laplacian = EdgeIndex(edge_index).get_sparse_normalized_gcn_laplacian(num_nodes=4) dense_gcn_laplacian = gcn_laplacian.to_dense() @@ -1328,7 +1343,9 @@ def test_get_sparse_normalized_laplacian_is_symmetric(): def test_get_sparse_normalized_laplacian_diagonal_values(): - """For a connected graph without self-loops, diagonal of the laplacian should be non-negative.""" + """ + For a connected graph without self-loops, diagonal of the laplacian should be non-negative. + """ edge_index = EdgeIndex(torch.tensor([[0, 1], [1, 0]], dtype=torch.long)) laplacian = edge_index.get_sparse_normalized_laplacian(num_nodes=2) dense_laplacian = laplacian.to_dense() diff --git a/hyperbench/tests/types/hdata_test.py b/hyperbench/tests/types/hdata_test.py index d3fdf5cd..99a71489 100644 --- a/hyperbench/tests/types/hdata_test.py +++ b/hyperbench/tests/types/hdata_test.py @@ -277,8 +277,8 @@ def test_init_hyperedge_attr_defaults_to_none(): "hyperedge_index": torch.tensor([[0, 1, 2], [0, 0, 0]], dtype=torch.long), }, ( - "'x' must have one feature row per node, or be 'torch.empty((0, 0))' if there are no " - "nodes. Got x.shape=(2, 2) but num_nodes=3." + "'x' must have one feature row per node, or be 'torch.empty((0, 0))' " + "if there are no nodes. Got x.shape=(2, 2) but num_nodes=3." 
), id="x_rows_do_not_match_num_nodes", ), @@ -648,7 +648,8 @@ def test_cat_same_node_space_raises_on_overlapping_hyperedge_ids(): with pytest.raises( ValueError, match=re.escape( - "Overlapping hyperedge IDs found across instances. Ensure each instance uses distinct hyperedge IDs." + "Overlapping hyperedge IDs found across instances. Ensure each " + "instance uses distinct hyperedge IDs." ), ): HData.cat_same_node_space([hdata1, hdata2]) @@ -1443,7 +1444,8 @@ def test_enrich_node_features_from_raises_when_source_rows_do_not_match_global_n with pytest.raises( ValueError, match=re.escape( - "Expected 'hdata_with_features.x' rows to align with hdata_with_features.global_node_ids." + "Expected 'hdata_with_features.x' rows to align with " + "hdata_with_features.global_node_ids." ), ): target_hdata.enrich_node_features_from(source_hdata) @@ -2053,7 +2055,8 @@ def test_remove_hyperedges_with_fewer_than_k_nodes(hyperedge_index, k, expected_ id="disjoint_nodes_first_hyperedge_removed", ), pytest.param( - # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed), hyperedge 1: nodes {1, 2, 3} -> 3 nodes (kept) + # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed), hyperedge 1: nodes {1, 2, 3} + # -> 3 nodes (kept) # Node 2 is shared, so it survives because hyperedge 1 is kept # Node 0 is the only node removed as it is only in the removed hyperedge 0 torch.tensor([[0, 2, 1, 2, 3], [0, 0, 1, 1, 1]], dtype=torch.long), @@ -2082,7 +2085,8 @@ def test_remove_hyperedges_with_fewer_than_k_nodes_subsets_x(hyperedge_index, k, id="disjoint_nodes_first_hyperedge_removed", ), pytest.param( - # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed). hyperedge 1: nodes {1, 2, 3} -> 3 nodes (kept) + # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed). 
hyperedge 1: nodes {1, 2, 3} + # -> 3 nodes (kept) # Node 2 is shared, so y for hyperedge 1 must survive torch.tensor([[0, 2, 1, 2, 3], [0, 0, 1, 1, 1]], dtype=torch.long), 3, @@ -2112,7 +2116,8 @@ def test_remove_hyperedges_with_fewer_than_k_nodes_subsets_y(hyperedge_index, k, id="disjoint_nodes_first_hyperedge_removed", ), pytest.param( - # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed), hyperedge 1: nodes {1, 2, 3} -> 3 nodes (kept) + # Hyperedge 0: nodes {0, 2} -> 2 nodes (removed), hyperedge 1: nodes {1, 2, 3} + # -> 3 nodes (kept) # Node 2 is shared, so attr for hyperedge 1 must survive torch.tensor([[0, 2, 1, 2, 3], [0, 0, 1, 1, 1]], dtype=torch.long), 3, @@ -2165,7 +2170,7 @@ def test_remove_hyperedges_with_fewer_than_k_nodes_subsets_global_node_ids_when_ assert torch.equal(result.global_node_ids, torch.tensor([30, 40, 50], dtype=torch.long)) -def test_remove_hyperedges_with_fewer_than_k_nodes_does_not_subset_global_node_ids_when_preserve_false(): +def test_remove_hyperedges_with_fewer_than_k_nodes_not_subset_global_node_ids_when_preserve_false(): x = torch.randn(5, 2, dtype=torch.float) hyperedge_index = torch.tensor([[0, 1, 2, 3, 4], [0, 0, 1, 1, 1]], dtype=torch.long) hdata = HData(x=x, hyperedge_index=hyperedge_index) diff --git a/hyperbench/tests/types/hypergraph_test.py b/hyperbench/tests/types/hypergraph_test.py index d5a3e0c1..cbf484af 100644 --- a/hyperbench/tests/types/hypergraph_test.py +++ b/hyperbench/tests/types/hypergraph_test.py @@ -11,7 +11,9 @@ @pytest.fixture(autouse=True) def seed(): - """Fix random seed for deterministic projections.""" + """ + Fix random seed for deterministic projections. 
+ """ torch.manual_seed(42) diff --git a/hyperbench/tests/utils/nn_utils_test.py b/hyperbench/tests/utils/nn_utils_test.py index 21694862..fafea6e7 100644 --- a/hyperbench/tests/utils/nn_utils_test.py +++ b/hyperbench/tests/utils/nn_utils_test.py @@ -81,14 +81,16 @@ def test_maxmin_scatter_respects_explicit_dim_size(): # Example: # - index[1] == 0 means src[1] = [3, 1] contributes to output row 0. # - index[2] == 2 means src[2] = [-2, 7] contributes to output row 2. - # Missing group ids indicate that those groups receive no source rows, so group 1 and group 3 are empty. + # Missing group ids indicate that those groups receive no source rows, so group 1 + # and group 3 are empty. index = torch.tensor([0, 0, 2], dtype=torch.long) # dim_size=4 forces four output rows even though max(index) would only imply three rows. result = maxmin_scatter(src=src, index=index, dim=0, dim_size=4) # Group 0 receives [1, 4] and [3, 1], so its range is [2, 3]. - # Group 2 receives only row [-2, 7], so max(-2) - min(-2) and max(7) - min(7) are both 0 and the range is [0, 0]. + # Group 2 receives only row [-2, 7], so max(-2) - min(-2) and max(7) - min(7) are both 0 and + # the range is [0, 0]. # Empty groups 1 and 3 follow torch_geometric.scatter's neutral empty output, # so max and min both become [0, 0], and max - min is also [0, 0]. 
expected = torch.tensor( diff --git a/hyperbench/tests/utils/sparse_utils_test.py b/hyperbench/tests/utils/sparse_utils_test.py index f3adfe22..77f399d0 100644 --- a/hyperbench/tests/utils/sparse_utils_test.py +++ b/hyperbench/tests/utils/sparse_utils_test.py @@ -16,7 +16,6 @@ def mock_values(): def test_dropout_zero_probability(mock_indices, mock_values): - """Test that zero dropout probability returns the original sparse tensor.""" sparse_tensor = torch.sparse_coo_tensor( mock_indices, mock_values, (3, 3), dtype=mock_values.dtype ) @@ -28,7 +27,6 @@ def test_dropout_zero_probability(mock_indices, mock_values): def test_dropout_full_probability(mock_indices, mock_values): - """Test that full dropout probability (1.0) drops all elements.""" sparse_tensor = torch.sparse_coo_tensor( mock_indices, mock_values, (3, 3), dtype=mock_values.dtype ) @@ -43,7 +41,6 @@ def test_dropout_full_probability(mock_indices, mock_values): @pytest.mark.parametrize("invalid_prob", [-0.5, 1.5]) def test_dropout_invalid_probability_out_of_range(mock_indices, mock_values, invalid_prob): - """Test that dropout probability below 0 raises ValueError.""" sparse_tensor = torch.sparse_coo_tensor( mock_indices, mock_values, (2, 2), dtype=mock_values.dtype ) @@ -56,7 +53,6 @@ def test_dropout_invalid_probability_out_of_range(mock_indices, mock_values, inv def test_dropout_preserves_indices(): - """Test that dropout preserves the sparsity pattern (indices) unchanged.""" indices = torch.tensor([[0, 1, 2, 0], [0, 1, 2, 2]], dtype=torch.long) values = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(indices, values, (3, 3), dtype=values.dtype) @@ -68,7 +64,6 @@ def test_dropout_preserves_indices(): def test_dropout_preserves_shape(): - """Test that dropout preserves the tensor shape.""" shape = (5, 10) # Shape of the tensor if it were dense indices = torch.tensor([[0, 2, 4], [1, 5, 9]], dtype=torch.long) values = torch.tensor([1.0, 2.0, 3.0], 
dtype=torch.float) @@ -80,7 +75,6 @@ def test_dropout_preserves_shape(): def test_dropout_preserves_dtype(): - """Test that dropout preserves the tensor dtype.""" indices = torch.tensor([[0, 1], [0, 1]], dtype=torch.long) values = torch.tensor([1.0, 2.0], dtype=torch.float32) sparse_tensor = torch.sparse_coo_tensor(indices, values, (2, 2), dtype=torch.float32) @@ -91,7 +85,6 @@ def test_dropout_preserves_dtype(): def test_dropout_with_fill_value_zero(mock_indices): - """Test dropout with fill_value=0.0 (default behavior).""" values = torch.tensor([5.0, 10.0, 15.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(mock_indices, values, (3, 3), dtype=values.dtype) @@ -105,7 +98,6 @@ def test_dropout_with_fill_value_zero(mock_indices): def test_dropout_with_nonzero_fill_value(mock_indices): - """Test dropout with a non-zero fill_value.""" values = torch.tensor([5.0, 10.0, 15.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(mock_indices, values, (3, 3), dtype=values.dtype) fill_value = 99.0 @@ -120,7 +112,6 @@ def test_dropout_with_nonzero_fill_value(mock_indices): def test_dropout_with_negative_values(): - """Test dropout with negative values in the sparse tensor.""" indices = torch.tensor([[0, 1, 2], [0, 1, 2]], dtype=torch.long) values = torch.tensor([-1.0, -5.0, -10.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(indices, values, (3, 3), dtype=values.dtype) @@ -133,7 +124,6 @@ def test_dropout_with_negative_values(): def test_dropout_preserves_cpu_device(): - """Test that dropout preserves the device.""" device = torch.device("cpu") indices = torch.tensor([[0, 1], [0, 1]], device=device, dtype=torch.long) @@ -149,7 +139,6 @@ def test_dropout_preserves_cpu_device(): @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda not available") def test_dropout_preserves_cuda_device(): - """Test that dropout preserves the device.""" device = torch.device("cuda") indices = torch.tensor([[0, 1], [0, 1]], device=device, 
dtype=torch.long) @@ -165,7 +154,6 @@ def test_dropout_preserves_cuda_device(): @pytest.mark.skipif(not torch.mps.is_available(), reason="MPS not available") def test_dropout_preserves_mps_device(): - """Test that dropout preserves the device.""" device = torch.device("mps") indices = torch.tensor([[0, 1], [0, 1]], device=device, dtype=torch.long) @@ -180,7 +168,6 @@ def test_dropout_preserves_mps_device(): def test_dropout_fill_value_with_full_dropout(): - """Test that fill_value is applied correctly when dropout is 1.0.""" indices = torch.tensor([[0, 1], [0, 1]], dtype=torch.long) values = torch.tensor([1.0, 2.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(indices, values, (2, 2), dtype=values.dtype) @@ -195,7 +182,6 @@ def test_dropout_fill_value_with_full_dropout(): def test_dropout_with_unsorted_indices(): - """Test that dropout handles unsorted indices correctly.""" # Create a sparse tensor with unsorted/duplicate indices indices = torch.tensor([[2, 0, 1, 0], [2, 0, 1, 0]], dtype=torch.long) values = torch.tensor([3.0, 1.0, 2.0, 4.0], dtype=torch.float) @@ -214,7 +200,6 @@ def test_dropout_with_unsorted_indices(): def test_dropout_single_element(): - """Test dropout on a sparse tensor with a single element.""" indices = torch.tensor([[0], [0]], dtype=torch.long) values = torch.tensor([42.0], dtype=torch.float) sparse_tensor = torch.sparse_coo_tensor(indices, values, (1, 1), dtype=values.dtype) @@ -229,7 +214,6 @@ def test_dropout_single_element(): def test_dropout_large_sparse_matrix(): - """Test dropout on a large sparse matrix.""" size = 1000 num_nonzero_elements = 500 rows = torch.randint(0, size, (num_nonzero_elements,), dtype=torch.long) @@ -245,7 +229,6 @@ def test_dropout_large_sparse_matrix(): def test_dropout_returns_new_tensor(mock_indices, mock_values): - """Test that dropout returns a new tensor, not a reference to the original.""" sparse_tensor = torch.sparse_coo_tensor( mock_indices, mock_values, (2, 2), dtype=mock_values.dtype ) 
@@ -257,7 +240,6 @@ def test_dropout_returns_new_tensor(mock_indices, mock_values): def test_dropout_statistical_property_moderate_rate(): - """Test that dropout approximately respects the expected keep probability.""" # Create a larger sparse tensor for statistical testing num_elements = 1000 diff --git a/hyperbench/train/latex_logger.py b/hyperbench/train/latex_logger.py index e08126b2..990cd474 100644 --- a/hyperbench/train/latex_logger.py +++ b/hyperbench/train/latex_logger.py @@ -69,7 +69,7 @@ class LaTexTableConfig(TypedDict): """ Configuration for the LaTex table logger. - Args: + Attributes: table_caption: Caption for the LaTex table. sort_by: Per-column sorting criteria ("asc" or "des"). border: Whether to include borders in the LaTex table. @@ -81,8 +81,6 @@ class LaTexTableConfig(TypedDict): class LaTexTableLogger(Logger): - # TODO: settings has to be configurable in Trainer - """A Lightning Logger that accumulates metrics and writes a LaTex comparison table. Multiple instances (one per model) share a class-level store keyed by experiment_name. diff --git a/hyperbench/train/markdown_logger.py b/hyperbench/train/markdown_logger.py index 6c2e7afb..84bc084e 100644 --- a/hyperbench/train/markdown_logger.py +++ b/hyperbench/train/markdown_logger.py @@ -145,11 +145,14 @@ def __split_results( - "train*" --> train_results - "val*" --> val_results - anything else (e.g., "epoch") --> ignored + Models with no metrics in a category are excluded from that category's dict. Returns: results: Tuple of (test_results, train_results, val_results), where each is a dict - mapping model names to their respective metric dicts. Models with no metrics - in a category are excluded from that category's dict. + mapping model names to their respective metric dicts. + test_results: Dict mapping model names to their test metric dicts. + train_results: Dict mapping model names to their train metric dicts. + val_results: Dict mapping model names to their val metric dicts. 
""" store = self.__shared_stores.get(self.__experiment_name, {}) test_results: dict[str, dict[str, float]] = {} @@ -183,7 +186,6 @@ def clear(self, experiment_name: str) -> None: Args: experiment_name: The experiment name whose data should be cleared. - """ self.__shared_stores.pop(experiment_name, None) @@ -220,8 +222,6 @@ def __build_comparison_table( Returns: table: Markdown table string. Returns an empty string if ``results`` is empty. - - """ if not results: return "" diff --git a/hyperbench/train/trainer.py b/hyperbench/train/trainer.py index 626b7c2a..55bf8db3 100644 --- a/hyperbench/train/trainer.py +++ b/hyperbench/train/trainer.py @@ -28,18 +28,21 @@ class MultiModelTrainer: A trainer class to handle training multiple models with individual trainers. Args: - model_configs: A list of ModelConfig objects, each containing a model and its associated trainer (if any). + model_configs: A list of ModelConfig objects, each containing a model and its + associated trainer (if any). experiment_name: Name for this experiment run's log directory. When ``None`` (default), - auto-increments as ``experiment_0``, ``experiment_1``, etc. under the log root directory. - Only used when ``logger`` is not provided. + auto-increments as ``experiment_0``, ``experiment_1``, etc. under + the log root directory. Only used when ``logger`` is not provided. - accelerator: Supports passing different accelerator types ("cpu", "gpu", "tpu", "hpu", "mps", "auto") + accelerator: Supports passing different accelerator types + ("cpu", "gpu", "tpu", "hpu", "mps", "auto") as well as custom accelerator instances. - devices: The devices to use. Can be set to a positive number (int or str), a sequence of device indices - (list or str), the value ``-1`` to indicate all available devices should be used, or ``"auto"`` for - automatic selection based on the chosen accelerator. Defaults to ``"auto"``. + devices: The devices to use. 
Can be set to a positive number (int or str), a + sequence of device indices (list or str), the value ``-1`` to indicate all available + devices should be used, or ``"auto"`` for automatic selection based on the chosen + accelerator. Defaults to ``"auto"``. strategy: Supports different training strategies with aliases as well custom strategies. Defaults to ``"auto"``. @@ -47,8 +50,9 @@ class MultiModelTrainer: num_nodes: Number of GPU nodes for distributed training. Defaults to ``1``. - precision: Double precision (64, '64' or '64-true'), full precision (32, '32' or '32-true'), - 16bit mixed precision (16, '16', '16-mixed') or bfloat16 mixed precision ('bf16', 'bf16-mixed'). + precision: Double precision (64, '64' or '64-true'), + full precision (32, '32' or '32-true'), 16bit mixed precision (16, '16', '16-mixed') or + bfloat16 mixed precision ('bf16', 'bf16-mixed'). Can be used on CPU, GPU, TPUs, or HPUs. Defaults to ``'32-true'``. @@ -58,27 +62,28 @@ class MultiModelTrainer: min_epochs: Force training for at least these many epochs. Disabled by default (None). - max_steps: Stop training after this number of steps. Disabled by default (-1). If ``max_steps = -1`` - and ``max_epochs = None``, will default to ``max_epochs = 1000``. To enable infinite training, set - ``max_epochs`` to ``-1``. - - min_steps: Force training for at least these number of steps. Disabled by default (``None``). - - check_val_every_n_epoch: Perform a validation loop after every `N` training epochs. If ``None``, - validation will be done solely based on the number of training batches, requiring ``val_check_interval`` - to be an integer value. 
When used together with a time-based ``val_check_interval`` and - ``check_val_every_n_epoch`` > 1, validation is aligned to epoch multiples: if the interval elapses - before the next multiple-N epoch, validation runs at the start of that epoch (after the first batch) - and the timer resets; if it elapses during a multiple-N epoch, validation runs after the current batch. - For ``None`` or ``1`` cases, the time-based behavior of ``val_check_interval`` applies without - additional alignment. - Defaults to ``1``. - - logger: Logger (or iterable collection of loggers) for experiment tracking. A ``True`` value uses - the default ``TensorBoardLogger`` if it is installed, otherwise ``CSVLogger``. - ``False`` will disable logging. If multiple loggers are provided, local files - (checkpoints, profiler traces, etc.) are saved in the ``log_dir`` of the first logger. - Defaults to ``True``. + max_steps: Stop training after this number of steps. Disabled by default (-1). + If ``max_steps = -1`` and ``max_epochs = None``, will default to ``max_epochs = 1000``. + To enable infinite training, set ``max_epochs`` to ``-1``. + + min_steps: Force training for at least this number of steps. + Disabled by default (``None``). + + check_val_every_n_epoch: Perform a validation loop after every `N` training epochs. + If ``None``, validation will be done solely based on the number of training batches, + requiring ``val_check_interval`` to be an integer value. When used together with a + time-based ``val_check_interval`` and ``check_val_every_n_epoch`` > 1, validation is + aligned to epoch multiples: if the interval elapses before the next multiple-N epoch, + validation runs at the start of that epoch (after the first batch) and the timer resets; + if it elapses during a multiple-N epoch, validation runs after the current batch. + For ``None`` or ``1`` cases, the time-based behavior of ``val_check_interval`` + applies without additional alignment. Defaults to ``1``. 
+ + logger: Logger (or iterable collection of loggers) for experiment tracking. A ``True`` + value uses the default ``TensorBoardLogger`` if it is installed, + otherwise ``CSVLogger``. ``False`` will disable logging. If multiple loggers are + provided, local files (checkpoints, profiler traces, etc.) are saved in the ``log_dir`` + of the first logger. Defaults to ``True``. default_root_dir: Default path for logs and weights when no logger/ckpt_callback passed. Defaults to ``os.getcwd()``. @@ -98,8 +103,8 @@ class MultiModelTrainer: Defaults to ``False``. enable_checkpointing: If ``True``, enable checkpointing. - It will configure a default ModelCheckpoint callback if there is no user-defined ModelCheckpoint in - :paramref:`~hyperbench.train.MultiModelTrainer.callbacks`. + It will configure a default ModelCheckpoint callback if there is no user-defined + ModelCheckpoint in :paramref:`~hyperbench.train.MultiModelTrainer.callbacks`. Defaults to ``True``. enable_progress_bar: Whether to enable the progress bar by default. @@ -119,10 +124,11 @@ class MultiModelTrainer: auto_start_tensorboard: When ``True`` and tensorboard is installed, automatically starts a TensorBoard server pointing at the experiment log directory. - Using this option requires that TensorBoard is installed in the environment and moves control - of the TensorBoard server lifecycle to the trainer, which will automatically terminate the server - when the trainer is finalized (e.g., at the end of a `with` block or when the object is garbage collected). - Enable `auto_wait` to keep the server alive after training completes so you can inspect results before the trainer is finalized. + Using this option requires that TensorBoard is installed in the environment and moves + control of the TensorBoard server lifecycle to the trainer, which will automatically + terminate the server when the trainer is finalized (e.g., at the end of a `with` block + or when the object is garbage collected). 
Enable `auto_wait` to keep the server alive + after training completes so you can inspect results before the trainer is finalized. Defaults to ``False``. tensorboard_port: Port for the auto-launched TensorBoard server. @@ -272,7 +278,8 @@ def fit_all( if not config.is_trainable: if verbose: print( - f"Skipping training for model {config.full_model_name()} [{i + 1}/{len(self.model_configs)} models] (is_trainable=False)" + f"Skipping training for model {config.full_model_name()} " + f"[{i + 1}/{len(self.model_configs)} models] (is_trainable=False)" ) continue @@ -335,7 +342,8 @@ def test_all( verbose=verbose_loop, ) - # In Lightning, test() returns a list of dicts, one per dataloader, but we use a single dataloader + # In Lightning, test() returns a list of dicts, one per dataloader, + # but we use a single dataloader test_results[config.full_model_name()] = ( trainer_test_results[0] if len(trainer_test_results) > 0 else {} ) @@ -352,6 +360,7 @@ def finalize(self) -> None: def wait(self) -> None: """ Wait until the user presses Enter, keeping process alive. + If no process is running, this method does nothing. """ # For now, we only use this for waiting on TensorBoard, but this can be extended @@ -373,7 +382,8 @@ def __auto_start_tensorboard_if_enabled(self) -> None: else: warnings.warn( "TensorBoard is not available. " - "Install it with `pip install hyperbench[tensorboard]` or `pip install tensorboard`" + "Install it with `pip install hyperbench[tensorboard]` or " + "`pip install tensorboard` " "to enable auto-start.", category=UserWarning, stacklevel=2, diff --git a/hyperbench/types/graph.py b/hyperbench/types/graph.py index c8943bac..d8f31ccf 100644 --- a/hyperbench/types/graph.py +++ b/hyperbench/types/graph.py @@ -12,8 +12,10 @@ class Graph: A simple graph data structure using edge list representation. Args: - edges: A list of edges, where each edge is represented as a list of two integers (source_node, destination_node). 
- edge_weights: Optional list of edge weights corresponding to each edge in ``edges``. If provided, must have the same length as ``edges``. + edges: A list of edges, where each edge is represented as a list of two integers + (source_node, destination_node). + edge_weights: Optional list of edge weights corresponding to each edge in ``edges``. + If provided, must have the same length as ``edges``. """ def __init__(self, edges: list[list[int]], edge_weights: list[float] | None = None): @@ -23,19 +25,25 @@ def __init__(self, edges: list[list[int]], edge_weights: list[float] | None = No @property def edge_weights(self) -> list[float] | None: - """Return the edge weights, if present.""" + """ + Return the edge weights, if present. + """ return self.__edge_weights @property def edge_weights_tensor(self) -> Tensor: - """Return the edge weights as a tensor, if present.""" + """ + Return the edge weights as a tensor, if present. + """ if self.__edge_weights is not None: return torch.tensor(self.__edge_weights, dtype=torch.float) return torch.empty(0, dtype=torch.float) @property def num_nodes(self) -> int: - """Return the number of nodes in the graph.""" + """ + Return the number of nodes in the graph. + """ nodes = set() for edge in self.edges: nodes.update(edge) @@ -43,7 +51,9 @@ def num_nodes(self) -> int: @property def num_edges(self) -> int: - """Return the number of edges in the graph.""" + """ + Return the number of edges in the graph. + """ return len(self.edges) def remove_selfloops(self) -> Graph: @@ -120,7 +130,8 @@ def smoothing_with_laplacian_matrix( Args: x: Node feature matrix. Size ``(num_nodes, C)``. laplacian_matrix: The Laplacian matrix. Size ``(num_nodes, num_nodes)``. - drop_rate: Randomly dropout the connections in the Laplacian with probability ``drop_rate``. Defaults to ``0.0``. + drop_rate: Randomly dropout the connections in the Laplacian with probability + ``drop_rate``. Defaults to ``0.0``. Returns: x: The smoothed feature matrix. 
Size ``(num_nodes, C)``. @@ -135,7 +146,8 @@ def smoothing_with_laplacian_matrix( class EdgeIndex: """ A wrapper for edge index representation of a graph. - Edge index is a tensor of shape ``(2, num_edges)`` where the first row contains source node indices + Edge index is a tensor of shape ``(2, num_edges)`` where the first row contains source + node indices and the second row contains destination node indices for each edge. Examples: @@ -161,24 +173,32 @@ def __init__( @property def item(self) -> Tensor: - """Return the edge index tensor.""" + """ + Return the edge index tensor. + """ return self.__edge_index @property def edge_weights(self) -> Tensor | None: - """Return the edge weight tensor, if present.""" + """ + Return the edge weight tensor, if present. + """ return self.__edge_weights @property def max_node_id(self) -> int: - """Return the maximum node ID in the edge index.""" + """ + Return the maximum node ID in the edge index. + """ if self.__edge_index.size(1) < 1: return -1 return int(self.__edge_index.max()) @property def num_edges(self) -> int: - """Return the number of edges in the graph.""" + """ + Return the number of edges in the graph. + """ if self.__edge_index.size(1) < 1: return 0 # Number of edges is the number of columns in edge_index, which is dim=1, @@ -187,7 +207,9 @@ def num_edges(self) -> int: @property def num_nodes(self) -> int: - """Return the number of nodes in the graph.""" + """ + Return the number of nodes in the graph. + """ if self.__edge_index.size(1) < 1: return 0 unique_nodes = torch.unique(self.__edge_index) @@ -218,10 +240,14 @@ def add_selfloops( ... [1, 0, 3, 0, 1, 2, 3, 4, 5]] Args: - num_nodes: Total number of nodes. When provided, self-loops are added for nodes ``0`` to ``num_nodes - 1``. When ``None``, defaults to ``self.num_nodes``. 
- This parameter is important when ``edge_index`` does not contain all nodes (e.g., some nodes are isolated and have no edges or have been removed), - as it ensures that the resulting Laplacian matrix has the correct size and includes all nodes. For instance, for self-loops. - with_duplicate_removal: Whether to remove duplicate edges after adding self-loops. Defaults to ``True``. + num_nodes: Total number of nodes. When provided, self-loops are added for nodes ``0`` + to ``num_nodes - 1``. When ``None``, defaults to ``self.num_nodes``. + This parameter is important when ``edge_index`` does not contain all nodes + (e.g., some nodes are isolated and have no edges or have been removed), + as it ensures that the resulting Laplacian matrix has the correct size and includes + all nodes. For instance, for self-loops. + with_duplicate_removal: Whether to remove duplicate edges after adding self-loops. + Defaults to ``True``. Returns: edge_index: This `EdgeIndex` instance with self-loops added. @@ -302,7 +328,8 @@ def get_sparse_adjacency_matrix( Args: num_nodes: The number of nodes in the graph. If ``None``, it will be inferred from ``self.num_nodes``. - Note that the node indices in ``edge_index`` are assumed to be in the range [0, num_nodes-1]. + Note that the node indices in ``edge_index`` are assumed to be in the + range [0, num_nodes-1]. use_edge_weights: Whether to use edge weights if they are present. If ``False``, all edges will have weight 1. Defaults to ``False``. 
@@ -373,7 +400,8 @@ def get_sparse_identity_matrix(self, num_nodes: int | None = None) -> Tensor: # Example: num_nodes = 3 # -> identity_indices = [[0, 1, 2], # [0, 1, 2]] - # we use repeat(2, 1) as I is a matrix NxN, so we need indices for both rows and columns + # we use repeat(2, 1) as I is a matrix NxN, so we need indices + # for both rows and columns # -> values = [1, 1, 1] # 0 1 2 # -> I = [[1, 0, 0], 0 @@ -404,11 +432,14 @@ def get_sparse_normalized_degree_matrix( Args: num_nodes: The number of nodes in the graph. If ``None``, it will be inferred from ``self.num_nodes``. - Note that the node indices in ``edge_index`` are assumed to be in the range [0, num_nodes-1]. - use_edge_weights: If ``True``, use the edge weights from ``self.edge_weights``. If ``False``, all edges use weight 1. + Note that the node indices in ``edge_index`` are assumed to be in + the range [0, num_nodes-1]. + use_edge_weights: If ``True``, use the edge weights from ``self.edge_weights``. + If ``False``, all edges use weight 1. Returns: - degree_matrix: The sparse normalized degree matrix D^-1/2 of shape ``(num_nodes, num_nodes)``. + degree_matrix: The sparse normalized degree matrix D^-1/2 of + shape ``(num_nodes, num_nodes)``. """ num_nodes = self.num_nodes if num_nodes is None else num_nodes self.__validate_num_nodes(num_nodes) @@ -458,7 +489,7 @@ def get_sparse_normalized_laplacian( num_nodes: int | None = None, ) -> Tensor: """ - Compute the sparse symmetric normalized Laplacian matrix: L = I - D^{-1/2} A D^{-1/2}. + Compute the sparse symmetric normalized Laplacian matrix: `L = I - D^{-1/2} A D^{-1/2}`. Unlike ``get_sparse_normalized_gcn_laplacian``, this method does not add self-loops and computes the standard Laplacian (not the GCN propagation matrix). @@ -468,7 +499,8 @@ def get_sparse_normalized_laplacian( it will be inferred from ``self.num_nodes``. Returns: - laplacian: The sparse symmetric normalized Laplacian matrix of shape ``(num_nodes, num_nodes)``. 
+ laplacian: The sparse symmetric normalized Laplacian + matrix of shape ``(num_nodes, num_nodes)``. """ num_nodes = self.num_nodes if num_nodes is None else num_nodes self.__validate_num_nodes(num_nodes) @@ -496,19 +528,25 @@ def get_sparse_normalized_gcn_laplacian( """ Compute the sparse Laplacian matrix from a graph edge index. - The GCN Laplacian is defined as: L_GCN = D_hat^-1/2 * A_hat * D_hat^-1/2, - where A_hat = A + I (adjacency with self-loops) and D_hat is the degree matrix of A_hat. + The GCN Laplacian is defined as: `L_GCN = D_hat^-1/2 * A_hat * D_hat^-1/2`, + where `A_hat = A + I` (adjacency with self-loops) and `D_hat` is the degree matrix + of `A_hat`. Args: num_nodes: The number of nodes in the graph. If ``None``, it will be inferred from ``self.num_nodes``. - Note that the node indices in ``edge_index`` are assumed to be in the range [0, num_nodes-1]. - This parameter is important when ``edge_index`` does not contain all nodes (e.g., some nodes are isolated and have no edges or have been removed), - as it ensures that the resulting Laplacian matrix has the correct size and includes all nodes. For instance, for self-loops. - use_edge_weights: If ``True``, use the edge weights from ``self.edge_weights``. If ``False``, all edges use weight 1. + Note that the node indices in ``edge_index`` are assumed to be + in the range [0, num_nodes-1]. + This parameter is important when ``edge_index`` does not contain all nodes + (e.g., some nodes are isolated and have no edges or have been removed), + as it ensures that the resulting Laplacian matrix has the correct size and + includes all nodes. For instance, for self-loops. + use_edge_weights: If ``True``, use the edge weights from ``self.edge_weights``. + If ``False``, all edges use weight 1. Returns: - laplacian: The sparse symmetrically normalized Laplacian matrix of shape ``(num_nodes, num_nodes)``. + laplacian: The sparse symmetrically normalized Laplacian matrix of + shape ``(num_nodes, num_nodes)``. 
""" num_nodes = self.num_nodes if num_nodes is None else num_nodes self.__validate_num_nodes(num_nodes) @@ -530,7 +568,9 @@ def get_sparse_normalized_gcn_laplacian( return normalized_laplacian_matrix.coalesce() def remove_selfloops(self) -> EdgeIndex: - """Remove self-loops from the edge index.""" + """ + Remove self-loops from the edge index. + """ # Example: edge_index = [[0, 1, 2, 3], # [1, 1, 3, 2]], shape (2, |E| = 4) # -> keep_mask = [True, False, True, True] @@ -545,15 +585,19 @@ def remove_selfloops(self) -> EdgeIndex: def remove_duplicate_edges(self, num_nodes: int | None = None) -> EdgeIndex: """ - Remove duplicate edges from the edge index. Keeps the tensor contiguous in memory. - - Args: - num_nodes: The number of nodes in the graph. If ``None``, it will be inferred from ``self.num_nodes``. - This parameter is important when ``edge_index`` does not contain all nodes (e.g., some nodes are isolated and have no edges or have been removed), - as it ensures that the resulting Laplacian matrix has the correct size and includes all nodes. For instance, for self-loops. - - Returns: - edge_index: This `EdgeIndex` instance with duplicate edges removed. + Remove duplicate edges from the edge index. + + Keeps the tensor contiguous in memory. + Args: + num_nodes: The number of nodes in the graph. If ``None``, it will be + inferred from ``self.num_nodes``. + This parameter is important when ``edge_index`` does not contain all nodes + (e.g., some nodes are isolated and have no edges or have been removed), + as it ensures that the resulting Laplacian matrix has the correct size + and includes all nodes. For instance, for self-loops. + + Returns: + edge_index: This `EdgeIndex` instance with duplicate edges removed. """ num_nodes = self.num_nodes if num_nodes is None else num_nodes self.__validate_num_nodes(num_nodes) @@ -609,9 +653,12 @@ def to_undirected( Args: with_selfloops: Whether to add self-loops to each node. Defaults to ``False``. 
- num_nodes: Total number of nodes. Propagated to ``add_selfloops`` when ``with_selfloops`` is ``True``. - This parameter is useful when ``edge_index`` does not contain all nodes (e.g., some nodes are isolated and have no edges or have been removed), - as it ensures that the resulting Laplacian matrix has the correct size and includes all nodes. For instance, for self-loops. + num_nodes: Total number of nodes. Propagated to ``add_selfloops`` when + ``with_selfloops`` is ``True``. + This parameter is useful when ``edge_index`` does not contain all nodes + (e.g., some nodes are isolated and have no edges or have been removed), + as it ensures that the resulting Laplacian matrix has the correct size and + includes all nodes. For instance, for self-loops. Returns: edge_index: This `EdgeIndex` instance converted to undirected. @@ -636,17 +683,22 @@ def to_undirected( # Example: encoded_edge_ids = [1, 4, 11], # reversed_encoded_edge_ids = [4, 1, 14] # -> missing_reverse_mask = [False, False, True] - # because 4 and 1 are in both, it means edges (0,1) and (1,0) are already present, - # but 14 is only in reversed_encoded_edge_ids, which means edge (3,2) is missing - # and this is because the mask points to the missing reversee edges that are missing + # because 4 and 1 are in both, it means edges (0,1) and (1,0) + # are already present, + # but 14 is only in reversed_encoded_edge_ids, which means + # edge (3,2) is missing + # and this is because the mask points to the reverse edges + # that are missing missing_mask = torch.logical_not(torch.isin(reversed_encoded_edge_ids, encoded_edge_ids)) - # Keep all original sources and append the destination of each edge whose reverse is missing. + # Keep all original sources and append the destination of each edge + # whose reverse is missing. 
# Example: orig_src = [0, 1, 2], orig_dest[missing_mask] = [3] # -> src = [0, 1, 2, 3] src = torch.cat([orig_src, orig_dest[missing_mask]]) - # Keep all original destinations and append the source of each edge whose reverse is missing. + # Keep all original destinations and append the source of each edge + # whose reverse is missing. # Example: orig_dest = [1, 0, 3], orig_src[missing_mask] = [2] # -> dest = [1, 0, 3, 2] # -> final undirected edges: [(0,1), (1,0), (2,3), (3,2)] @@ -678,8 +730,10 @@ def to_undirected( if with_selfloops: # Don't remove duplicate edges when adding self-loops, as we need to remove them - # even if with_selfloops is False, to ensure that the edge index is clean and doesn't contain duplicate edges. - # In this way, we don't do the duplicate edge removal twice, which would be redundant and inefficient + # even if with_selfloops is False, to ensure that the edge index is clean + # and doesn't contain duplicate edges. + # In this way, we don't do the duplicate edge removal twice, which would be + # redundant and inefficient self.add_selfloops(num_nodes=num_nodes, with_duplicate_removal=False) self.remove_duplicate_edges(num_nodes=num_nodes) @@ -692,13 +746,15 @@ def __validate_edge_weights(self, edge_weights: Tensor | None) -> None: if edge_weights.dim() != 1: raise ValueError( - f"'edge_weights' must be a 1D tensor. Got {edge_weights.dim()}D tensor with shape {edge_weights.shape}." + f"'edge_weights' must be a 1D tensor. Got " + f"{edge_weights.dim()}D tensor with shape {edge_weights.shape}." ) if edge_weights.size(0) != self.__edge_index.size(1): raise ValueError( - "'edge_weights' must have the same number of entries as edges in the 'edge_index'. " - f"Got {edge_weights.size(0)} edge weights but {self.__edge_index.size(1)} edges." + f"'edge_weights' must have the same number of entries as edges in " + f"the 'edge_index'. Got {edge_weights.size(0)} edge weights but " + f"{self.__edge_index.size(1)} edge columns." 
) def __validate_num_nodes(self, num_nodes: int) -> None: diff --git a/hyperbench/types/hdata.py b/hyperbench/types/hdata.py index 85df8c8c..b7441c46 100644 --- a/hyperbench/types/hdata.py +++ b/hyperbench/types/hdata.py @@ -44,12 +44,15 @@ class HData: ... [0, 1, 2, 3, 4]]) # hyperedge IDs >>> data = HData(x=x, hyperedge_index=hyperedge_index) - Args: + Attributes: x: Node feature matrix of shape ``[num_nodes, num_features]``. hyperedge_index: Hyperedge connectivity in COO format of shape ``[2, num_incidences]``, - where ``hyperedge_index[0]`` contains node IDs and ``hyperedge_index[1]`` contains hyperedge IDs. - hyperedge_weights: Optional tensor of shape ``[num_hyperedges]`` containing weights for each hyperedge. - hyperedge_attr: Hyperedge feature matrix of shape ``[num_hyperedges, num_hyperedge_features]``. + where ``hyperedge_index[0]`` contains node IDs and ``hyperedge_index[1]`` + contains hyperedge IDs. + hyperedge_weights: Optional tensor of shape ``[num_hyperedges]`` containing weights + for each hyperedge. + hyperedge_attr: Hyperedge feature matrix of + shape ``[num_hyperedges, num_hyperedge_features]``. Features associated with each hyperedge (e.g., weights, timestamps, types). num_nodes: Number of nodes in the hypergraph. If ``None``, inferred as ``x.size(0)``. @@ -58,9 +61,11 @@ class HData: y: Labels for hyperedges, of shape ``[num_hyperedges]``. Used for supervised learning tasks. For unsupervised tasks, this can be ignored. Default is a tensor of ones, indicating all hyperedges are positive examples. - global_node_ids: Optional stable node IDs of shape ``[num_nodes]`` matching the row order of ``x``. - Use this to preserve access to the canonical node space when ``hyperedge_index`` is rebased locally. - If ``None``, defaults to ``torch.arange(num_nodes)``, assuming that these are the global node IDs in the same order as the rows of ``x``. + global_node_ids: Optional stable node IDs of shape ``[num_nodes]`` matching the + row order of ``x``. 
Use this to preserve access to the canonical node space + when ``hyperedge_index`` is rebased locally. + If ``None``, defaults to ``torch.arange(num_nodes)``, assuming that these are the + global node IDs in the same order as the rows of ``x``. """ def __init__( @@ -115,6 +120,13 @@ def __init__( self.device = self.get_device_if_all_consistent() def __repr__(self) -> str: + hyperedge_weights_shape = ( + self.hyperedge_weights.shape if self.hyperedge_weights is not None else None + ) + hyperedge_attr_shape = ( + self.hyperedge_attr.shape if self.hyperedge_attr is not None else None + ) + return ( f"{self.__class__.__name__}(\n" f" num_nodes={self.num_nodes},\n" @@ -122,8 +134,8 @@ def __repr__(self) -> str: f" x_shape={self.x.shape},\n" f" global_node_ids_shape={self.global_node_ids.shape},\n" f" hyperedge_index_shape={self.hyperedge_index.shape},\n" - f" hyperedge_weights_shape={self.hyperedge_weights.shape if self.hyperedge_weights is not None else None},\n" - f" hyperedge_attr_shape={self.hyperedge_attr.shape if self.hyperedge_attr is not None else None},\n" + f" hyperedge_weights_shape={hyperedge_weights_shape},\n" + f" hyperedge_attr_shape={hyperedge_attr_shape},\n" f" y_shape={self.y.shape if self.y is not None else None}\n" f" device={self.device}\n" f")" @@ -137,21 +149,29 @@ def cat_same_node_space( global_node_ids: Tensor | None = None, ) -> HData: """ - Concatenate `HData` instances that share the same node space, meaning nodes with the same ID in different instances are the same node. - This is useful when combining positive and negative hyperedges that reference the same set of nodes. + Concatenate `HData` instances that share the same node space, meaning nodes with + the same ID in different instances are the same node. + This is useful when combining positive and negative hyperedges that reference + the same set of nodes. Notes: - - ``x`` is derived from the instance with the largest number of nodes, if not provided explicitly. 
+ - ``x`` is derived from the instance with the largest number of nodes, + if not provided explicitly. If there are conflicting features for the same node ID across instances, the features from the instance with the largest number of nodes will be used. - If ``global_node_ids`` is provided explicitly, ``x`` must also be provided to ensure consistency. + If ``global_node_ids`` is provided explicitly, ``x`` must also be provided + to ensure consistency. - ``hyperedge_index`` is the concatenation of all input hyperedge indices. - ``hyperedge_weights`` is the concatenation of all input hyperedge weights, if present. - If some instances have hyperedge weights and others do not, the resulting ``hyperedge_weights`` will be set to ``None``. + If some instances have hyperedge weights and others do not, the resulting + ``hyperedge_weights`` will be set to ``None``. - ``hyperedge_attr`` is the concatenation of all input hyperedge attributes, if present. - If some instances have hyperedge attributes and others do not, the resulting ``hyperedge_attr`` will be set to ``None``. - - ``global_node_ids`` is derived from the instance with the largest number of nodes, if not provided explicitly. - If ``x`` is provided explicitly, ``global_node_ids`` must be provided explicitly as well to ensure consistency. + If some instances have hyperedge attributes and others do not, the resulting + ``hyperedge_attr`` will be set to ``None``. + - ``global_node_ids`` is derived from the instance with the largest number of nodes, + if not provided explicitly. + If ``x`` is provided explicitly, ``global_node_ids`` must be provided explicitly + as well to ensure consistency. - ``y`` is the concatenation of all input labels. Examples: @@ -165,21 +185,29 @@ def cat_same_node_space( Args: hdatas: One or more `HData` instances sharing the same node space. x: Optional node feature matrix to use for the resulting `HData`. 
- If ``None``, the node features from the instance with the largest number of nodes will be used. - If ``global_node_ids`` is provided explicitly, ``x`` must also be provided to ensure consistency. + If ``None``, the node features from the instance with the largest number of + nodes will be used. + If ``global_node_ids`` is provided explicitly, ``x`` must also be provided + to ensure consistency. global_node_ids: Optional global node IDs for the resulting `HData`. - If ``None``, the global node IDs from the instance with the largest number of nodes will be used. - If ``x`` is provided explicitly, ``global_node_ids`` must also be provided to ensure consistency. - If ``x`` is provided and there is no need for ``global_node_ids`` to preserve access to the canonical node space, - it is recommended to use arbitrary global node IDs that are consistent with the feature rows of ``x``. + If ``None``, the global node IDs from the instance with the largest number of + nodes will be used. + If ``x`` is provided explicitly, ``global_node_ids`` must also be provided + to ensure consistency. + If ``x`` is provided and there is no need for ``global_node_ids`` to preserve + access to the canonical node space, + it is recommended to use arbitrary global node IDs that are consistent with + the feature rows of ``x``. For example, ``global_node_ids=torch.arange(x.size(0))``). Returns: hdata: A new `HData` with shared nodes and concatenated hyperedges. Raises: - ValueError: If no HData instances are provided, if there are overlapping hyperedge IDs across instances, - or if ``x`` and ``global_node_ids`` are not both provided when one of them is provided. + ValueError: If no HData instances are provided, if there are overlapping + hyperedge IDs across instances, + or if ``x`` and ``global_node_ids`` are not both provided when one of + them is provided. 
""" cls.__validate_can_perform_cat_same_node_space(hdatas, x, global_node_ids) @@ -253,7 +281,8 @@ def empty(cls) -> HData: @classmethod def from_hyperedge_index(cls, hyperedge_index: Tensor) -> HData: """ - Build an `HData` from a given hyperedge index, with empty node features and hyperedge attributes. + Build an `HData` from a given hyperedge index, with empty node features and + hyperedge attributes. - Node features are initialized as an empty tensor of shape ``[0, 0]``. - Hyperedge attributes are set to ``None``. @@ -270,10 +299,12 @@ def from_hyperedge_index(cls, hyperedge_index: Tensor) -> HData: >>> hyperedge_weights = None Args: - hyperedge_index: Tensor of shape ``[2, num_incidences]`` representing the hypergraph connectivity. + hyperedge_index: Tensor of shape ``[2, num_incidences]`` representing + the hypergraph connectivity. Returns: - hdata: An `HData` instance with the given hyperedge index and default values for other attributes. + hdata: An `HData` instance with the given hyperedge index and default values + for other attributes. """ return cls( x=empty_nodefeatures(), @@ -297,7 +328,10 @@ def split( Examples: Transductive split (default) preserving the full node space: - >>> split_hdata = HData.split(hdata, torch.tensor([1]), node_space_setting="transductive") + >>> split_hdata = HData.split( + ... hdata, + ... torch.tensor([1]), + ... node_space_setting="transductive") >>> split_hdata.x.shape[0] == hdata.x.shape[0] >>> split_hdata.hyperedge_index ... # node IDs stay in the original row space, hyperedge IDs are rebased @@ -310,8 +344,10 @@ def split( Args: hdata: The original `HData` containing the full hypergraph. split_hyperedge_ids: Tensor of hyperedge IDs to include in this split. - It is assumed that the provided hyperedge IDs are valid and exist in ``hdata.hyperedge_index[1]``. - It is mandatory to provide this argument unless a custom ``splitter`` is provided that owns split materialization. 
+ It is assumed that the provided hyperedge IDs are valid and exist + in ``hdata.hyperedge_index[1]``. + It is mandatory to provide this argument unless a custom ``splitter`` is provided + that owns split materialization. node_space_setting: Whether to preserve the full node space in the splits. ``transductive`` (default) ensures all node features are present in the split, while ``inductive`` allows splits to have disjoint node spaces. @@ -348,7 +384,8 @@ def enrich_node_features( Enrich node features using the provided node feature enricher. Args: - enricher: An instance of NodeEnricher to generate structural node features from hypergraph topology. + enricher: An instance of NodeEnricher to generate structural node features + from hypergraph topology. enrichment_mode: How to combine generated features with existing ``hdata.x``. ``concatenate`` appends new features as additional columns. ``replace`` substitutes ``hdata.x`` entirely. @@ -384,7 +421,8 @@ def enrich_node_features_from( Copy node features from another `HData` by aligning features by ``global_node_ids``. Examples: - Transductive enrichment (default) expecting the same node space in both source and target: + Transductive enrichment (default) expecting the same node space in both + source and target: >>> target = target.enrich_node_features_from(source, node_space_setting="transductive") Inductive with a scalar fill value: @@ -405,8 +443,10 @@ def enrich_node_features_from( hdata_with_features: Source `HData` providing node features. node_space_setting: The setting for the node space, determining how nodes are handled. If ``"transductive"``, every target node is expected to exist in the source. - If ``"inductive"``, the target dataset may have a different node space, and missing nodes are filled using ``fill_value``. - fill_value: Scalar or vector used to fill missing node features when ``node_space_setting`` is not transductive. 
+ If ``"inductive"``, the target dataset may have a different node space, and missing + nodes are filled using ``fill_value``. + fill_value: Scalar or vector used to fill missing node features when + ``node_space_setting`` is not transductive. Returns: hdata: A new `HData` with node features copied from ``hdata_with_features``. @@ -414,20 +454,24 @@ def enrich_node_features_from( Raises: ValueError: If either instance lacks ``global_node_ids``, if the source feature rows do not align with the source node IDs, if ``fill_value`` is used with - ``node_space_setting="transductive"``, or if ``fill_value`` is missing or malformed when ``node_space_setting="inductive"``. + ``node_space_setting="transductive"``, or if ``fill_value`` is missing or + malformed when ``node_space_setting="inductive"``. """ source_global_node_ids = hdata_with_features.global_node_ids source_x = hdata_with_features.x if source_x.size(0) != source_global_node_ids.size(0): raise ValueError( - "Expected 'hdata_with_features.x' rows to align with hdata_with_features.global_node_ids." + "Expected 'hdata_with_features.x' rows to align with " + "hdata_with_features.global_node_ids." 
) self.__validate_node_space_setting(node_space_setting, fill_value) target_global_node_ids = self.global_node_ids.detach().cpu().tolist() - # We need the index of the features for each node in the source, as we will use the index to track back - # to the node feautures after we match the global node id in the target to the one that is in the source + # We need the index of the features for each node in the source, as we will use + # the index to track back + # to the node feautures after we match the global node id in the target to the one that + # is in the source source_feature_idx_by_global_node_id = { int(global_node_id): feature_idx for feature_idx, global_node_id in enumerate( @@ -448,8 +492,10 @@ def enrich_node_features_from( source_feature_idx = source_feature_idx_by_global_node_id.get(int(global_node_id)) if source_feature_idx is None: # Example: global_node_id = 30 is not present in the source - # -> strict transductive mode records it as missing and then raises an error - # -> non-transductive mode fills the features with fill_value and continues enriching the other nodes + # -> strict transductive mode records it as + # missing and then raises an error + # -> non-transductive mode fills the features with + # fill_value and continues enriching the other nodes if is_transductive_setting(node_space_setting): missing_global_node_ids.append( int(global_node_id) @@ -460,7 +506,8 @@ def enrich_node_features_from( ) # fill missing node features with fill_value and continue - # Match the global node IDs in the target to the corresponding feature indices in the source + # Match the global node IDs in the target to the corresponding + # feature indices in the source # Example: source_global_node_ids = [10, 20, 30], source_x has shape (3, num_features) # target_global_node_ids = [10, 30] # -> source_feature_idx_by_global_node_id = {10: 0, 20: 1, 30: 2} @@ -494,8 +541,10 @@ def enrich_hyperedge_weights( Enrich hyperedge weights using the provided hyperedge weight 
enricher. Args: - enricher: An instance of HyperedgeEnricher to generate hyperedge weights from hypergraph topology. - enrichment_mode: How to combine generated weights with existing ``hdata.hyperedge_weights``. + enricher: An instance of HyperedgeEnricher to generate hyperedge weights from + hypergraph topology. + enrichment_mode: How to combine generated weights with + existing ``hdata.hyperedge_weights``. ``concatenate`` appends new weights to the existing 1D tensor. ``replace`` substitutes ``hdata.hyperedge_weights`` entirely. Defaults to ``replace`` if not provided. @@ -536,8 +585,10 @@ def enrich_hyperedge_attr( Enrich hyperedge features using the provided hyperedge feature enricher. Args: - enricher: An instance of HyperedgeEnricher to generate structural hyperedge features from hypergraph topology. - enrichment_mode: How to combine generated features with existing ``hdata.hyperedge_attr``. + enricher: An instance of HyperedgeEnricher to generate structural hyperedge + features from hypergraph topology. + enrichment_mode: How to combine generated features with + existing ``hdata.hyperedge_attr``. ``concatenate`` appends new features as additional columns. ``replace`` substitutes ``hdata.hyperedge_attr`` entirely. Defaults to ``replace`` if not provided. @@ -569,6 +620,7 @@ def enrich_hyperedge_attr( def get_device_if_all_consistent(self) -> torch.device: """ Check that all tensors are on the same device and return that device. + If there are no tensors or if they are on different devices, return CPU. Returns: @@ -604,10 +656,12 @@ def remove_hyperedges_with_fewer_than_k_nodes( Args: k: The minimum number of nodes a hyperedge must have to be retained. - preserve_global_node_ids: Whether to preserve the global node IDs after removing hyperedges. Defaults to ``False``. - If ``False``, the global node IDs will be reindexed to be contiguous after removing hyperedges. 
- If ``True``, the global node IDs will be preserved, which may cause some models to raise - as they may expect contiguous global node IDs. + preserve_global_node_ids: Whether to preserve the global node IDs after + removing hyperedges. Defaults to ``False``. + If ``False``, the global node IDs will be reindexed to be contiguous after + removing hyperedges. + If ``True``, the global node IDs will be preserved, which may cause some models + to raise as they may expect contiguous global node IDs. """ validate_is_positive("k", k) @@ -649,8 +703,10 @@ def shuffle(self, seed: int | None = None) -> HData: """ Return a new `HData` instance with hyperedge IDs randomly reassigned. - Each hyperedge keeps its original set of nodes, but is assigned a new ID via a random permutation. - ``y`` and ``hyperedge_attr`` are reordered to match, so that ``y[new_id]`` still corresponds to the correct hyperedge. + Each hyperedge keeps its original set of nodes, but is assigned a new ID + via a random permutation. + ``y`` and ``hyperedge_attr`` are reordered to match, so that ``y[new_id]`` + still corresponds to the correct hyperedge. Same for ``hyperedge_attr[new_id]`` if hyperedge attributes are present. Examples: @@ -665,10 +721,12 @@ def shuffle(self, seed: int | None = None) -> HData: >>> shuffled_hdata.y # labels are permuted to match new hyperedge IDs, e.g., [0, 1] Args: - seed: Optional random seed for reproducibility. If ``None``, the shuffle will be non-deterministic. + seed: Optional random seed for reproducibility. If ``None``, the shuffle + will be non-deterministic. Returns: - hdata: A new `HData` instance with hyperedge IDs, ``y``, and ``hyperedge_attr`` permuted. + hdata: A new `HData` instance with hyperedge IDs, ``y``, and + ``hyperedge_attr`` permuted. 
""" generator = create_seeded_torch_generator(device=self.device, seed=seed) permutation = torch.randperm( @@ -680,8 +738,10 @@ def shuffle(self, seed: int | None = None) -> HData: # permutation[new_id] = old_id, so y[permutation] puts old labels into new slots # inverse_permutation[old_id] = new_id, used to remap hyperedge IDs in incidences - # Example: permutation = [1, 2, 0] means new_id 0 gets old_id 1, new_id 1 gets old_id 2, new_id 2 gets old_id 0 - # -> inverse_permutation = [2, 0, 1] means old_id 0 gets new_id 2, old_id 1 gets new_id 0, old_id 2 gets new_id 1 + # Example: permutation = [1, 2, 0] means new_id 0 gets old_id 1, + # new_id 1 gets old_id 2, new_id 2 gets old_id 0 + # -> inverse_permutation = [2, 0, 1] means old_id 0 gets new_id 2, + # old_id 1 gets new_id 0, old_id 2 gets new_id 1 inverse_permutation = torch.empty_like( permutation, dtype=permutation.dtype, @@ -697,14 +757,17 @@ def shuffle(self, seed: int | None = None) -> HData: # Example: hyperedge_index = [[0, 1, 2, 3, 4], # [0, 0, 1, 1, 2]], - # inverse_permutation = [2, 0, 1] (new_id 0 -> old_id 2, new_id 1 -> old_id 0, new_id 2 -> old_id 1) + # inverse_permutation = [2, 0, 1] (new_id 0 -> old_id 2, new_id 1 -> + # old_id 0, new_id 2 -> old_id 1) # -> new_hyperedge_index = [[0, 1, 2, 3, 4], # [2, 2, 0, 0, 1]] old_hyperedge_ids = self.hyperedge_index[1] new_hyperedge_index[1] = inverse_permutation[old_hyperedge_ids] # Example: hyperedge_attr = [attr_0, attr_1, attr_2], permutation = [1, 2, 0] - # -> new_hyperedge_attr = [attr_1 (attr of old_id 1), attr_2 (attr of old_id 2), attr_0 (attr of old_id 0)] + # -> new_hyperedge_attr = [attr_1 (attr of old_id 1), + # attr_2 (attr of old_id 2), + # attr_0 (attr of old_id 0)] new_hyperedge_attr = ( self.hyperedge_attr[permutation] if self.hyperedge_attr is not None else None ) @@ -752,7 +815,8 @@ def to(self, device: torch.device | str, non_blocking: bool = False) -> HData: Args: device: The target device (e.g., 'cpu', 'cuda:0'). 
- non_blocking: If ``True`` and the source and destination devices are both CUDA, the copy will be non-blocking. + non_blocking: If ``True`` and the source and destination devices are both CUDA, + the copy will be non-blocking. Returns: hdata: The `HData` instance with all tensors moved to the specified device. @@ -782,7 +846,8 @@ def with_y_to(self, value: float) -> HData: value: The value to set for all entries in the y attribute. Returns: - hdata: A new `HData` instance with the same attributes except for y, which is set to a tensor of the given value. + hdata: A new `HData` instance with the same attributes except for y, + which is set to a tensor of the given value. """ return self.__class__( x=self.x.clone(), @@ -800,7 +865,8 @@ def with_y_ones(self) -> HData: Return a copy of this instance with a y attribute of all ones. Returns: - hdata: A new `HData` instance with the same attributes except for y, which is set to a tensor of ones. + hdata: A new `HData` instance with the same attributes except for y, which is + set to a tensor of ones. """ return self.with_y_to(1.0) @@ -809,36 +875,45 @@ def with_y_zeros(self) -> HData: Return a copy of this instance with a y attribute of all zeros. Returns: - hdata: A new `HData` instance with the same attributes except for y, which is set to a tensor of zeros. + hdata: A new `HData` instance with the same attributes except for y, which + is set to a tensor of zeros. """ return self.with_y_to(0.0) def stats(self) -> dict[str, Any]: """ Compute statistics for the hypergraph data. - The fields returned in the dictionary include: - - ``shape_x``: The shape of the node feature matrix ``x``. - - ``shape_hyperedge_weights``: The shape of the hyperedge weights tensor, or ``None`` if hyperedge weights are not present. - - ``shape_hyperedge_attr``: The shape of the hyperedge attribute matrix, or ``None`` if hyperedge attributes are not present. - - ``num_nodes``: The number of nodes in the hypergraph. 
- - ``num_hyperedges``: The number of hyperedges in the hypergraph. - - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean number of hyperedges each node belongs to. - - ``avg_degree_node``: The floored node average degree. - - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as the mean number of nodes each hyperedge contains. - - ``avg_degree_hyperedge``: The floored hyperedge average size. - - ``node_degree_max``: The maximum degree of any node in the hypergraph. - - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. - - ``node_degree_median``: The median degree of nodes in the hypergraph. - - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. - - ``distribution_node_degree``: A list where the value at index ``i`` represents the count of nodes with degree ``i``. - - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents the count of hyperedges with size ``i``. - - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees and the values are the count of nodes with that degree. - - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge sizes and the values are the count of hyperedges with that size. + + Fields: + - ``shape_x``: The shape of the node feature matrix ``x``. + - ``shape_hyperedge_weights``: The shape of the hyperedge weights tensor, or + ``None`` if hyperedge weights are not present. + - ``shape_hyperedge_attr``: The shape of the hyperedge attribute matrix, or ``None`` + if hyperedge attributes are not present. + - ``num_nodes``: The number of nodes in the hypergraph. + - ``num_hyperedges``: The number of hyperedges in the hypergraph. + - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean + number of hyperedges each node belongs to. + - ``avg_degree_node``: The floored node average degree. 
+ - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as + the mean number of nodes each hyperedge contains. + - ``avg_degree_hyperedge``: The floored hyperedge average size. + - ``node_degree_max``: The maximum degree of any node in the hypergraph. + - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. + - ``node_degree_median``: The median degree of nodes in the hypergraph. + - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. + - ``distribution_node_degree``: A list where the value at index ``i`` represents + the count of nodes with degree ``i``. + - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents + the count of hyperedges with size ``i``. + - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees and + the values are the count of nodes with that degree. + - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge + sizes and the values are the count of hyperedges with that size. Returns: stats: A dictionary containing various statistics about the hypergraph. """ - node_ids = self.hyperedge_index[0] hyperedge_ids = self.hyperedge_index[1] @@ -937,7 +1012,8 @@ def __validate_can_perform_cat_same_node_space( unique_joint_hyperedge_ids = joint_hyperedge_ids.unique() if unique_joint_hyperedge_ids.size(0) != joint_hyperedge_ids.size(0): raise ValueError( - "Overlapping hyperedge IDs found across instances. Ensure each instance uses distinct hyperedge IDs." + "Overlapping hyperedge IDs found across instances. Ensure each " + "instance uses distinct hyperedge IDs." 
) def __to_fill_features( @@ -961,7 +1037,8 @@ def __to_fill_features( # This can happen when fill_value is: # - A scalar tensor, e.g., tensor(0.0), which should be broadcasted to all features - # - A list with a single value, e.g., [0.0], which should also be broadcasted to all features + # - A list with a single value, e.g., [0.0], which should + # also be broadcasted to all features if fill_features.numel() == 1: fill_features = fill_features.repeat(num_features) @@ -985,7 +1062,8 @@ def __validate_enrichment_mode(self, enrichment_mode: EnrichmentMode | None) -> return raise ValueError( - f"'enrichment_mode' must be one of 'replace', 'concatenate', or None, got {enrichment_mode!r}." + f"'enrichment_mode' must be one of 'replace', 'concatenate', " + f"or None, got {enrichment_mode!r}." ) def __validate_hyperedge_attr(self) -> None: @@ -995,12 +1073,14 @@ def __validate_hyperedge_attr(self) -> None: validate_floating_tensor_dtype("hyperedge_attr", self.hyperedge_attr) if self.hyperedge_attr.dim() != 2: raise ValueError( - f"'hyperedge_attr' must be a 2D tensor, got shape {tuple(self.hyperedge_attr.shape)}." + f"'hyperedge_attr' must be a 2D tensor, got shape " + f"{tuple(self.hyperedge_attr.shape)}." ) if self.hyperedge_attr.size(0) != self.num_hyperedges: raise ValueError( - f"'hyperedge_attr' must have one row per hyperedge. " - f"Got size={self.hyperedge_attr.size(0)} but num_hyperedges={self.num_hyperedges}." + "'hyperedge_attr' must have one row per hyperedge. " + f"Got size={self.hyperedge_attr.size(0)} but " + f"num_hyperedges={self.num_hyperedges}." ) def __validate_hyperedge_index(self) -> None: @@ -1031,19 +1111,22 @@ def __validate_hyperedge_weights(self) -> None: if self.hyperedge_weights.dim() != 1: raise ValueError( - f"'hyperedge_weights' must be a 1D tensor, got shape {tuple(self.hyperedge_weights.shape)}." + f"'hyperedge_weights' must be a 1D tensor, " + f"got shape {tuple(self.hyperedge_weights.shape)}." 
) if self.hyperedge_weights.size(0) != self.num_hyperedges: raise ValueError( f"'hyperedge_weights' must have one entry per hyperedge. " - f"Got size={self.hyperedge_weights.size(0)} but num_hyperedges={self.num_hyperedges}." + f"Got size={self.hyperedge_weights.size(0)} but " + f"num_hyperedges={self.num_hyperedges}." ) def __validate_global_node_ids(self) -> None: validate_long_tensor_dtype("global_node_ids", self.global_node_ids) if self.global_node_ids.dim() != 1: raise ValueError( - f"'global_node_ids' must be a 1D tensor, got shape {tuple(self.global_node_ids.shape)}." + f"'global_node_ids' must be a 1D tensor, got " + f"shape {tuple(self.global_node_ids.shape)}." ) if self.global_node_ids.size(0) != self.num_nodes: raise ValueError( @@ -1064,7 +1147,8 @@ def __validate_labels(self) -> None: def __validate_x(self) -> None: if self.x.size(0) not in (0, self.num_nodes): raise ValueError( - f"'x' must have one feature row per node, or be 'torch.empty((0, 0))' if there are no nodes. " + f"'x' must have one feature row per node, or be 'torch.empty((0, 0))' " + f"if there are no nodes. " f"Got x.shape={tuple(self.x.shape)} but num_nodes={self.num_nodes}." ) diff --git a/hyperbench/types/hypergraph.py b/hyperbench/types/hypergraph.py index b953e712..6fba83f1 100644 --- a/hyperbench/types/hypergraph.py +++ b/hyperbench/types/hypergraph.py @@ -21,15 +21,19 @@ class HIFHypergraph: """ - A hypergraph data structure that supports directed/undirected hyperedges - with incidence-based representation. + A hypergraph data structure that supports directed/undirected hyperedges with incidence-based + representation. Args: - network_type: The type of hypergraph, which can be "asc" (or "directed") for directed hyperedges, or "undirected" for undirected hyperedges. + network_type: The type of hypergraph, which can be "asc" (or "directed") for + directed hyperedges, or "undirected" for undirected hyperedges. metadata: Optional dictionary of metadata about the hypergraph. 
- incidences: A list of incidences, where each incidence is a dictionary with keys "node" and "edge" representing the relationship between a node and a hyperedge. - nodes: A list of node dictionaries, where each dictionary contains information about a node (e.g., id, features). - hyperedges: A list of edge dictionaries, where each dictionary contains information about a hyperedge (e.g., id, features). + incidences: A list of incidences, where each incidence is a dictionary with keys "node" + and "edge" representing the relationship between a node and a hyperedge. + nodes: A list of node dictionaries, where each dictionary contains information about + a node (e.g., id, features). + hyperedges: A list of edge dictionaries, where each dictionary contains information + about a hyperedge (e.g., id, features). """ def __init__( @@ -83,37 +87,47 @@ def from_hif(cls, data: dict[str, Any]) -> HIFHypergraph: @property def num_nodes(self) -> int: - """Return the number of nodes in the hypergraph.""" + """ + Return the number of nodes in the hypergraph. + """ return len(self.nodes) @property def num_hyperedges(self) -> int: - """Return the number of hyperedges in the hypergraph.""" + """ + Return the number of hyperedges in the hypergraph. + """ return len(self.hyperedges) def stats(self) -> dict[str, Any]: """ Compute statistics for the HIFhypergraph. - The fields returned in the dictionary include: - - ``num_nodes``: The number of nodes in the hypergraph. - - ``num_hyperedges``: The number of hyperedges in the hypergraph. - - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean number of hyperedges each node belongs to. - - ``avg_degree_node``: The floored node average degree. - - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as the mean number of nodes each hyperedge contains. - - ``avg_degree_hyperedge``: The floored hyperedge average size. - - ``node_degree_max``: The maximum degree of any node in the hypergraph. 
- - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. - - ``node_degree_median``: The median degree of nodes in the hypergraph. - - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. - - ``distribution_node_degree``: A list where the value at index ``i`` represents the count of nodes with degree ``i``. - - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents the count of hyperedges with size ``i``. - - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees and the values are the count of nodes with that degree. - - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge sizes and the values are the count of hyperedges with that size. + + Fields: + - ``num_nodes``: The number of nodes in the hypergraph. + - ``num_hyperedges``: The number of hyperedges in the hypergraph. + - ``avg_degree_node_raw``: The average degree of nodes, calculated as the mean + number of hyperedges each node belongs to. + - ``avg_degree_node``: The floored node average degree. + - ``avg_degree_hyperedge_raw``: The average size of hyperedges, calculated as the + mean number of nodes each hyperedge contains. + - ``avg_degree_hyperedge``: The floored hyperedge average size. + - ``node_degree_max``: The maximum degree of any node in the hypergraph. + - ``hyperedge_degree_max``: The maximum size of any hyperedge in the hypergraph. + - ``node_degree_median``: The median degree of nodes in the hypergraph. + - ``hyperedge_degree_median``: The median size of hyperedges in the hypergraph. + - ``distribution_node_degree``: A list where the value at index ``i`` represents + the count of nodes with degree ``i``. + - ``distribution_hyperedge_size``: A list where the value at index ``i`` represents + the count of hyperedges with size ``i``. 
+ - ``distribution_node_degree_hist``: A dictionary where the keys are node degrees + and the values are the count of nodes with that degree. + - ``distribution_hyperedge_size_hist``: A dictionary where the keys are hyperedge + sizes and the values are the count of hyperedges with that size. Returns: stats: A dictionary containing various statistics about the hypergraph. """ - node_degree: dict[Any, int] = {} hyperedge_size: dict[Any, int] = {} @@ -206,7 +220,9 @@ def __init__(self, hyperedges: list[list[int]]): @property def num_nodes(self) -> int: - """Return the number of nodes in the hypergraph.""" + """ + Return the number of nodes in the hypergraph. + """ nodes = set() for edge in self.hyperedges: nodes.update(edge) @@ -214,7 +230,9 @@ def num_nodes(self) -> int: @property def num_hyperedges(self) -> int: - """Return the number of hyperedges in the hypergraph.""" + """ + Return the number of hyperedges in the hypergraph. + """ return len(self.hyperedges) def neighbors_of(self, node: int) -> Neighborhood: @@ -261,7 +279,9 @@ def neighbors_of_all(self) -> dict[int, Neighborhood]: return node_to_neighbors def stats(self) -> dict[str, Any]: - """Return basic statistics about the hypergraph.""" + """ + Return basic statistics about the hypergraph. + """ node_degree: dict[int, int] = {} distribution_hyperedge_size: list[int] = [] total_incidences = 0 @@ -339,7 +359,8 @@ def from_hyperedge_index(cls, hyperedge_index: Tensor) -> Hypergraph: Create a Hypergraph from a hyperedge index representation. Args: - hyperedge_index: Tensor of shape (2, |E|) representing hyperedges, where each column is (node, hyperedge). + hyperedge_index: Tensor of shape (2, |E|) representing hyperedges, where each + column is (node, hyperedge). Returns: hypergraph: Hypergraph instance @@ -363,12 +384,14 @@ def smoothing_with_matrix( ) -> Tensor: """ Return the feature matrix smoothed with a smoothing matrix. 
+ Computes ``M @ X`` where ``M`` is the smoothing matrix and ``X`` is the node feature matrix. Args: x: Node feature matrix. Size ``(num_nodes, C)``. matrix: The smoothing matrix. Size ``(num_nodes, num_nodes)``. - drop_rate: Randomly dropout the connections in the smoothing matrix with probability ``drop_rate``. Defaults to ``0.0``. + drop_rate: Randomly dropout the connections in the smoothing matrix with + probability ``drop_rate``. Defaults to ``0.0``. Returns: x: The smoothed feature matrix. Size ``(num_nodes, C)``. @@ -381,9 +404,10 @@ def smoothing_with_matrix( class HyperedgeIndex: """ A wrapper for hyperedge index representation. - Hyperedge index is a tensor of shape ``(2, num_incidences)`` that encodes the relationships between nodes and hyperedges. - Each column in the tensor represents an incidence between a node and a hyperedge, with the first row containing node indices - and the second row containing corresponding hyperedge indices. + Hyperedge index is a tensor of shape ``(2, num_incidences)`` that encodes the relationships + between nodes and hyperedges. + Each column in the tensor represents an incidence between a node and a hyperedge, with the + first row containing node indices and the second row containing corresponding hyperedge indices. Examples: >>> hyperedge_index = [[0, 1, 2, 0], @@ -397,7 +421,8 @@ class HyperedgeIndex: The number of hyperedges is 2 (hyperedges 0 and 1). Args: - hyperedge_index: A tensor of shape ``(2, num_incidences)`` representing hyperedges, where each column is (node, hyperedge). + hyperedge_index: A tensor of shape ``(2, num_incidences)`` representing hyperedges, + where each column is (node, hyperedge). """ def __init__(self, hyperedge_index: Tensor): @@ -405,32 +430,44 @@ def __init__(self, hyperedge_index: Tensor): @property def all_node_ids(self) -> Tensor: - """Return the tensor of all node IDs in the hyperedge index.""" + """ + Return the tensor of all node IDs in the hyperedge index. 
+ """ return self.__hyperedge_index[0] @property def all_hyperedge_ids(self) -> Tensor: - """Return the tensor of all hyperedge IDs in the hyperedge index.""" + """ + Return the tensor of all hyperedge IDs in the hyperedge index. + """ return self.__hyperedge_index[1] @property def item(self) -> Tensor: - """Return the hyperedge index tensor.""" + """ + Return the hyperedge index tensor. + """ return self.__hyperedge_index @property def node_ids(self) -> Tensor: - """Return the sorted unique node IDs from the hyperedge index.""" + """ + Return the sorted unique node IDs from the hyperedge index. + """ return self.__hyperedge_index[0].unique(sorted=True) @property def hyperedge_ids(self) -> Tensor: - """Return the sorted unique hyperedge IDs from the hyperedge index.""" + """ + Return the sorted unique hyperedge IDs from the hyperedge index. + """ return self.__hyperedge_index[1].unique(sorted=True) @property def num_hyperedges(self) -> int: - """Return the number of hyperedges in the hypergraph.""" + """ + Return the number of hyperedges in the hypergraph. + """ if self.num_incidences < 1: return 0 @@ -439,7 +476,9 @@ def num_hyperedges(self) -> int: @property def num_nodes(self) -> int: - """Return the number of nodes in the hypergraph.""" + """ + Return the number of nodes in the hypergraph. + """ if self.num_incidences < 1: return 0 @@ -448,7 +487,10 @@ def num_nodes(self) -> int: @property def num_incidences(self) -> int: - """Return the number of incidences in the hypergraph, which is the number of columns in the hyperedge index.""" + """ + Return the number of incidences in the hypergraph, which is the number of columns in the + hyperedge index. 
+ """ return self.__hyperedge_index.size(1) def nodes_in(self, hyperedge_id: int) -> list[int]: @@ -466,13 +508,15 @@ def nodes_in(self, hyperedge_id: int) -> list[int]: def num_nodes_if_isolated_exist(self, num_nodes: int) -> int: """ - Return the number of nodes in the hypergraph, accounting for isolated nodes that may not appear in the hyperedge index. + Return the number of nodes in the hypergraph, accounting for isolated nodes that may not + appear in the hyperedge index. Args: num_nodes: The total number of nodes in the hypergraph, including isolated nodes. Returns: - num_nodes: The number of nodes in the hypergraph, which is the maximum of the number of unique nodes in the hyperedge index and the provided ``num_nodes``. + num_nodes: The number of nodes in the hypergraph, which is the maximum of the number of + unique nodes in the hyperedge index and the provided ``num_nodes``. """ return max(self.num_nodes, num_nodes) @@ -488,7 +532,8 @@ def get_clique_expansion_adjacency_list(self, num_nodes: int | None = None) -> l If ``None``, inferred from the unique node IDs in ``hyperedge_index``. Returns: - adjacency: A list where ``adjacency[node_id]`` is the set of nodes adjacent to ``node_id``. + adjacency: A list where ``adjacency[node_id]`` is the set of + nodes adjacent to ``node_id``. 
""" num_nodes = num_nodes if num_nodes is not None else self.num_nodes self.__validate_num_nodes(num_nodes) @@ -500,7 +545,8 @@ def get_clique_expansion_adjacency_list(self, num_nodes: int | None = None) -> l self.all_node_ids[self.all_hyperedge_ids == hyperedge_id].unique().tolist() ) - # Clique expansion: every pair of nodes in the same hyperedge becomes an undirected graph edge + # Clique expansion: every pair of nodes in the same hyperedge + # becomes an undirected graph edge # Example: hyperedge [0, 1, 2] adds (0, 1), (0, 2), and (1, 2): # -> adjacency[0] = {1, 2} # -> adjacency[1] = {0, 2} @@ -527,7 +573,8 @@ def get_sparse_incidence_matrix( num_hyperedges: Total number of hyperedges. If ``None``, inferred from hyperedge index. Returns: - incidence_matrix: The sparse incidence matrix H of shape ``(num_nodes, num_hyperedges)``. + incidence_matrix: The sparse incidence matrix H of + shape ``(num_nodes, num_hyperedges)``. Raises: ValueError: If the provided dimensions cannot contain the raw node or hyperedge IDs. @@ -559,7 +606,8 @@ def get_sparse_normalized_node_degree_matrix( Compute a sparse diagonal node degree matrix from row-sums of the incidence matrix. Args: - incidence_matrix: The sparse incidence matrix H of shape ``(num_nodes, num_hyperedges)``. + incidence_matrix: The sparse incidence matrix H of + shape ``(num_nodes, num_hyperedges)``. power: Exponent applied to node degrees before placing them on the diagonal. num_nodes: Total number of nodes. If ``None``, inferred from hyperedge index. @@ -600,16 +648,18 @@ def get_sparse_rownormalized_node_degree_matrix( num_nodes: int | None = None, ) -> Tensor: """ - Compute the sparse normalized node degree matrix D_n^-1. + Compute the sparse normalized node degree matrix `D_n^-1`. + The node degree ``d_n[i]`` is the number of hyperedges containing node ``i`` (i.e., the row-sum of the incidence matrix H). Args: - incidence_matrix: The sparse incidence matrix H of shape ``(num_nodes, num_hyperedges)``. 
+ incidence_matrix: The sparse incidence matrix H of + shape ``(num_nodes, num_hyperedges)``. num_nodes: Total number of nodes. If ``None``, inferred from hyperedge index. Returns: - degree_matrix: The sparse diagonal matrix D_n^-1 of shape ``(num_nodes, num_nodes)``. + degree_matrix: The sparse diagonal matrix `D_n^-1` of shape ``(num_nodes, num_nodes)``. """ # Example: hyperedge_index = [[0, 1, 2, 0], # [0, 0, 0, 1]] @@ -632,16 +682,19 @@ def get_sparse_symnormalized_node_degree_matrix( num_nodes: int | None = None, ) -> Tensor: """ - Compute the sparse normalized node degree matrix D_n^-1/2. + Compute the sparse normalized node degree matrix `D_n^-1/2`. + The node degree ``d_n[i]`` is the number of hyperedges containing node ``i`` (i.e., the row-sum of the incidence matrix H). Args: - incidence_matrix: The sparse incidence matrix H of shape ``(num_nodes, num_hyperedges)``. + incidence_matrix: The sparse incidence matrix H of + shape ``(num_nodes, num_hyperedges)``. num_nodes: Total number of nodes. If ``None``, inferred from hyperedge index. Returns: - degree_matrix: The sparse diagonal matrix D_n^-1/2 of shape ``(num_nodes, num_nodes)``. + degree_matrix: The sparse diagonal matrix `D_n^-1/2` + of shape ``(num_nodes, num_nodes)``. """ # Example: hyperedge_index = [[0, 1, 2, 0], # [0, 0, 0, 1]] @@ -664,17 +717,19 @@ def get_sparse_normalized_hyperedge_degree_matrix( num_hyperedges: int | None = None, ) -> Tensor: """ - Compute the sparse normalized hyperedge degree matrix D_e^-1. + Compute the sparse normalized hyperedge degree matrix `D_e^-1`. The hyperedge degree ``d_e[j]`` is the number of nodes in hyperedge ``j`` (i.e., the column-sum of the incidence matrix H). Args: - incidence_matrix: The sparse incidence matrix H of shape ``(num_nodes, num_hyperedges)``. + incidence_matrix: The sparse incidence matrix H of + shape ``(num_nodes, num_hyperedges)``. num_hyperedges: Total number of hyperedges. If ``None``, inferred from hyperedge index. 
Returns: - degree_matrix: The sparse diagonal matrix D_e^-1 of shape ``(num_hyperedges, num_hyperedges)``. + degree_matrix: The sparse diagonal matrix `D_e^-1` of + shape ``(num_hyperedges, num_hyperedges)``. """ num_hyperedges = ( num_hyperedges if num_hyperedges is not None else int(incidence_matrix.size(1)) @@ -735,8 +790,8 @@ def get_sparse_hgnn_smoothing_matrix( where: - H is the incidence matrix of shape ``(num_nodes, num_hyperedges)`` - - D_n^-1/2 is the normalized node degree matrix - - D_e^-1 is the inverse hyperedge degree matrix (with W = I) + - `D_n^-1/2` is the normalized node degree matrix + - `D_e^-1` is the inverse hyperedge degree matrix (with W = I) Args: num_nodes: Total number of nodes. If ``None``, inferred from hyperedge index. @@ -817,7 +872,7 @@ def get_sparse_hgnnp_smoothing_matrix( ) return smoothing_matrix.coalesce() - def reduce(self, strategy: Literal["clique_expansion"], **kwargs) -> Tensor: + def reduce(self, strategy: Literal["clique_expansion"], **kwargs: Any) -> Tensor: """ Reduce the hypergraph to a graph represented by edge index using the specified strategy. @@ -843,12 +898,14 @@ def reduce_to_edge_index_on_clique_expansion( num_hyperedges: int | None = None, ) -> Tensor: """ - Construct a graph from a hypergraph via clique expansion using ``H @ H^T``, where ``H`` is the incidence matrix of the hypergraph. + Construct a graph from a hypergraph via clique expansion using ``H @ H^T``, + where ``H`` is the incidence matrix of the hypergraph. In clique expansion, each hyperedge is replaced by a clique connecting all its member nodes. For each hyperedge, all pairs of member nodes become edges in the resulting graph. This is computed efficiently using the incidence matrix: ``A = H @ H^T``, where ``H`` is - the sparse incidence matrix of shape ``[num_nodes, num_hyperedges]`` and ``A`` is the adjacency matrix of the clique-expanded graph. 
+ the sparse incidence matrix of shape ``[num_nodes, num_hyperedges]`` and ``A`` is + the adjacency matrix of the clique-expanded graph. Args: num_nodes: Total number of nodes. If ``None``, inferred from hyperedge index. @@ -897,23 +954,26 @@ def reduce_to_edge_index_on_random_direction( seed: int | None = None, ) -> tuple[Tensor, Tensor | None]: """ - Construct a graph from a hypergraph with methods proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs `_ paper. - Reference implementation: `source `_. + References: + - Construct a graph from a hypergraph with methods proposed in [HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs](https://arxiv.org/pdf/1809.02589.pdf) paper. + - Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/structure/graphs/graph.html#Graph.from_hypergraph_hypergcn). Args: x: Node feature matrix. Size ``(num_nodes, C)``. - with_mediators: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``. + with_mediators: Whether to use mediator to transform the hyperedges to edges in the + graph. Defaults to ``False``. remove_selfloops: Whether to remove self-loops. Defaults to ``True``. - return_weights: Whether to return the DHG-style reduced-edge weights alongside the edge index. Defaults to ``False``. + return_weights: Whether to return the DHG-style reduced-edge weights alongside the + edge index. Defaults to ``False``. Returns: - reduced_graph: A tuple ``(edge_index, edge_weights)`` where: - - ``edge_index`` has size ``(2, |num_edges|)``. - - ``edge_weights`` has size ``(|num_edges|,)`` when ``return_weights=True``, otherwise ``None``. + edge_index: The edge index of the reduced graph. Size ``(2, |num_edges|)``. + edge_weights: The edge weights of the reduced graph. Size ``(|num_edges|,)`` when + ``return_weights=True``, otherwise ``None``. 
Raises: ValueError: If any hyperedge contains fewer than 2 nodes. - """ + """ # noqa: E501 device = x.device generator = create_seeded_torch_generator(device, seed) @@ -923,7 +983,8 @@ def reduce_to_edge_index_on_random_direction( graph_edge_weights: list[float] = [] # Random direction (feature_dim, 1) for projecting nodes in each hyperedge - # Geometrically, we are choosing a random line through the origin in ℝᵈ, where ᵈ = feature_dim + # Geometrically, we are choosing a random line through the origin + # in ℝᵈ, where ᵈ = feature_dim random_direction = torch.rand( size=(x.shape[1], 1), dtype=x.dtype, @@ -936,13 +997,15 @@ def reduce_to_edge_index_on_random_direction( if num_nodes_in_edge < 2: raise ValueError("The number of vertices in an hyperedge must be >= 2.") - # projections (num_nodes_in_edge,) contains a scalar value for each node in the hyperedge, + # projections (num_nodes_in_edge,) contains a scalar value for + # each node in the hyperedge, # indicating its projection on the random vector 'random_direction'. # Key idea: If two points are very far apart in ℝᵈ, there is a high probability # that a random projection will still separate them projections = torch.matmul(x[edge], random_direction).squeeze() - # The indices of the nodes that the farthest apart in the direction of 'random_direction' + # The indices of the nodes that the farthest apart in the + # direction of 'random_direction' node_max_proj_idx = torch.argmax(projections) node_min_proj_idx = torch.argmin(projections) @@ -971,7 +1034,11 @@ def reduce_to_edge_index_on_random_direction( ) def remove_duplicate_edges(self) -> HyperedgeIndex: - """Remove duplicate edges from the hyperedge index. Keeps the tensor contiguous in memory.""" + """ + Remove duplicate edges from the hyperedge index. + + Keeps the tensor contiguous in memory. 
+ """ # Example: hyperedge_index = [[0, 1, 2, 2, 0, 3, 2], # [3, 4, 4, 3, 4, 3, 3]], shape (2, 7) # -> after torch.unique(..., dim=1): @@ -997,7 +1064,8 @@ def remove_hyperedges_with_fewer_than_k_nodes(self, k: int) -> HyperedgeIndex: >>> k = 3 >>> unique_hyperedge_ids: [0, 1, 2] ... # inverse -> idx_to_hyperedge_id, counts -> num_nodes_per_hyperedge - ... inverse = [0, 0, 1, 1, 2, 1] # (index into unique_hyperedge_ids per column) + ... # (index into unique_hyperedge_ids per column) + ... inverse = [0, 0, 1, 1, 2, 1] ... counts = [2, 3, 1] >>> # counts[inverse] is equivalent to: ... # for i, inv in enumerate(inverse): keep_mask[i] = counts[inv] @@ -1012,7 +1080,8 @@ def remove_hyperedges_with_fewer_than_k_nodes(self, k: int) -> HyperedgeIndex: k: The minimum number of nodes a hyperedge must contain to be kept. Returns: - hyperedge_index: A new `HyperedgeIndex` instance with hyperedges containing fewer than k nodes. + hyperedge_index: A new `HyperedgeIndex` instance with hyperedges + containing fewer than k nodes. """ validate_is_positive("k", k) @@ -1031,16 +1100,22 @@ def to_0based( hyperedge_ids_to_rebase: Tensor | None = None, ) -> HyperedgeIndex: """ - Convert hyperedge index to the 0-based format by rebasing node IDs to the range ``[0, num_nodes-1]`` and hyperedge IDs ``[0, num_hyperedges-1]``. + Convert hyperedge index to the 0-based format by rebasing node IDs to the range ``[0, + num_nodes-1]`` and hyperedge IDs ``[0, num_hyperedges-1]``. Args: - node_ids_to_rebase: Tensor of shape ``(num_nodes,)`` containing the original node IDs that need to be rebased to 0-based format. - If ``None``, all node IDs in the hyperedge index will be rebased to 0-based format based on their unique sorted order. - hyperedge_ids_to_rebase: Tensor of shape ``(num_hyperedges,)`` containing the original hyperedge IDs that need to be rebased to 0-based format. 
- If ``None``, all hyperedge IDs in the hyperedge index will be rebased to 0-based format based on their unique sorted order. + node_ids_to_rebase: Tensor of shape ``(num_nodes,)`` containing the original node IDs + that need to be rebased to 0-based format. + If ``None``, all node IDs in the hyperedge index will be rebased to 0-based format + based on their unique sorted order. + hyperedge_ids_to_rebase: Tensor of shape ``(num_hyperedges,)`` containing the original + hyperedge IDs that need to be rebased to 0-based format. + If ``None``, all hyperedge IDs in the hyperedge index will be rebased to + 0-based format based on their unique sorted order. Returns: - hyperedge_index: A new `HyperedgeIndex` instance with the hyperedge index converted to 0-based format. + hyperedge_index: A new `HyperedgeIndex` instance with the hyperedge index + converted to 0-based format. """ # Example: hyperedge_index after sorting: [[0, 0, 1, 2, 3, 4], # [3, 4, 4, 3, 4, 3]] diff --git a/hyperbench/utils/data_utils.py b/hyperbench/utils/data_utils.py index 7eb2a87b..1262d66b 100644 --- a/hyperbench/utils/data_utils.py +++ b/hyperbench/utils/data_utils.py @@ -65,8 +65,8 @@ def validate_is_between( ) -> None: if min_value > max_value: raise ValueError( - f"Invalid bounds for {name!r}: 'min_value' ({min_value}) " - f"cannot be greater than 'max_value' ({max_value})." + f"Invalid bounds for {name!r}: 'min_value' ({min_value}) cannot " + f"be greater than 'max_value' ({max_value})." 
) if not math.isfinite(value) or value < min_value or value > max_value: raise ValueError( @@ -109,7 +109,8 @@ def validate_ratios(ratios: list[int | float]) -> None: # Allow small imprecision in sum of ratios, but raise error if it's significant # Example: ratios = [0.8, 0.1, 0.1] -> sum = 1.0 (valid) # ratios = [0.8, 0.1, 0.05] -> sum = 0.95 (invalid, raises ValueError) - # ratios = [0.8, 0.1, 0.1, 0.0000001] -> sum = 1.0000001 (valid, allows small imprecision) + # (valid, allows small imprecision) + # ratios = [0.8, 0.1, 0.1, 0.0000001] -> sum = 1.0000001 ratio_sum = float(sum(ratios)) if abs(ratio_sum - 1.0) > 1e-6: raise ValueError(f"'ratios' must sum to 1.0, got {ratio_sum}.") diff --git a/hyperbench/utils/nn_utils.py b/hyperbench/utils/nn_utils.py index efebc73a..c68dd9b6 100644 --- a/hyperbench/utils/nn_utils.py +++ b/hyperbench/utils/nn_utils.py @@ -35,7 +35,8 @@ def maxmin_scatter( dim_size: int | None = None, ) -> Tensor: """ - Performs a scatter reduction that computes the channel-wise range (max - min) for each index group. + Performs a scatter reduction that computes the channel-wise range (max - min) for each + index group. Args: src: The source tensor containing the values to scatter. diff --git a/hyperbench/utils/node_utils.py b/hyperbench/utils/node_utils.py index 91fd2e47..b037c67c 100644 --- a/hyperbench/utils/node_utils.py +++ b/hyperbench/utils/node_utils.py @@ -33,7 +33,8 @@ def validate_node_space_setting(node_space_setting: NodeSpaceSetting) -> None: Validate that the node space setting is one of the supported values. Args: - node_space_setting: The node space setting to validate, which should be either "inductive" or "transductive". + node_space_setting: The node space setting to validate, which should be either "inductive" + or "transductive". Raises: ValueError: If the node space setting is not one of the supported values. 
@@ -42,5 +43,6 @@ def validate_node_space_setting(node_space_setting: NodeSpaceSetting) -> None: return raise ValueError( - f"'node_space_setting' must be one of 'transductive' or 'inductive', got {node_space_setting!r}." + f"'node_space_setting' must be one of 'transductive' or 'inductive', " + f"got {node_space_setting!r}." ) diff --git a/hyperbench/utils/sparse_utils.py b/hyperbench/utils/sparse_utils.py index 87010041..01b7dba6 100644 --- a/hyperbench/utils/sparse_utils.py +++ b/hyperbench/utils/sparse_utils.py @@ -8,7 +8,8 @@ def sparse_dropout( dropout_prob: float, fill_value: float = 0.0, ) -> Tensor: - """Dropout function for sparse matrix. + """ + Dropout function for sparse matrix. Returns a new sparse matrix with the same shape as the input sparse matrix, but with some elements dropped out. @@ -19,7 +20,8 @@ def sparse_dropout( fill_value: The fill value for dropped elements. Defaults to ``0.0``. Returns: - matrix: A new sparse matrix with the same shape as the input sparse matrix, but with some elements dropped out. + matrix: A new sparse matrix with the same shape as the input sparse matrix, + but with some elements dropped out. 
""" device = sparse_tensor.device @@ -41,9 +43,10 @@ def sparse_dropout( # Generate a binary mask matching the shape of values for elements to keep # 'torch.bernoulli()' samples 1 with probability keep_prob and 0 with probability dropout_prob - # Example: values = [0.5, 1.2, 3.4], keep_prob = 0.8 - # -> keep_mask might be [1, 0, 1], meaning we keep the 1st and 3rd elements, drop the 2nd - keep_mask = torch.bernoulli(torch.full_like(values, keep_prob, dtype=values.dtype)).to(device) + # Example: + # values = [0.5, 1.2, 3.4], keep_prob = 0.8 + # -> keep_mask might be [1, 0, 1], meaning we keep the 1st and 3rd elements, drop the 2nd + keep_mask = torch.bernoulli(torch.full_like(values, keep_prob)).to(device) if fill_value == 0.0: # If fill_value is 0, just zero out the dropped elements, @@ -52,7 +55,8 @@ def sparse_dropout( # -> new_values = [0.5*1, 1.2*0, 3.4*1] = [0.5, 0.0, 3.4] new_values = values * keep_mask else: - # If fill_value is non-zero, we must fill the dropped elements with the specified fill_value instead of zero + # If fill_value is non-zero, we must fill the dropped elements with the + # specified fill_value instead of zero # 'torch.logical_not(keep_mask)' identifies dropped elements where mask is 0 and # Example: values = [0.5, 1.2, 3.4], keep_mask = [1, 0, 1], fill_value = 9.9 # -> values_to_fill_mask = [0, 1, 0] diff --git a/pyproject.toml b/pyproject.toml index 71f8866d..44e0c85c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" requires-python = ">=3.10" authors = [ { name = "Tiziano Citro", email = "tcitro@unisa.it" }, - { name = "Daniele De Vinco", email = "ddevinco@unisa.it"} + { name = "Daniele De Vinco", email = "ddevinco@unisa.it" }, ] dependencies = [ "fastjsonschema>=2.21.2,<3.0.0", @@ -64,7 +64,7 @@ dev = [ ] docs = [ "mkdocstrings[python]>=1.0.4,<2.0.0", - "zensical>=0.0.43,<1.0.0", + "zensical>=0.0.44,<1.0.0", ] test = [ "pytest>=9.0.3,<10.0.0", @@ -78,16 +78,9 @@ where = ["."] include = 
["hyperbench"] [tool.pytest.ini_options] -addopts = [ - "--color=yes", - "--verbose", - "--tb=short", - "--strict-markers", -] +addopts = ["--color=yes", "--verbose", "--tb=short", "--strict-markers"] testpaths = ["hyperbench/tests", "hyperbench/integration_tests"] -markers = [ - "integration: tests that use real workflows", -] +markers = ["integration: tests that use real workflows"] filterwarnings = [ "ignore:.*torch.jit.script.*deprecated.*", "ignore:.*torch.jit.script.*is not supported in Python 3.14.*", @@ -108,7 +101,7 @@ omit = [ "hyperbench/tests/*", "hyperbench/nn/*", "hyperbench/models/*", - "hyperbench/hlp/*" + "hyperbench/hlp/*", ] [tool.coverage.report] @@ -131,29 +124,28 @@ line-length = 100 [tool.ruff.lint] select = [ - "B", # flake8-bugbear: bugs / bad practices (https://docs.astral.sh/ruff/rules/#flake8-bugbear-b) - "C4", # flake8-comprehensions (https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4) - "E", # pycodestyle (https://docs.astral.sh/ruff/rules/#pycodestyle-e-w) - "F", # pyflakes: undefined names, unused imports, etc. 
(https://docs.astral.sh/ruff/rules/#pyflakes-f) - "FURB", # refurb: modern/simpler code improvements (https://docs.astral.sh/ruff/rules/#refurb-furb) - "I", # isort (https://docs.astral.sh/ruff/rules/#isort-i) - "N", # pep8-naming (https://docs.astral.sh/ruff/rules/#pep8-naming-n) - "PERF", # perflint: performance suggestions (https://docs.astral.sh/ruff/rules/#perflint-perf) - "PL", # pylint-inspired rules (https://docs.astral.sh/ruff/rules/#pylint-pl) - "Q", # flake8-quotes: string quote normalization (https://docs.astral.sh/ruff/rules/#flake8-quotes-q) - "RET", # flake8-return: return statement simplifications (https://docs.astral.sh/ruff/rules/#flake8-return-ret) - "RSE", # flake8-raise: raise statement improvements (https://docs.astral.sh/ruff/rules/#flake8-raise-rse) - "RUF", # Ruff-specific rules (https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf) - "SIM", # flake8-simplify: simplify code (https://docs.astral.sh/ruff/rules/#flake8-simplify-sim) - "T10", # flake8-debugger (https://docs.astral.sh/ruff/rules/#flake8-debugger-t10) - "UP", # pyupgrade: modern Python syntax (https://docs.astral.sh/ruff/rules/#pyupgrade-up) + "B", # flake8-bugbear: bugs / bad practices (https://docs.astral.sh/ruff/rules/#flake8-bugbear-b) + "C4", # flake8-comprehensions (https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4) + "E", # pycodestyle (https://docs.astral.sh/ruff/rules/#pycodestyle-e-w) + "F", # pyflakes: undefined names, unused imports, etc. 
(https://docs.astral.sh/ruff/rules/#pyflakes-f) + "FURB", # refurb: modern/simpler code improvements (https://docs.astral.sh/ruff/rules/#refurb-furb) + "I", # isort (https://docs.astral.sh/ruff/rules/#isort-i) + "N", # pep8-naming (https://docs.astral.sh/ruff/rules/#pep8-naming-n) + "PERF", # perflint: performance suggestions (https://docs.astral.sh/ruff/rules/#perflint-perf) + "PL", # pylint-inspired rules (https://docs.astral.sh/ruff/rules/#pylint-pl) + "Q", # flake8-quotes: string quote normalization (https://docs.astral.sh/ruff/rules/#flake8-quotes-q) + "RET", # flake8-return: return statement simplifications (https://docs.astral.sh/ruff/rules/#flake8-return-ret) + "RSE", # flake8-raise: raise statement improvements (https://docs.astral.sh/ruff/rules/#flake8-raise-rse) + "RUF", # Ruff-specific rules (https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf) + "SIM", # flake8-simplify: simplify code (https://docs.astral.sh/ruff/rules/#flake8-simplify-sim) + "T10", # flake8-debugger (https://docs.astral.sh/ruff/rules/#flake8-debugger-t10) + "UP", # pyupgrade: modern Python syntax (https://docs.astral.sh/ruff/rules/#pyupgrade-up) ] ignore = [ - "E501", # line too long (https://docs.astral.sh/ruff/rules/line-too-long) - "I001", # unsorted imports (https://docs.astral.sh/ruff/rules/unsorted-imports) - "N812", # lowercase imported as non lowercase (https://docs.astral.sh/ruff/rules/lowercase-imported-as-non-lowercase) - "PLC0415", # import outside top-level (https://docs.astral.sh/ruff/rules/import-outside-top-level) - "PLR0913", # too many arguments (https://docs.astral.sh/ruff/rules/too-many-arguments) - "PLR2004", # magic numbers (https://docs.astral.sh/ruff/rules/magic-value-comparison) - "RET504", # unnecessary assignment (https://docs.astral.sh/ruff/rules/unnecessary-assign) + "I001", # unsorted imports (https://docs.astral.sh/ruff/rules/unsorted-imports) + "N812", # lowercase imported as non lowercase 
(https://docs.astral.sh/ruff/rules/lowercase-imported-as-non-lowercase) + "PLC0415", # import outside top-level (https://docs.astral.sh/ruff/rules/import-outside-top-level) + "PLR0913", # too many arguments (https://docs.astral.sh/ruff/rules/too-many-arguments) + "PLR2004", # magic numbers (https://docs.astral.sh/ruff/rules/magic-value-comparison) + "RET504", # unnecessary assignment (https://docs.astral.sh/ruff/rules/unnecessary-assign) ] diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index fcebed1f..24f589f2 100644 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -93,7 +93,7 @@ def validate_docstrings( def format_issues(issues: Sequence[DocstringIssue]) -> str: if not issues: - return "No docstring issues found." + return "\033[1;32mAll checks passed!\033[0m" lines = ["Docstring issues:"] for issue in issues: diff --git a/zensical.toml b/zensical.toml index fd2c78c7..b9a88519 100644 --- a/zensical.toml +++ b/zensical.toml @@ -1,6 +1,7 @@ [project] docs_dir = "docs" site_dir = "docs/site" +watch = ["hyperbench"] site_name = "©Hyperbench Documentation" site_description = "Documentation for Hyperbench" site_author = "Hypernetwork Research Group"