Skip to content

Commit

Permalink
[GraphBolt][Dataset] Merging part of #7708, igb-het small datasets …
Browse files Browse the repository at this point in the history
…by Bowen Yao's (#7788)

Co-authored-by: Bowen Yao <112051015+BowenYao18@users.noreply.github.com>
Co-authored-by: BowenYao18 <by18@rice.edu>
  • Loading branch information
3 people authored Sep 7, 2024
1 parent 8772c02 commit bbe00c0
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
19 changes: 12 additions & 7 deletions examples/graphbolt/pyg/hetero/node_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ def create_dataloader(
datapipe = datapipe.copy_to(device=device)
need_copy = False

node_feature_keys = {"paper": ["feat"], "author": ["feat"]}
if args.dataset == "ogb-lsc-mag240m":
node_feature_keys = {
"paper": ["feat"],
"author": ["feat"],
"institution": ["feat"],
}
node_feature_keys["institution"] = ["feat"]
if "igb-het" in args.dataset:
node_feature_keys["institute"] = ["feat"]
node_feature_keys["fos"] = ["feat"]
# Fetch node features for the sampled subgraph.
datapipe = datapipe.fetch_feature(features, node_feature_keys)

Expand Down Expand Up @@ -335,8 +335,13 @@ def parse_args():
"--dataset",
type=str,
default="ogb-lsc-mag240m",
choices=["ogb-lsc-mag240m"],
help="Dataset name. Possible values: ogb-lsc-mag240m",
choices=[
"ogb-lsc-mag240m",
"igb-het-tiny",
"igb-het-small",
"igb-het-medium",
],
help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het-[tiny|small|medium].",
)
parser.add_argument(
"--fanout",
Expand Down
16 changes: 16 additions & 0 deletions python/dgl/graphbolt/impl/ondisk_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,16 @@ class BuiltinDataset(OnDiskDataset):
Self edges are added to the original graph.
Node features are stored as float32.
**igb-het-[tiny|small|medium]**
The igb-het-[tiny|small|medium] dataset is a heterogeneous citation network,
which is designed for developers to train and evaluate GNN models with
high fidelity. See more details in `igb-het-[tiny|small|medium]
<https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_.
.. note::
Four reverse edge types are added to the original graph.
Node features are stored as float32.
Parameters
----------
name : str
Expand Down Expand Up @@ -1019,6 +1029,10 @@ class BuiltinDataset(OnDiskDataset):
"igb-hom-tiny-seeds",
"igb-hom-small",
"igb-hom-small-seeds",
"igb-het-tiny",
"igb-het-tiny-seeds",
"igb-het-small",
"igb-het-small-seeds",
]
_large_datasets = [
"ogb-lsc-mag240m",
Expand All @@ -1031,6 +1045,8 @@ class BuiltinDataset(OnDiskDataset):
"igb-hom-large-seeds",
"igb-hom",
"igb-hom-seeds",
"igb-het-medium",
"igb-het-medium-seeds",
]
_all_datasets = _datasets + _large_datasets

Expand Down

0 comments on commit bbe00c0

Please sign in to comment.