From 477d1187be864c11005cfdaa02220d0d9df75474 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Thu, 1 Aug 2024 14:10:05 +0200 Subject: [PATCH 1/2] resuming CM meetings --- dev/common-paper/README.md | 1 + dev/meetings/20240731.md | 118 ++++++++++++++++++++++++++++++++++ dev/reproducibility/README.md | 1 + 3 files changed, 120 insertions(+) create mode 100644 dev/common-paper/README.md create mode 100644 dev/meetings/20240731.md create mode 100644 dev/reproducibility/README.md diff --git a/dev/common-paper/README.md b/dev/common-paper/README.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/dev/common-paper/README.md @@ -0,0 +1 @@ +TBD diff --git a/dev/meetings/20240731.md b/dev/meetings/20240731.md new file mode 100644 index 0000000000..9efdeed050 --- /dev/null +++ b/dev/meetings/20240731.md @@ -0,0 +1,118 @@ +# Topic + +Syncing about the next steps for CM, CM4MLOps, CM4MLPerf, CM4ABTF, etc. + +# People + +* Grigori Fursin +* Arjun Suresh + +# Discussion + +## Need proper attribution + +Need to agree on the common text in the CM documentation and CM, CM4MLOps and CM4MLPerf GitHub +using these examples: + +* https://github.com/spack/spack?tab=readme-ov-file#authors +* https://cython.org (see Financial contributions section) + +* Author/creator +* Core developers +* Contributors (from the community, MLCommons and the Automation and Reproducibility TaskForce): + See https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md . +* Sponsorship & financial Contributions + +Should add to main GitHub and docs.mlcommons.org ... + +## Remove/reduce dependencies on non-MLCommons GitHub repositories + +At this moment, various non-MLCommons GATEOverflow GitHub repositories are used +in the official MLPerf workflows by default - that creates many possible legal issues +for CM and MLPerf users. 
+ +We should either move all such repositories to MLCommons +or, if it's not easily possible, create another neutral GitHub ID +such as mlcommons-aux with clear governance and agreement +with MLCommons to keep all dependencies in the MLCommons space. + +## Improve cm4mlops package + +Current cm4mlops package hides extra installation of various system dependencies +and CM repositories while using non-default branches and is difficult to debug if something goes wrong. + +A more standard way is to install cmind package and have a function to bootstrap cm4mlops +with a proper control over the flow, CM repositories and branches that can be changed via flags. + +For example: +```bash +pip install cmind +cm bootstrap cm4mlperf +cm bootstrap cm4mlops --branch=mlperf-inference +... +``` + +That will perform the same functions as cm4mlops package but will be easy to debug and will have easy to trace errors +that can be used in GitHub actions or other CI + +To be brainstormed further ... + +## Coordinate further developments + +* Document the roadmap and responsibilities for Q3-Q4 2024 +* Regular dev meetings (once a week or every two weeks)? +* Resume Discord channel discussions or mailing list (to be able to track discussions)? + +## CM4MLPerf inference v4.1 & v5.0 automation + +* Add CM for as many v4.1 submissions as possible to make it easier for everyone to reproduce results shortly after publication of results. +* Sync on the plans for inf v5.0 with MLCommons + +## CM4ABTF automation + +* Sync on the next steps during next meetings + +## Collaboration with Croissant + +* Sync on the next steps during next meetings + +## Testing infrastructure for CM4MLOps and CM4MLPerf + +* GitHub actions are not enough to test all dependencies and their versions for diverse hardware for CM-MLPerf workflows. + Brainstorm infrastructure for continuous testing (Grigori started prototyping some infrastructure). 
+ +## Optimize MLPerf inference reference implementations + +* We need to add known optimizations to the MLPerf inference implementations + +## Support MLPerf training + +* We should start prototyping the unified CM interface and automation for MLPerf training and wrap existing MLCube tasks + +## Prepare tutorials + +* Sync on the tutorial about CM internals and scripts +* Sync on the tutorial for SCC'24 + +## Common paper + +* Start preparing a common paper about CM on GitHub + +## Collect feedback from companies + +* There were various discussions with MLCommons companies about using CM for reproducibility. + We need to collect and aggregate all the feedback in one place. + +## Next generation of CM + +Grigori started testing some ideas and prototyping the next generation of CM, CM4MLOps and CM4MLPerf +based on 3 years of using CM to modularize and automate MLPerf and will share notes in the future dev meetings. + +## Sync with MLCommons + +* Prepare official CM page - should we do it with the MLPerf in v4.1 release? +* Prepare Press-release about CM with MLPerf inf v4.1 release? +* Where to host CM developments and discussions within MLCommons? + * Infra WG? + * Create a new *official* taskforce or WG on automation and reproducibility? 
+ diff --git a/dev/reproducibility/README.md b/dev/reproducibility/README.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/dev/reproducibility/README.md @@ -0,0 +1 @@ +TBD From 478f4e1eca2ddb3ba13fd60a8ef3bf3f8b01c6ac Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Thu, 1 Aug 2024 14:18:35 +0200 Subject: [PATCH 2/2] removing outdated links from CK --- ck/ck/kernel.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/ck/ck/kernel.py b/ck/ck/kernel.py index 70bfb1ca40..ea1f72ae1d 100755 --- a/ck/ck/kernel.py +++ b/ck/ck/kernel.py @@ -28,7 +28,7 @@ # We use 3 digits for the main (released) version and 4th digit for development revision -__version__ = "2.6.3" +__version__ = "2.6.4" # Do not use characters (to detect outdated version)! # Import packages that are global for the whole kernel @@ -6745,9 +6745,8 @@ def short_help(i): # .replace(' ','')+'\n' h += '\n'+cfg['help_web'].replace('\n', '').strip()+'\n' - h += 'CK Google group: https://bit.ly/ck-google-group\n' - h += 'CK Slack channel: https://cKnowledge.org/join-slack\n' - h += 'Stable CK components: https://cknow.io' + h += 'CK white paper: https://royalsocietypublishing.org/doi/10.1098/rsta.2020.0211\n' + h += 'CK ACM TechTalk: https://learning.acm.org/techtalks/reproducibility\n' if o == 'con': out(h) @@ -12385,11 +12384,11 @@ def access(i): o = i.get('out', '') # Print message that this framework was discontinued - if o == 'con': - out('') - out('WARNING: this framework was discontinued in favor of the new CK2 framework aka CM being developed by the open taskforce on automation and reproducibility at MLCommons:') - out(' https://bit.ly/mlperf-edu-wg') - out('') +# if o == 'con': +# out('') +# out('WARNING: this framework was discontinued in favor of the new CK2 framework aka CM being developed by the open taskforce on automation and reproducibility at MLCommons:') +# out(' https://bit.ly/mlperf-edu-wg') +# out('') # If profile cp = 
i.get('ck_profile', '')