diff --git a/.gitignore b/.gitignore index 6dfd853..4367326 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,6 @@ dmypy.json # experiments exps + +# snakemake logs +.snakemake diff --git a/docs/usage.md b/docs/usage.md index d42946e..4da91d7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,7 +4,21 @@ ### Data Preparation -The code for downloading and preprocessing CMIP6 data is coming soon +First install `snakemake` following [these instructions](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html) + +To download and regrid a CMIP6 dataset to a common resolution (e.g., 1.40625 degree), go to the corresponding directory inside `snakemake_configs` and run +```bash +snakemake all --configfile config_2m_temperature.yml --cores 8 +``` +This script will download and regrid the `2m_temperature` data in parallel using 8 CPU cores. Modify `configfile` for other variables. After downloading and regridding, run the following script to preprocess the `.nc` files into `.npz` format for pretraining ClimaX +```bash +python src/data_preprocessing/nc2np_equally_cmip6.py \ + --dataset mpi \ + --path /data/CMIP6/MPI-ESM/1.40625deg/ \ + --num_shards 10 \ + --save_dir /data/CMIP6/MPI-ESM/1.40625deg_np_10shards +``` +in which `num_shards` denotes the number of chunks to break each `.nc` file into. 
# Snakemake workflow for AWI-ESM-1-1-LR (historical, 6hrPlevPt): download the
# raw NetCDF files, regrid them, then rename the temporary result.
# `config` comes from the YAML passed with --configfile; this file reads the
# keys datadir, name, cmip_name, era_name, run and res.

# One time-range string per source file: yearly files covering 1850..2014.
year_strings = [f'{y}01010600-{y+1}01010000' for y in range(1850, 2015, 1)]

# Echo the loaded configuration when the workflow is parsed.
print(config)

# Fetch one raw file with wget. The server, model name and dataset version
# (v20200212) are hard-coded in the URL.
# NOTE: the -O path is built from config[name] while the declared output uses
# the {name} wildcard; they match only when the requested target's name equals
# config[name], which is how rule `all` builds its targets.
rule download:
    output:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc",
    shell:
        "wget https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/{config["
        "run]}/6hrPlevPt/"
        "{config[cmip_name]}/gn/v20200212/"
        "{config[cmip_name]}_6hrPlevPt_AWI-ESM-1-1-LR_historical_{config[run]}_gn_{wildcards.year_str}.nc "
        "-O {wildcards.dataset}/raw/{config[name]}/{config[name]}_{wildcards.year_str}_raw.nc"

# Regrid one raw file to {res} degrees by calling regrid.py. The script is
# referenced by a relative path, so the workflow has to be run from this
# directory. The result is written with the `nc.tmp` ending and the variable
# is renamed from cmip_name to era_name.
rule regrid:
    input:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc"
    output:
        "{dataset}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc.tmp"
    shell:
        "python ../../src/data_preprocessing/regrid.py \
        --input_fns {input} \
        --output_dir {wildcards.dataset}/{wildcards.res}deg/{wildcards.name} \
        --ddeg_out {wildcards.res} \
        --cmip 1 \
        --rename {config[cmip_name]} {config[era_name]} \
        --file_ending nc.tmp"

# Finalise a regridded file: for every configured resolution, move the
# `.nc.tmp` file to its final `.nc` name. Despite the rule's name nothing is
# deleted at present; the removal of the raw file is commented out below.
rule delete:
    input:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc.tmp",
               res=config['res']),
    output:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc",
               res=config['res'])
    priority: 100
    run:
        for i, o in zip(input, output):
            shell("mv {i} {o}")
            # shell("rm {wildcards.dataset}/raw/{wildcards.name}/{wildcards.name}_{wildcards.year_str}_raw.nc"),


# Target rule: requests the final regridded file for every time range and
# every configured resolution. It is declared last, so it must be named
# explicitly on the command line (`snakemake all ...`).
rule all:
    input:
        expand("{datadir}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc",
               datadir=config['datadir'], res=config['res'], name=config['name'], year_str=year_strings)
b/snakemake_configs/AWI-ESM/config_10m_u_component_of_wind.yml @@ -0,0 +1,8 @@ +datadir: /data/CMIP6/AWI-ESM +name: 10m_u_component_of_wind +cmip_name: uas +era_name: u10 +run: r1i1p1f1 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/AWI-ESM/config_10m_v_component_of_wind.yml b/snakemake_configs/AWI-ESM/config_10m_v_component_of_wind.yml new file mode 100755 index 0000000..c0652f5 --- /dev/null +++ b/snakemake_configs/AWI-ESM/config_10m_v_component_of_wind.yml @@ -0,0 +1,8 @@ +datadir: /data/CMIP6/AWI-ESM +name: 10m_v_component_of_wind +cmip_name: vas +era_name: v10 +run: r1i1p1f1 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/AWI-ESM/config_2m_temperature.yml b/snakemake_configs/AWI-ESM/config_2m_temperature.yml new file mode 100755 index 0000000..75cbb12 --- /dev/null +++ b/snakemake_configs/AWI-ESM/config_2m_temperature.yml @@ -0,0 +1,8 @@ +datadir: /data/CMIP6/AWI-ESM +name: 2m_temperature +cmip_name: tas +era_name: t2m +run: r1i1p1f1 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/AWI-ESM/config_geopotential.yml b/snakemake_configs/AWI-ESM/config_geopotential.yml new file mode 100755 index 0000000..ced9bca --- /dev/null +++ b/snakemake_configs/AWI-ESM/config_geopotential.yml @@ -0,0 +1,8 @@ +datadir: /data/CMIP6/AWI-ESM +name: geopotential +cmip_name: zg +era_name: z +run: r1i1p1f1 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/AWI-ESM/config_specific_humidity.yml b/snakemake_configs/AWI-ESM/config_specific_humidity.yml new file mode 100755 index 0000000..69ac0d9 --- /dev/null +++ b/snakemake_configs/AWI-ESM/config_specific_humidity.yml @@ -0,0 +1,8 @@ +datadir: /data/CMIP6/AWI-ESM +name: specific_humidity +cmip_name: hus +era_name: q +run: r1i1p1f1 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/AWI-ESM/config_temperature.yml 
# Snakemake workflow for CMCC-CM2-HR4 (historical, 6hrPlevPt): download the
# raw NetCDF files, regrid them, then rename the temporary result.
# `config` comes from the YAML passed with --configfile; this file reads the
# keys datadir, name, cmip_name, era_name, run and res.

# One time-range string per source file: yearly files covering 1850..2014.
year_strings = [f'{y}01010600-{y+1}01010000' for y in range(1850, 2015, 1)]

# Echo the loaded configuration when the workflow is parsed.
print(config)

# Fetch one raw file with wget. The server, model name and dataset version
# (v20200904) are hard-coded in the URL.
# NOTE: the -O path is built from config[name] while the declared output uses
# the {name} wildcard; they match only when the requested target's name equals
# config[name], which is how rule `all` builds its targets.
rule download:
    output:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc",
    shell:
        "wget https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/CMCC/CMCC-CM2-HR4/historical/{config["
        "run]}/6hrPlevPt/"
        "{config[cmip_name]}/gn/v20200904/"
        "{config[cmip_name]}_6hrPlevPt_CMCC-CM2-HR4_historical_{config[run]}_gn_{wildcards.year_str}.nc "
        "-O {wildcards.dataset}/raw/{config[name]}/{config[name]}_{wildcards.year_str}_raw.nc"

        # Example of a fully expanded URL:
        # https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/CMCC/CMCC-CM2-HR4/historical/r1i1p1f1/6hrPlevPt/ta/gn/v20200904/ta_6hrPlevPt_CMCC-CM2-HR4_historical_r1i1p1f1_gn_185001010600-185101010000.nc

# Regrid one raw file to {res} degrees by calling regrid.py. The script is
# referenced by a relative path, so the workflow has to be run from this
# directory. The result is written with the `nc.tmp` ending and the variable
# is renamed from cmip_name to era_name.
rule regrid:
    input:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc"
    output:
        "{dataset}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc.tmp"
    shell:
        "python ../../src/data_preprocessing/regrid.py \
        --input_fns {input} \
        --output_dir {wildcards.dataset}/{wildcards.res}deg/{wildcards.name} \
        --ddeg_out {wildcards.res} \
        --cmip 1 \
        --rename {config[cmip_name]} {config[era_name]} \
        --file_ending nc.tmp"

# Finalise a regridded file: for every configured resolution, move the
# `.nc.tmp` file to its final `.nc` name. Despite the rule's name nothing is
# deleted at present; the removal of the raw file is commented out below.
rule delete:
    input:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc.tmp",
               res=config['res']),
    output:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc",
               res=config['res'])
    priority: 100
    run:
        for i, o in zip(input, output):
            shell("mv {i} {o}")
            # shell("rm {wildcards.dataset}/raw/{wildcards.name}/{wildcards.name}_{wildcards.year_str}_raw.nc"),


# Target rule: requests the final regridded file for every time range and
# every configured resolution. It is declared last, so it must be named
# explicitly on the command line (`snakemake all ...`).
rule all:
    input:
        expand("{datadir}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc",
               datadir=config['datadir'], res=config['res'], name=config['name'], year_str=year_strings)
# Snakemake workflow for MPI-ESM-1-2-HAM (HAMMOZ-Consortium, historical,
# 6hrPlevPt): download the raw NetCDF files, regrid them, then rename the
# temporary result.
# `config` comes from the YAML passed with --configfile; this file reads the
# keys datadir, name, cmip_name, era_name, run, version and res.

# Time-range strings of the source files: 20-year files from 1850, followed by
# a final 5-year file (2010-2015).
year_strings = [
    '185001010600-187001010000',
    '187001010600-189001010000',
    '189001010600-191001010000',
    '191001010600-193001010000',
    '193001010600-195001010000',
    '195001010600-197001010000',
    '197001010600-199001010000',
    '199001010600-201001010000',
    '201001010600-201501010000',
]

# Echo the loaded configuration when the workflow is parsed.
print(config)

# Fetch one raw file with wget. The server and model name are hard-coded; the
# dataset version is taken from config[version].
# NOTE: the -O path is built from config[name] while the declared output uses
# the {name} wildcard; they match only when the requested target's name equals
# config[name], which is how rule `all` builds its targets.
rule download:
    output:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc",
    shell:
        "wget https://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/historical/{config["
        "run]}/6hrPlevPt/"
        "{config[cmip_name]}/gn/{config[version]}/"
        "{config[cmip_name]}_6hrPlevPt_MPI-ESM-1-2-HAM_historical_{config[run]}_gn_{wildcards.year_str}.nc "
        "-O {wildcards.dataset}/raw/{config[name]}/{config[name]}_{wildcards.year_str}_raw.nc"

# Regrid one raw file to {res} degrees by calling regrid.py. The script is
# referenced by a relative path, so the workflow has to be run from this
# directory. The result is written with the `nc.tmp` ending and the variable
# is renamed from cmip_name to era_name.
rule regrid:
    input:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc"
    output:
        "{dataset}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc.tmp"
    shell:
        "python ../../src/data_preprocessing/regrid.py \
        --input_fns {input} \
        --output_dir {wildcards.dataset}/{wildcards.res}deg/{wildcards.name} \
        --ddeg_out {wildcards.res} \
        --cmip 1 \
        --rename {config[cmip_name]} {config[era_name]} \
        --file_ending nc.tmp"

# Finalise a regridded file: for every configured resolution, move the
# `.nc.tmp` file to its final `.nc` name. Despite the rule's name nothing is
# deleted at present; the removal of the raw file is commented out below.
rule delete:
    input:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc.tmp",
               res=config['res']),
    output:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc",
               res=config['res'])
    priority: 100
    run:
        for i, o in zip(input, output):
            shell("mv {i} {o}")
            # shell("rm {wildcards.dataset}/raw/{wildcards.name}/{wildcards.name}_{wildcards.year_str}_raw.nc"),


# Target rule: requests the final regridded file for every time range and
# every configured resolution. It is declared last, so it must be named
# explicitly on the command line (`snakemake all ...`).
rule all:
    input:
        expand("{datadir}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc",
               datadir=config['datadir'], res=config['res'], name=config['name'], year_str=year_strings)
b/snakemake_configs/HAMMOZ/config_2m_temperature.yml @@ -0,0 +1,9 @@ +datadir: /data/CMIP6/HAMMOZ +name: 2m_temperature +cmip_name: tas +era_name: t2m +run: r1i1p1f1 +version: v20190628 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/HAMMOZ/config_geopotential.yml b/snakemake_configs/HAMMOZ/config_geopotential.yml new file mode 100755 index 0000000..c0d7f39 --- /dev/null +++ b/snakemake_configs/HAMMOZ/config_geopotential.yml @@ -0,0 +1,9 @@ +datadir: /data/CMIP6/HAMMOZ +name: geopotential +cmip_name: zg +era_name: z +run: r1i1p1f1 +version: v20190628 +res: + - 1.40625 + # - 5.625 diff --git a/snakemake_configs/HAMMOZ/config_specific_humidity.yml b/snakemake_configs/HAMMOZ/config_specific_humidity.yml new file mode 100755 index 0000000..64a44ff --- /dev/null +++ b/snakemake_configs/HAMMOZ/config_specific_humidity.yml @@ -0,0 +1,9 @@ +datadir: /data/CMIP6/HAMMOZ +name: specific_humidity +cmip_name: hus +era_name: q +run: r1i1p1f1 +version: v20190628 +res: + - 1.40625 + # - 5.625 diff --git a/snakemake_configs/HAMMOZ/config_temperature.yml b/snakemake_configs/HAMMOZ/config_temperature.yml new file mode 100755 index 0000000..a8bea93 --- /dev/null +++ b/snakemake_configs/HAMMOZ/config_temperature.yml @@ -0,0 +1,9 @@ +datadir: /data/CMIP6/HAMMOZ +name: temperature +cmip_name: ta +era_name: t +run: r1i1p1f1 +version: v20190628 +res: + - 1.40625 + # - 5.625 diff --git a/snakemake_configs/HAMMOZ/config_u_component_of_wind.yml b/snakemake_configs/HAMMOZ/config_u_component_of_wind.yml new file mode 100755 index 0000000..009ed36 --- /dev/null +++ b/snakemake_configs/HAMMOZ/config_u_component_of_wind.yml @@ -0,0 +1,9 @@ +datadir: /data/CMIP6/HAMMOZ +name: u_component_of_wind +cmip_name: ua +era_name: u +run: r1i1p1f1 +version: v20190628 +res: + - 1.40625 + # - 5.625 diff --git a/snakemake_configs/HAMMOZ/config_v_component_of_wind.yml b/snakemake_configs/HAMMOZ/config_v_component_of_wind.yml new file mode 100755 index 0000000..1703f2c --- 
# Snakemake workflow for MPI-ESM1-2-HR (historical): download the raw NetCDF
# files, regrid them, then rename the temporary result.
# `config` comes from the YAML passed with --configfile; this file reads the
# keys datadir, server_prefix, name, cmip_name, era_name, output_type, run,
# version and res.

# year_strings = [
#     '185001010600-187001010000',
#     '187001010600-189001010000',
#     '189001010600-191001010000',
#     '191001010600-193001010000',
#     '193001010600-195001010000',
#     '195001010600-197001010000',
#     '197001010600-199001010000',
#     '199001010600-201001010000',
#     '201001010600-201501010000'
# ]
# Time-range strings of the 5-year source files. The start/end timestamps in
# the file names depend on the output type; anything other than '6hrPlev' and
# 'E3hr' (e.g. the '6hrPlevPt' used by the bundled configs) takes the last
# branch.
if config['output_type'] == '6hrPlev':
    year_strings = [f'{y}01010300-{y+4}12312100' for y in range(1850, 2015, 5)]
elif config['output_type'] == 'E3hr':
    year_strings = [f'{y}01010130-{y+4}12312230' for y in range(1850, 2015, 5)]
else:
    year_strings = [f'{y}01010600-{y+5}01010000' for y in range(1850, 2015, 5)]

# Echo the loaded configuration when the workflow is parsed.
print(config)


#v20190815
# Fetch one raw file with wget. Server prefix, output type and dataset version
# all come from the config; only the institute/model path is hard-coded.
# NOTE: the -O path is built from config[name] while the declared output uses
# the {name} wildcard; they match only when the requested target's name equals
# config[name], which is how rule `all` builds its targets.
rule download:
    output:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc",
    shell:
        "wget {config[server_prefix]}/MPI-M/MPI-ESM1-2-HR/historical/{config["
        "run]}/{config[output_type]}/"
        "{config[cmip_name]}/gn/{config[version]}/"
        "{config[cmip_name]}_{config[output_type]}_MPI-ESM1-2-HR_historical_{config[run]}_gn_{wildcards.year_str}.nc "
        "-O {wildcards.dataset}/raw/{config[name]}/{config[name]}_{wildcards.year_str}_raw.nc"

        # Examples of fully expanded URLs on the two servers used by the configs:
        # http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/6hrPlevPt/tas/gn/v20190815/tas_6hrPlevPt_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001010600-185501010000.nc

        # https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/6hrPlevPt/uas/gn/v20190710/uas_6hrPlevPt_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001010600-185501010000.nc

# Regrid one raw file to {res} degrees by calling regrid.py. The script is
# referenced by a relative path, so the workflow has to be run from this
# directory. The result is written with the `nc.tmp` ending and the variable
# is renamed from cmip_name to era_name.
rule regrid:
    input:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc"
    output:
        "{dataset}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc.tmp"
    shell:
        "python ../../src/data_preprocessing/regrid.py \
        --input_fns {input} \
        --output_dir {wildcards.dataset}/{wildcards.res}deg/{wildcards.name} \
        --ddeg_out {wildcards.res} \
        --cmip 1 \
        --rename {config[cmip_name]} {config[era_name]} \
        --file_ending nc.tmp"

# Finalise a regridded file: for every configured resolution, move the
# `.nc.tmp` file to its final `.nc` name. Despite the rule's name nothing is
# deleted at present; the removal of the raw file is commented out below.
rule delete:
    input:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc.tmp",
               res=config['res']),
    output:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc",
               res=config['res'])
    priority: 100
    run:
        for i, o in zip(input, output):
            shell("mv {i} {o}")
            # shell("rm {wildcards.dataset}/raw/{wildcards.name}/{wildcards.name}_{wildcards.year_str}_raw.nc"),


# Target rule: requests the final regridded file for every time range and
# every configured resolution. It is declared last, so it must be named
# explicitly on the command line (`snakemake all ...`).
rule all:
    input:
        expand("{datadir}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc",
               datadir=config['datadir'], res=config['res'], name=config['name'], year_str=year_strings)
10m_v_component_of_wind +cmip_name: vas +era_name: v10 +output_type: 6hrPlevPt +run: r1i1p1f1 +version: v20190710 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/MPI-ESM/config_2m_temperature.yml b/snakemake_configs/MPI-ESM/config_2m_temperature.yml new file mode 100755 index 0000000..1826ba0 --- /dev/null +++ b/snakemake_configs/MPI-ESM/config_2m_temperature.yml @@ -0,0 +1,11 @@ +datadir: /data/CMIP6/MPI-ESM +server_prefix: http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP +name: 2m_temperature +cmip_name: tas +era_name: t2m +output_type: 6hrPlevPt +run: r1i1p1f1 +version: v20190815 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/MPI-ESM/config_geopotential.yml b/snakemake_configs/MPI-ESM/config_geopotential.yml new file mode 100755 index 0000000..a55f924 --- /dev/null +++ b/snakemake_configs/MPI-ESM/config_geopotential.yml @@ -0,0 +1,11 @@ +datadir: /data/CMIP6/MPI-ESM +server_prefix: http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP +name: geopotential +cmip_name: zg +era_name: z +output_type: 6hrPlevPt +run: r1i1p1f1 +version: v20190815 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/MPI-ESM/config_specific_humidity.yml b/snakemake_configs/MPI-ESM/config_specific_humidity.yml new file mode 100755 index 0000000..94485aa --- /dev/null +++ b/snakemake_configs/MPI-ESM/config_specific_humidity.yml @@ -0,0 +1,11 @@ +datadir: /data/CMIP6/MPI-ESM +server_prefix: http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/CMIP +name: specific_humidity +cmip_name: hus +era_name: q +output_type: 6hrPlevPt +run: r1i1p1f1 +version: v20190815 +res: + - 1.40625 + # - 5.625 \ No newline at end of file diff --git a/snakemake_configs/MPI-ESM/config_temperature.yml b/snakemake_configs/MPI-ESM/config_temperature.yml new file mode 100755 index 0000000..0abecc6 --- /dev/null +++ 
# Snakemake workflow for TaiESM1 (AS-RCEC, historical, 6hrPlevPt): download
# the raw NetCDF files, regrid them, then rename the temporary result.
# `config` comes from the YAML passed with --configfile; this file reads the
# keys datadir, server_prefix, name, cmip_name, era_name, run and res.

# Time-range strings of the source files, listed explicitly because they are
# not regular: the first file starts at 0000 instead of 0600, the 1940 file
# starts on January 2nd, and the last file spans 5 years instead of 10.
year_strings = [
    '185001010000-186001010000',
    '186001010600-187001010000',
    '187001010600-188001010000',
    '188001010600-189001010000',
    '189001010600-190001010000',
    '190001010600-191001010000',
    '191001010600-192001010000',
    '192001010600-193001010000',
    '193001010600-194001010000',
    '194001020000-195001010000',
    '195001010600-196001010000',
    '196001010600-197001010000',
    '197001010600-198001010000',
    '198001010600-199001010000',
    '199001010600-200001010000',
    '200001010600-201001010000',
    '201001010600-201501010000'
]

# Echo the loaded configuration when the workflow is parsed.
print(config)

# Fetch one raw file with wget. The server prefix comes from the config; the
# institute/model path and the dataset version (v20201112) are hard-coded.
# NOTE: the -O path is built from config[name] while the declared output uses
# the {name} wildcard; they match only when the requested target's name equals
# config[name], which is how rule `all` builds its targets.
rule download:
    output:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc",
    shell:
        "wget {config[server_prefix]}/AS-RCEC/TaiESM1/historical/{config["
        "run]}/6hrPlevPt/"
        "{config[cmip_name]}/gn/v20201112/"
        "{config[cmip_name]}_6hrPlevPt_TaiESM1_historical_{config[run]}_gn_{wildcards.year_str}.nc "
        "-O {wildcards.dataset}/raw/{config[name]}/{config[name]}_{wildcards.year_str}_raw.nc"

        # Examples of fully expanded URLs on two servers:
        # http://esgf.rcec.sinica.edu.tw/thredds/fileServer/my_cmip6_dataroot/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/6hrPlevPt/ta/gn/v20201112/ta_6hrPlevPt_TaiESM1_historical_r1i1p1f1_gn_185001010000-186001010000.nc

        # https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/6hrPlevPt/ta/gn/v20201112/ta_6hrPlevPt_TaiESM1_historical_r1i1p1f1_gn_185001010000-186001010000.nc

# Regrid one raw file to {res} degrees by calling regrid.py. The script is
# referenced by a relative path, so the workflow has to be run from this
# directory. The result is written with the `nc.tmp` ending and the variable
# is renamed from cmip_name to era_name.
rule regrid:
    input:
        "{dataset}/raw/{name}/{name}_{year_str}_raw.nc"
    output:
        "{dataset}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc.tmp"
    shell:
        "python ../../src/data_preprocessing/regrid.py \
        --input_fns {input} \
        --output_dir {wildcards.dataset}/{wildcards.res}deg/{wildcards.name} \
        --ddeg_out {wildcards.res} \
        --cmip 1 \
        --rename {config[cmip_name]} {config[era_name]} \
        --file_ending nc.tmp"

# Finalise a regridded file: for every configured resolution, move the
# `.nc.tmp` file to its final `.nc` name. Despite the rule's name nothing is
# deleted at present; the removal of the raw file is commented out below.
rule delete:
    input:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc.tmp",
               res=config['res']),
    output:
        expand("{{dataset}}/{res}deg/{{name}}/{{name}}_{{year_str}}_{res}deg.nc",
               res=config['res'])
    priority: 100
    run:
        for i, o in zip(input, output):
            shell("mv {i} {o}")
            # shell("rm {wildcards.dataset}/raw/{wildcards.name}/{wildcards.name}_{wildcards.year_str}_raw.nc"),


# Target rule: requests the final regridded file for every time range and
# every configured resolution. It is declared last, so it must be named
# explicitly on the command line (`snakemake all ...`).
rule all:
    input:
        expand("{datadir}/{res}deg/{name}/{name}_{year_str}_{res}deg.nc",
               datadir=config['datadir'], res=config['res'], name=config['name'], year_str=year_strings)
def _fit_time_axis(data, length):
    """Trim *data* to at most *length* entries along axis 0, padding if short.

    Longer inputs are truncated (the original intent: drop the extra day of a
    366-day year). Shorter inputs are padded by repeating their trailing
    ``length - len(data)`` entries; if the input holds fewer entries than the
    shortfall, the result stays shorter than *length*.
    """
    data = data[:length]
    if len(data) < length:
        n_missing_data = length - len(data)
        data = np.concatenate((data, data[-n_missing_data:]), axis=0)
    return data


def extract_one_year(path, year, variables, len_to_extract, np_vars, normalize_mean, normalize_std):
    """Load one file period of every variable and record its statistics.

    For each name in *variables*, the files matching ``<path>/<var>/*<year>*.nc``
    are opened as a single dataset. Variables whose data has 3 dimensions get
    an extra axis inserted at position 1 and are stored under ``var``.
    Variables with 4 dimensions are split per pressure level, keeping only the
    levels listed in ``DEFAULT_PRESSURE_LEVELS``, and stored under
    ``f"{var}_{level}"`` (level = ``plev / 100``).

    Args:
        path: Root directory holding one sub-directory per variable.
        year: Substring identifying the file period (used in the glob pattern).
        variables: Variable names; each must be a key of ``NAME_TO_VAR``.
        len_to_extract: Number of entries to keep along axis 0
            (presumably the time axis — TODO confirm).
        np_vars: Dict filled in place with the extracted arrays.
        normalize_mean: Dict of lists, updated in place; one mean (taken over
            axes 0, 2 and 3) is appended per key for every call.
        normalize_std: Same as *normalize_mean*, for the standard deviation.

    Returns:
        ``(np_vars, normalize_mean, normalize_std, lat, lon)`` where *lat* and
        *lon* are the coordinate values of the last variable processed, or
        ``None`` when *variables* is empty.
    """
    # Defined up front so an empty `variables` does not hit an unbound name.
    lat, lon = None, None

    for var in variables:
        ps = glob.glob(os.path.join(path, var, f"*{year}*.nc"))
        ds = xr.open_mfdataset(ps, combine="by_coords", parallel=True)  # dataset for a single variable
        code = NAME_TO_VAR[var]
        lat = ds.lat.values
        lon = ds.lon.values

        if len(ds[code].shape) == 3:  # surface level variables
            ds[code] = ds[code].expand_dims("val", axis=1)
            fields = [(var, ds[code])]
        else:  # multiple-level variables, only use a subset
            assert len(ds[code].shape) == 4
            all_levels = (ds["plev"][:].to_numpy() / 100).astype(int)  # 92500 --> 925
            all_levels = np.intersect1d(all_levels, DEFAULT_PRESSURE_LEVELS)
            fields = [
                (f"{var}_{level}", ds.sel(plev=[level * 100.0])[code])
                for level in all_levels
            ]

        for key, data_array in fields:
            # remove the last 24 hours if this year has 366 days
            np_vars[key] = _fit_time_axis(data_array.to_numpy(), len_to_extract)

            # Accumulate per-period statistics under the *stored* key. The
            # previous code tested the bare variable name for pressure-level
            # variables, so their lists were overwritten on every call.
            normalize_mean.setdefault(key, []).append(np_vars[key].mean(axis=(0, 2, 3)))
            normalize_std.setdefault(key, []).append(np_vars[key].std(axis=(0, 2, 3)))

    return np_vars, normalize_mean, normalize_std, lat, lon
def aggregate_mean_std(normalize_mean, normalize_std):
    """Combine per-period means and stds into one overall mean and std.

    Both dicts map a variable key to a list of equally shaped arrays, one per
    file period. They are updated in place: each list is replaced by a single
    array. All periods are weighted equally.

    The overall variance follows the law of total variance,
    ``var(X) = E[var(X|Y)] + var(E[X|Y])``. The second term is computed with
    ``ndarray.var`` rather than ``E[m**2] - E[m]**2``, which loses all
    precision in float32 when the means are large compared with their spread.

    Returns:
        The same ``(normalize_mean, normalize_std)`` dict objects.
    """
    for var in normalize_mean:
        mean = np.stack(normalize_mean[var], axis=0)
        std = np.stack(normalize_std[var], axis=0)

        # var(X) = E[var(X|Y)] + var(E[X|Y])
        variance = (std**2).mean(axis=0) + mean.var(axis=0)
        # E[X] = E[E[X|Y]]
        normalize_mean[var] = mean.mean(axis=0)
        normalize_std[var] = np.sqrt(variance)

    return normalize_mean, normalize_std


def nc2np(dataset, path, variables, years, hours_per_year, num_shards_per_year, save_dir):
    """Convert regridded ``.nc`` files into sharded ``.npz`` files plus statistics.

    For every entry of *years*, the data of all *variables* is extracted and
    written as ``<save_dir>/train/<year>_<shard_id>.npz``. Afterwards the
    aggregated ``normalize_mean.npz`` / ``normalize_std.npz`` and the
    ``lat.npy`` / ``lon.npy`` coordinates are written to *save_dir*.

    Args:
        dataset: Dataset identifier; ``'tai'`` and ``'hammoz'`` get special
            handling for their shorter final file.
        path: Root directory with one sub-directory of ``.nc`` files per variable.
        variables: Variable names to extract.
        years: File-period strings, one per source file.
        hours_per_year: Number of entries taken from each file.
        num_shards_per_year: Number of ``.npz`` shards written per file.
        save_dir: Output directory.

    Raises:
        ValueError: If the shard count for a file is below one or does not
            divide the number of extracted entries.
    """
    os.makedirs(os.path.join(save_dir, "train"), exist_ok=True)
    normalize_mean = {}
    normalize_std = {}
    lat, lon = None, None
    short_final_period = '201001010600-201501010000'

    for year in tqdm(years):
        np_vars = {}
        is_short_period = year == short_final_period and dataset in ('hammoz', 'tai')
        # special case, only 7304 points
        len_to_extract = 7300 if is_short_period else hours_per_year

        np_vars, normalize_mean, normalize_std, lat, lon = extract_one_year(
            path,
            year,
            variables,
            len_to_extract,
            np_vars,
            normalize_mean,
            normalize_std,
        )

        # The short final file gets proportionally fewer shards.
        num_shards = num_shards_per_year
        if is_short_period and dataset == 'tai':  # only 7300 points
            num_shards = num_shards // 2
        if is_short_period and dataset == 'hammoz':
            num_shards = num_shards // 4

        # Explicit errors instead of a bare ZeroDivisionError / an assert that
        # disappears under `python -O`.
        if num_shards < 1:
            raise ValueError(
                f"num_shards={num_shards_per_year} leaves no shard for the short "
                f"final file {year} of dataset {dataset}"
            )
        if len_to_extract % num_shards != 0:
            raise ValueError(
                f"{len_to_extract} entries of {year} cannot be split evenly into {num_shards} shards"
            )

        num_hrs_per_shard = len_to_extract // num_shards
        for shard_id in range(num_shards):
            start_id = shard_id * num_hrs_per_shard
            end_id = start_id + num_hrs_per_shard
            sharded_data = {k: v[start_id:end_id] for k, v in np_vars.items()}
            np.savez(
                os.path.join(save_dir, "train", f"{year}_{shard_id}.npz"),
                **sharded_data,
            )

    normalize_mean, normalize_std = aggregate_mean_std(normalize_mean, normalize_std)

    np.savez(os.path.join(save_dir, "normalize_mean.npz"), **normalize_mean)
    np.savez(os.path.join(save_dir, "normalize_std.npz"), **normalize_std)
    np.save(os.path.join(save_dir, "lat.npy"), lat)
    np.save(os.path.join(save_dir, "lon.npy"), lon)
aggregate_mean_std(normalize_mean, normalize_std) + + np.savez(os.path.join(save_dir, "normalize_mean.npz"), **normalize_mean) + np.savez(os.path.join(save_dir, "normalize_std.npz"), **normalize_std) + np.save(os.path.join(save_dir, "lat.npy"), lat) + np.save(os.path.join(save_dir, "lon.npy"), lon) + + +@click.command() +@click.option("--dataset", type=str, default='mpi') +@click.option("--path", type=click.Path(exists=True)) +@click.option("--num_shards", type=int, default=10) ## recommended: 10 shards for MPI, 20 for tai, 2 for awi, 40 for hammoz, 2 for cmcc (must keep the same ratio to be able to train on multi gpus) +@click.option("--save_dir", type=click.Path(exists=False)) +def main( + dataset, + path, + num_shards, + save_dir +): + os.makedirs(save_dir, exist_ok=True) + + if dataset == 'mpi': + hours_per_year = 7300 + year_strings = [f"{y}01010600-{y+5}01010000" for y in range(1850, 2015, 5)] + variables = [ + "2m_temperature", + "10m_u_component_of_wind", + "10m_v_component_of_wind", + "geopotential", + "specific_humidity", + "temperature", + "u_component_of_wind", + "v_component_of_wind", + ] + elif dataset == 'tai': + hours_per_year = 14600 + year_strings = [ + '185001010000-186001010000', + '186001010600-187001010000', + '187001010600-188001010000', + '188001010600-189001010000', + '189001010600-190001010000', + '190001010600-191001010000', + '191001010600-192001010000', + '192001010600-193001010000', + '193001010600-194001010000', + '194001020000-195001010000', + '195001010600-196001010000', + '196001010600-197001010000', + '197001010600-198001010000', + '198001010600-199001010000', + '199001010600-200001010000', + '200001010600-201001010000', + '201001010600-201501010000' + ] + variables = [ + "2m_temperature", + "geopotential", + "specific_humidity", + "temperature", + "u_component_of_wind", + "v_component_of_wind", + ] + elif dataset == 'awi': + hours_per_year = 1460 + year_strings = [f'{y}01010600-{y+1}01010000' for y in range(1850, 2015, 1)] + 
def regrid(
        ds_in,
        ddeg_out,
        method='bilinear',
        reuse_weights=True,
        cmip=False,
        rename=None
):
    """
    Regrid horizontally.
    :param ds_in: Input xarray dataset
    :param ddeg_out: Output resolution
    :param method: Regridding method
    :param reuse_weights: Reuse weights for regridding
    :param cmip: If truthy, drop the CMIP bounds variables (lat/lon/plev/time
        ``*_bnds``) before regridding
    :param rename: Optional pair ``(old_name, new_name)``; ``old_name`` is
        renamed to ``new_name`` before regridding. ``old_name`` also selects
        the post-processing applied below ('zg' and 'rsdt' are special-cased)
    :return: ds_out: Regridded dataset, cast to float32
    """
    # Rename to ESMF compatible coordinates
    if 'latitude' in ds_in.coords:
        ds_in = ds_in.rename({'latitude': 'lat', 'longitude': 'lon'})
    if cmip:
        # Not every CMIP file carries all four bounds variables, so drop
        # whichever are present instead of failing on the missing ones.
        ds_in = ds_in.drop_vars(
            ['lat_bnds', 'lon_bnds', 'plev_bnds', 'time_bnds'],
            errors='ignore'
        )
    if rename is not None:
        ds_in = ds_in.rename({rename[0]: rename[1]})

    # Create output grid: latitude cell centres offset by half a cell from
    # the south pole, longitudes starting at 0
    grid_out = xr.Dataset(
        {
            'lat': (['lat'], np.arange(-90 + ddeg_out / 2, 90, ddeg_out)),
            'lon': (['lon'], np.arange(0, 360, ddeg_out)),
        }
    )

    # Create regridder
    regridder = xe.Regridder(
        ds_in, grid_out, method, periodic=True, reuse_weights=reuse_weights
    )

    ds_out = regridder(ds_in, keep_attrs=True).astype('float32')

    if rename is not None:
        # NOTE: the variables are addressed by the literal names 'z' and
        # 'tisr', so rename[1] must be exactly that name in these two cases.
        if rename[0] == 'zg':
            # presumably geopotential height -> geopotential (factor g) — confirm
            ds_out['z'] *= 9.807
        if rename[0] == 'rsdt':
            ds_out['tisr'] *= 60 * 60
            # keep every 12th time step starting from index 1, then shift the
            # time stamps forward by 90 minutes
            ds_out = ds_out.isel(time=slice(1, None, 12))
            ds_out = ds_out.assign_coords(
                {'time': ds_out.time + np.timedelta64(90, 'm')}
            )

    return ds_out
if __name__ == '__main__':
    # Command-line entry point: parse the options and forward them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input_fns',
        type=str,
        nargs='+',
        help="Input files (full path). Can use *. If more than one, loop over them",
        required=True
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        help="Output directory",
        required=True
    )
    parser.add_argument(
        '--ddeg_out',
        type=float,
        help="Output resolution",
        required=True
    )
    parser.add_argument(
        '--method',
        type=str,
        help="Regridding method. Default = bilinear",
        default='bilinear'
    )
    parser.add_argument(
        '--reuse_weights',
        type=int,
        help="Reuse weights for regridding. 0 (default) or 1",
        default=0
    )
    parser.add_argument(
        '--custom_fn',
        type=str,
        help="If not None, use custom file name. Otherwise infer from parameters.",
        default=None
    )
    parser.add_argument(
        '--file_ending',
        type=str,
        help="File ending. Default = nc",
        default='nc'
    )
    parser.add_argument(
        '--cmip',
        type=int,
        help="Is CMIP data. 0 (default) or 1",
        default=0
    )
    # Exactly two names are consumed downstream (old name, new name), so let
    # argparse reject any other count with a usage error.
    parser.add_argument(
        '--rename',
        type=str,
        nargs=2,
        metavar=('OLD_NAME', 'NEW_NAME'),
        help="Rename var OLD_NAME in dataset to NEW_NAME",
        default=None
    )
    args = parser.parse_args()

    main(
        input_fns=args.input_fns,
        output_dir=args.output_dir,
        ddeg_out=args.ddeg_out,
        method=args.method,
        # the 0/1 integer flags are only ever used as booleans
        reuse_weights=bool(args.reuse_weights),
        custom_fn=args.custom_fn,
        file_ending=args.file_ending,
        cmip=bool(args.cmip),
        rename=args.rename
    )