| | import os |
| | import numpy as np |
| | import dask.array as da |
| | import xarray as xr |
| |
|
def load_all_file(data_dir="", prefix="202306"):
    """Load every array file in ``data_dir`` whose name starts with ``prefix``.

    The matching file names are sorted lexicographically and each file is
    read with ``np.load``; results are returned in that sorted order.

    Args:
        data_dir: Directory to scan. It is passed to ``os.listdir``
            unchanged, so it must name an existing directory.
        prefix: File-name prefix used to select files. Defaults to
            ``"202306"``.

    Returns:
        A list holding one ``np.load`` result per matching file. The list
        is empty when no file name matches.
    """
    # NOTE(review): an earlier revision built two unused coordinate axes here,
    # np.arange(103.5, 109.2, 0.00892) and np.arange(8, 13.75, 0.00899).
    # They never reached the return value, so they were dropped as dead code.
    selected_names = sorted(
        name for name in os.listdir(data_dir) if name.startswith(prefix)
    )
    # Build paths with os.path.join instead of hard-coding the "/" separator.
    return [np.load(os.path.join(data_dir, name)) for name in selected_names]
| |
|
def _extract_patches(data_list, patch_size, grid_size):
    """Cut the top-left ``grid_size`` square of each array into square tiles."""
    offsets = range(0, grid_size, patch_size)
    tiles = []
    for index, frame in enumerate(data_list):
        # Smaller frames would yield ragged tiles that cannot be stacked.
        if frame.ndim != 2 or min(frame.shape) < grid_size:
            raise ValueError(
                f"data_list[{index}] has shape {frame.shape}; expected a 2-D "
                f"array of at least {grid_size}x{grid_size}"
            )
        for row in offsets:
            tiles.extend(
                frame[row:row + patch_size, col:col + patch_size]
                for col in offsets
            )
    return tiles


def preprocess_data(data_list, out_dir="", *, patch_size=32, grid_size=640):
    """Tile the input arrays into patches and write them to a NetCDF file.

    The top-left ``grid_size`` x ``grid_size`` region of every array is cut
    into non-overlapping ``patch_size`` x ``patch_size`` tiles, ordered by
    array, then row offset, then column offset. The tiles are cast to
    ``uint8`` and stored as the ``patches`` variable of an xarray dataset,
    alongside ``patch_coords``, ``patch_times``, ``zero_patch_coords``,
    ``zero_patch_times`` and ``scale``. The dataset is written to
    ``<out_dir>/RZC/patches_RV_202306.nc``; the ``RZC`` directory is created
    if it does not exist.

    Args:
        data_list: Sequence of 2-D arrays, each at least ``grid_size`` in
            both dimensions.
        out_dir: Parent directory for the ``RZC`` output folder. An empty
            string means a relative ``RZC`` folder.
        patch_size: Edge length of each square tile. Defaults to 32.
        grid_size: Edge length of the region that gets tiled. Must be a
            multiple of ``patch_size``. Defaults to 640.

    Returns:
        The number of arrays in ``data_list``.

    Raises:
        ValueError: If ``data_list`` is empty, ``grid_size`` is not a
            multiple of ``patch_size``, or an array is not 2-D or is smaller
            than ``grid_size``.
    """
    if len(data_list) == 0:
        raise ValueError("data_list is empty; there is nothing to preprocess")
    if patch_size <= 0 or grid_size % patch_size != 0:
        raise ValueError(
            f"grid_size ({grid_size}) must be a multiple of a positive "
            f"patch_size ({patch_size})"
        )

    tiles = _extract_patches(data_list, patch_size, grid_size)
    n_patches = len(tiles)
    print(n_patches)
    patches_array = np.array(tiles, dtype=np.uint8)

    # NOTE(review): the coordinate and time variables look like placeholders
    # rather than real metadata - confirm with the consumer of this file.
    # The coordinates used to come from np.random.rand cast to uint16; those
    # samples lie in [0, 1), so the cast always gave zeros. Spell that out.
    coords_placeholder = np.zeros((n_patches, 2), dtype=np.uint16)
    times_placeholder = np.arange(n_patches, dtype=np.int64)
    scale_values = np.arange(256, dtype=np.float32)

    # Each variable is wrapped as a single dask chunk spanning the whole array.
    patches_da = da.from_array(
        patches_array, chunks=(n_patches, patch_size, patch_size)
    )
    coords_da = da.from_array(coords_placeholder, chunks=(n_patches, 2))
    times_da = da.from_array(times_placeholder, chunks=(n_patches,))
    scale_da = da.from_array(scale_values, chunks=(256,))

    # Dimension names are part of the file format and are kept verbatim,
    # including the "dim_heigh" spelling.
    patches = xr.DataArray(
        patches_da, dims=("dim_patch", "dim_heigh", "dim_width")
    )
    patch_coords = xr.DataArray(coords_da, dims=("dim_patch1", "dim_coord"))
    patch_times = xr.DataArray(times_da, dims=("dim_patch2",))
    zero_patch_coords = xr.DataArray(
        coords_da, dims=("dim_zero_patch", "dim_coord")
    )
    zero_patch_times = xr.DataArray(times_da, dims=("dim_zero_patch1",))
    scale = xr.DataArray(scale_da, dims=("dim_scale",))

    ds = patches.to_dataset(name="patches")
    ds["patch_coords"] = patch_coords
    ds["patch_times"] = patch_times
    ds["zero_patch_coords"] = zero_patch_coords
    ds["zero_patch_times"] = zero_patch_times
    ds["scale"] = scale
    ds.attrs["zero_value"] = 1

    # os.path.join keeps an empty out_dir relative; plain "/" concatenation
    # would have resolved to "/RZC" at the filesystem root.
    target_dir = os.path.join(out_dir, "RZC")
    os.makedirs(target_dir, exist_ok=True)
    ds.to_netcdf(os.path.join(target_dir, "patches_RV_202306.nc"))

    return len(data_list)
| |
|
| | |
if __name__ == "__main__":
    # Guarded so that importing this module does not start the pipeline.
    # The result is bound to a name that does not shadow the builtin `list`.
    loaded_arrays = load_all_file(
        data_dir="/data/data_WF/ldcast_precipitation/test"
    )
    print(
        preprocess_data(
            loaded_arrays,
            out_dir="/data/data_WF/ldcast_precipitation/preprocess_data_test",
        )
    )
| |
|