Source code for timeseries

"""
Functions for extracting timeseries from directories of GOES ABI imagery
"""

import glob

import pandas as pd
import xarray as xr

import goes_ortho as go

# def df_from_zarr(zarrFilepath, variable, point_lat_lon, outFilepath=None):

#     ds = xr.open_dataset(
#         zarrFilepath,
#         chunks={'time': 40785, 'latitude': 50, 'longitude': 50},
#         engine='zarr'
#     )
#     # When we pass in a chunks argument, the dataset opened will be filled with Dask arrays

#     point_timeseries = ds[variable].sel(latitude = point_lat_lon[0], longitude = point_lat_lon[1], method='nearest')

#     # Convert the timeseries into a pandas dataframe and save in a .csv file
#     df = point_timeseries.to_dataframe().drop(columns=['latitude', 'longitude'])

#     if outFilepath != None:
#         df.to_csv(outFilepath)

#     return df


def make_abi_timeseries(directory, product, data_vars, lon, lat, z, outfilepath=None):
    """Given a directory of GOES ABI products, create a timeseries of data variables
    (specified in data_vars) for a single point (at lon, lat, elevation).
    Returns a pandas dataframe, with optional output to a csv file."""
    path = "{directory}/**/*{product}*.nc".format(directory=directory, product=product)
    file_list = glob.glob(path, recursive=True)

    # create an empty dataframe to hold the data variables we want plus a timestamp
    df_columns = list(data_vars)
    df_columns.append("time")
    # if Radiance is one of the data variables we are interested in, create a new column
    # for reflectance (bands 1-6) or brightness temperature (bands 7-16)
    if "Rad" in data_vars:
        df_columns.append("ref_or_tb")
    # create the dataframe we will populate with values
    df = pd.DataFrame(columns=df_columns)

    print(
        "Creating a timeseries of {data_vars} from {product} at ({lat}, {lon}, {z})".format(
            data_vars=data_vars, product=product, lat=lat, lon=lon, z=z
        )
    )
    print("Reading:")
    for filename in file_list:
        try:
            print("{}".format(filename), end="\r")
            # "decode_times=False" is passed to xr.open_dataset because some ABI-L2-ACMC
            # files have timestamps that xarray cannot decode, raising "ValueError: unable
            # to decode time units 'seconds since 2000-01-01 12:00:00' with the default
            # calendar. Try opening your dataset with decode_times=False."
            # The timestamp is therefore read from f.time_coverage_start
            # (previously f.time_bounds.values.min()).
            with xr.open_dataset(filename, decode_times=False) as f:
                # Read the goes_imager_projection values needed for geometry calculations
                # and compute the corresponding look angles (in radians) for the
                # lat, lon, elevation we are interested in
                x_rad, y_rad = go.geometry.LonLat2ABIangle(
                    lon,
                    lat,
                    z,
                    f.goes_imager_projection.perspective_point_height
                    + f.goes_imager_projection.semi_major_axis,
                    f.goes_imager_projection.semi_major_axis,
                    f.goes_imager_projection.semi_minor_axis,
                    0.0818191910435,  # GRS-80 eccentricity
                    f.goes_imager_projection.longitude_of_projection_origin,
                )

                # get the timestamp for this observation (these should all be UTC; timezone
                # info is removed because not all timestamps convert the same way, which
                # caused a "Cannot compare tz-naive and tz-aware timestamps" error)
                timestamp = pd.Timestamp(f.time_coverage_start).replace(tzinfo=None)

                # create an empty list of the same length as data_vars to hold each variable's value
                values = ["" for v in data_vars]

                # for products other than ABI-L1b-Rad, ref_or_tb stays None (initialized
                # before the loop so a value computed for "Rad" is not reset by a later variable)
                ref_or_tb = None
                # for each variable we are interested in, specified in the list data_vars
                for i, var in enumerate(data_vars):
                    # find the pixel value nearest to the scan angles y_rad and x_rad
                    values[i] = (
                        f[var].sel(y=y_rad, x=x_rad, method="nearest").values.mean()
                    )
                    # for ABI-L1b-Rad products only:
                    if var == "Rad":
                        if f.band_id.values <= 6:
                            # reflective band (bands 1-6): convert Radiance to Reflectance
                            ref_or_tb = go.rad.goesReflectance(
                                values[i], f.kappa0.values
                            )
                        else:
                            # emissive band (bands 7-16): convert Radiance to Brightness Temperature (K)
                            ref_or_tb = go.rad.goesBrightnessTemp(
                                values[i],
                                f.planck_fk1.values,
                                f.planck_fk2.values,
                                f.planck_bc1.values,
                                f.planck_bc2.values,
                            )

                # create a dictionary for this row of values (each row is a GOES-R observation time)
                this_row_dict = dict(zip(data_vars, values))
                # add our timestamp to this dict before we update the dataframe
                this_row_dict["time"] = timestamp
                # if we have reflectance or brightness temperature, add it to this row's dict
                if ref_or_tb is not None:
                    this_row_dict["ref_or_tb"] = ref_or_tb
                # finally, append this_row_dict to our dataframe for this one observation time
                this_row_df = pd.DataFrame(this_row_dict, index=[0])
                df = pd.concat([df, this_row_df], ignore_index=True)
        except AttributeError as e:
            print(e)

    # drop duplicates if there are any, keep the first one
    df.drop_duplicates(["time"], keep="first", inplace=True)
    # set the dataframe index to the timestamp column
    df.set_index("time", inplace=True, verify_integrity=True)

    # if an output filepath was provided, save the dataframe as a csv
    if outfilepath is not None:
        print("Saving csv file to: {}".format(outfilepath))
        df.to_csv(outfilepath)

    return df
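
# Example usage (a minimal sketch; the directory, product name, coordinates, and output
# filename below are hypothetical placeholders, and the import assumes this module is
# available as goes_ortho.timeseries):
#
#   from goes_ortho import timeseries
#
#   df = timeseries.make_abi_timeseries(
#       directory="./goes-data",      # directory tree of downloaded ABI netCDF files
#       product="ABI-L1b-RadC",       # product name substring used to glob for *.nc files
#       data_vars=["Rad"],            # requesting "Rad" also fills the "ref_or_tb" column
#       lon=-121.7,                   # longitude of the point of interest (degrees)
#       lat=46.8,                     # latitude of the point of interest (degrees)
#       z=1500,                       # elevation of the point of interest (meters)
#       outfilepath="point_timeseries.csv",
#   )
#
# For reference, go.rad.goesReflectance and go.rad.goesBrightnessTemp apply the standard
# GOES-R conversions (per the GOES-R Product Definition and Users' Guide):
#   reflectance factor:      ref = kappa0 * Rad
#   brightness temperature:  Tb = (planck_fk2 / ln(planck_fk1 / Rad + 1) - planck_bc1) / planck_bc2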
def make_nested_abi_timeseries(
    directory, product, data_vars, lon, lat, z, outfilepath=None
):
    """Given a directory of GOES ABI products, create a timeseries of data variables
    (specified in data_vars) for a single point (at lon, lat, elevation).
    Retrieves all pixels nested within the larger "2 km" ABI Fixed Grid cell.
    Returns a pandas dataframe, with optional output to a csv file."""
    path = "{directory}/**/*{product}*.nc".format(directory=directory, product=product)
    file_list = glob.glob(path, recursive=True)
    print(f"Found {len(file_list)} files in {path}")

    print(
        "Creating a timeseries of {data_vars} from {product} at ({lat}, {lon}, {z})\n".format(
            data_vars=data_vars, product=product, lat=lat, lon=lon, z=z
        )
    )

    data_list = []

    print(f"Reading {len(file_list)} files from {path}\n")
    counter = 1
    for filename in file_list:
        try:
            print(
                "file {} of {}: {}".format(counter, len(file_list), filename), end="\r"
            )
            counter += 1
            # "decode_times=False" is passed to xr.open_dataset because some ABI-L2-ACMC
            # files have timestamps that xarray cannot decode (see make_abi_timeseries
            # above); the timestamp is read from f.time_coverage_start instead of f.time_bounds
            with xr.open_dataset(filename, decode_times=False) as f:
                # Read the goes_imager_projection values needed for geometry calculations
                # and compute the corresponding look angles (in radians) for the
                # lat, lon, elevation we are interested in
                x_rad, y_rad = go.geometry.LonLat2ABIangle(
                    lon,
                    lat,
                    z,
                    f.goes_imager_projection.perspective_point_height
                    + f.goes_imager_projection.semi_major_axis,
                    f.goes_imager_projection.semi_major_axis,
                    f.goes_imager_projection.semi_minor_axis,
                    0.0818191910435,  # GRS-80 eccentricity
                    f.goes_imager_projection.longitude_of_projection_origin,
                )

                # find the "2 km" ABI Fixed Grid cell containing this point, plus the
                # "1 km" and "500 m" pixel coordinates nested within it
                (
                    nearest_xs_2km,
                    nearest_ys_2km,
                    nearest_xs_1km,
                    nearest_ys_1km,
                    nearest_xs_500m,
                    nearest_ys_500m,
                ) = go.geometry.get_nested_coords(f, x_rad, y_rad)

                # get the timestamp for this observation (these should all be UTC; timezone
                # info is removed because not all timestamps convert the same way, which
                # caused a "Cannot compare tz-naive and tz-aware timestamps" error)
                timestamp = (
                    pd.Timestamp(f.time_coverage_start)
                    .replace(tzinfo=None)
                    .round("min")
                )

                band = f.band_id.values[0]

                if band == 2:
                    # the only "500 m" band: select the sixteen 500 m pixels nearest to
                    # the scan angles y_rad and x_rad
                    rad_values = (
                        f["Rad"]
                        .sel(
                            y=nearest_ys_500m[:, 0],
                            x=nearest_xs_500m[0, :],
                            method="nearest",
                        )
                        .rename("rad")
                    )
                    # reflective band (bands 1-6): convert Radiance to Reflectance
                    ref_or_tb = go.rad.goesReflectance(
                        rad_values, f.kappa0.values
                    ).rename("ref")
                elif band in [1, 3, 5]:
                    # "1 km" bands: select the four 1 km pixels nearest to y_rad and x_rad
                    rad_values = (
                        f["Rad"]
                        .sel(
                            y=nearest_ys_1km[:, 0],
                            x=nearest_xs_1km[0, :],
                            method="nearest",
                        )
                        .rename("rad")
                    )
                    # reflective band (bands 1-6): convert Radiance to Reflectance
                    ref_or_tb = go.rad.goesReflectance(
                        rad_values, f.kappa0.values
                    ).rename("ref")
                elif band in [4, 6]:
                    # reflective "2 km" bands: select the single 2 km pixel nearest to y_rad and x_rad
                    rad_values = (
                        f["Rad"]
                        .sel(
                            y=nearest_ys_2km[:, 0],
                            x=nearest_xs_2km[0, :],
                            method="nearest",
                        )
                        .rename("rad")
                    )
                    # reflective band (bands 1-6): convert Radiance to Reflectance
                    ref_or_tb = go.rad.goesReflectance(
                        rad_values, f.kappa0.values
                    ).rename("ref")
                elif band in [7, 8, 9, 10, 11, 12, 13, 14, 15, 16]:
                    # emissive "2 km" bands: select the single 2 km pixel nearest to y_rad and x_rad
                    rad_values = (
                        f["Rad"]
                        .sel(
                            y=nearest_ys_2km[:, 0],
                            x=nearest_xs_2km[0, :],
                            method="nearest",
                        )
                        .rename("rad")
                    )
                    # emissive band (bands 7-16): convert Radiance to Brightness Temperature (K)
                    ref_or_tb = go.rad.goesBrightnessTemp(
                        rad_values,
                        f.planck_fk1.values,
                        f.planck_fk2.values,
                        f.planck_bc1.values,
                        f.planck_bc2.values,
                    ).rename("tb")

                # tag each DataArray with its timestamp and band, then append to the list
                rad_values["t"] = timestamp
                ref_or_tb["t"] = timestamp
                data_list.append(
                    rad_values.expand_dims(dim={"t": 1})
                    .expand_dims(dim={"band": 1})
                    .assign_coords(band=("band", [band]))
                )
                data_list.append(
                    ref_or_tb.expand_dims(dim={"t": 1})
                    .expand_dims(dim={"band": 1})
                    .assign_coords(band=("band", [band]))
                )
        except (AttributeError, OSError) as e:
            print(e)

    df = data_list_to_df(data_list)

    # if an output filepath was provided, save the dataframe as a csv
    if outfilepath is not None:
        print("Saving csv file to: {}".format(outfilepath))
        df.to_csv(outfilepath)

    return df
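
# Example usage (a minimal sketch with hypothetical directory, product, coordinates, and
# output filename; the import assumes this module is available as goes_ortho.timeseries).
# Note that data_vars is used only in the progress message: this function always samples
# the "Rad" variable, returning every 500 m, 1 km, or 2 km pixel nested within the
# point's 2 km ABI Fixed Grid cell:
#
#   from goes_ortho import timeseries
#
#   df = timeseries.make_nested_abi_timeseries(
#       directory="./goes-data",
#       product="ABI-L1b-RadC",
#       data_vars=["Rad"],
#       lon=-121.7,
#       lat=46.8,
#       z=1500,
#       outfilepath="nested_point_timeseries.csv",
#   )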
def data_list_to_df(data_list):
    """Convert the list of per-band DataArrays built by make_nested_abi_timeseries
    into a wide-format pandas dataframe with one row per observation time."""
    this_dict = {}
    counter = 1
    for da in data_list:
        print("dataset {} of {}".format(counter, len(data_list)), end="\r")
        counter += 1
        t = da.t.values[0]
        # create a new dict entry for this timestamp if it does not exist
        if t not in this_dict.keys():
            this_dict[t] = {}
        # now update that dict entry
        this_dict[t]["t"] = t
        this_dict[t]["x_2km"] = da.x_image.values
        this_dict[t]["y_2km"] = da.y_image.values
        # column names encode band, variable name ("rad", "ref", or "tb"), pixel size,
        # and position within the 2 km cell
        band = da.band.values[0]
        prefix = f"b{band}_{da.name}"
        vals = da.values.ravel()
        if band == 2:  # the "500 m" band
            this_dict[t]["x_500m_WW"] = da.x.values[0]
            this_dict[t]["x_500m_W"] = da.x.values[1]
            this_dict[t]["x_500m_E"] = da.x.values[2]
            this_dict[t]["x_500m_EE"] = da.x.values[3]
            this_dict[t]["y_500m_SS"] = da.y.values[0]
            this_dict[t]["y_500m_S"] = da.y.values[1]
            this_dict[t]["y_500m_N"] = da.y.values[2]
            this_dict[t]["y_500m_NN"] = da.y.values[3]
            # map each of the sixteen 500 m pixels (raveled row-major, with y ascending
            # from south) to its position within the 2 km cell: the first label is the
            # 1 km quadrant, the second is the 500 m pixel within that quadrant
            positions_500m = {
                "NW_NW": 12, "NW_NE": 13, "NW_SW": 8, "NW_SE": 9,
                "NE_NW": 14, "NE_NE": 15, "NE_SW": 10, "NE_SE": 11,
                "SW_NW": 4, "SW_NE": 5, "SW_SW": 0, "SW_SE": 1,
                "SE_NW": 6, "SE_NE": 7, "SE_SW": 2, "SE_SE": 3,
            }
            for pos, idx in positions_500m.items():
                this_dict[t][f"{prefix}_500m_{pos}"] = vals[idx]
            this_dict[t][f"{prefix}_2km"] = da.values.mean()
        elif band in [1, 3, 5]:  # "1 km" bands
            this_dict[t]["x_1km_W"] = da.x.values[0]
            this_dict[t]["x_1km_E"] = da.x.values[1]
            this_dict[t]["y_1km_N"] = da.y.values[1]
            this_dict[t]["y_1km_S"] = da.y.values[0]
            this_dict[t][f"{prefix}_1km_NW"] = vals[0]
            this_dict[t][f"{prefix}_1km_NE"] = vals[1]
            this_dict[t][f"{prefix}_1km_SW"] = vals[2]
            this_dict[t][f"{prefix}_1km_SE"] = vals[3]
            this_dict[t][f"{prefix}_2km"] = da.values.mean()
        else:  # "2 km" bands
            this_dict[t][f"{prefix}_2km"] = vals[0]

    # drop duplicates if there are any, keep the first one
    # df.drop_duplicates(['time'], keep='first', inplace=True)
    df = pd.DataFrame.from_dict(this_dict, orient="index")
    # set the dataframe index to the timestamp column
    # df.set_index('time', inplace=True, verify_integrity=True)

    return df
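
# A sketch of how the wide-format column names produced above decode (the band numbers
# and variables here are illustrative examples, not a fixed set):
#
#   "b2_rad_500m_NW_NE"  ->  band 2 radiance, the NE 500 m pixel within the NW 1 km
#                            quadrant of the 2 km cell
#   "b1_ref_1km_SE"      ->  band 1 reflectance, the SE 1 km pixel of the 2 km cell
#   "b13_tb_2km"         ->  band 13 brightness temperature of the single 2 km pixel
#   "b2_ref_2km"         ->  mean of the sixteen band 2 reflectances, as a 2 km aggregate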