Changeset 27881

Timestamp: 08/18/23 13:21:17
Author:    musselman
Message:   Added support for results.solution.steps objects and for lists of strings to write_netCDF_commit.py

Legend: in the diff below, added lines are prefixed with '+', removed lines with '-', unchanged context lines with two spaces, and '…' marks elided, unchanged code.

File: 1 edited
  • issm/trunk/src/m/contrib/musselman/write_netCDF_commit.py

--- r27874
+++ r27881

  import numpy.ma as ma
  import time
- from os import path, remove
+ import os
  from model import *
  from results import *
…
  def write_netCDF(model_var, model_name: str, filename: str):
-     print('Python C2NetCDF4 v1.1.12')
+     print('Python C2NetCDF4 v1.1.14')
      '''
      model_var = class object to be saved
…
      # nested class instance variable as it appears in the model that we're trying to save
      quality_control = []
+
+     # we save lists of instances to the netcdf
+     solutions = []
+     solutionsteps = []
+     resultsdakotas = []
+
      for class_instance_name in model_var.results.__dict__.keys():
+         print(class_instance_name)
          # for each class instance in results, see which class its from and record that info in the netcdf to recreate structure later
          # check to see if there is a solutionstep class instance
          if isinstance(model_var.results.__dict__[class_instance_name],solutionstep):
              quality_control.append(1)
-             write_string_to_netcdf(variable_name=str('solutionstep'), adress_of_child=str(class_instance_name), group=NetCDF.groups['results'])
+             solutionsteps.append(class_instance_name)
+
          # check to see if there is a solution class instance
          if isinstance(model_var.results.__dict__[class_instance_name],solution):
              quality_control.append(1)
-             write_string_to_netcdf(variable_name=str('solution'), adress_of_child=str(class_instance_name), group=NetCDF.groups['results'])
+             solutions.append(class_instance_name)
+
          # check to see if there is a resultsdakota class instance
          if isinstance(model_var.results.__dict__[class_instance_name],resultsdakota):
              quality_control.append(1)
-             write_string_to_netcdf(variable_name=str('resultsdakota'), adress_of_child=str(class_instance_name), group=NetCDF.groups['results'])
+             resultsdakotas.append(class_instance_name)
+
+     if solutionsteps != []:
+         write_string_to_netcdf(variable_name=str('solutionstep'), address_of_child=solutionsteps, group=NetCDF.groups['results'], list=True)
+
+     if solutions != []:
+         write_string_to_netcdf(variable_name=str('solution'), address_of_child=solutions, group=NetCDF.groups['results'], list=True)
+
+     if resultsdakotas != []:
+         write_string_to_netcdf(variable_name=str('resultsdakota'), address_of_child=resultsdakotas, group=NetCDF.groups['results'], list=True)
+
      if len(quality_control) != len(model_var.results.__dict__.keys()):
          print('Error: The class instance within your model.results class is not currently supported by this application')
          print(type(model_var.results.__dict__[class_instance_name]))
+         print(solutions)
+         print(solutionsteps)
+         print(resultsdakotas)
      else:
          print('The results class was successfully stored on disk')
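For orientation (not part of the changeset): with the list handling above, each list of result-class names ends up as a 2-D character variable inside the 'results' group. A minimal read-back sketch, assuming a file written by this script that contains a 'solution' entry; the file name is hypothetical and netCDF4.chartostring does the decoding:

    import netCDF4

    # Read back the list of solution names written by write_string_to_netcdf(..., list=True).
    # 'model.nc' and the presence of a 'solution' variable are assumptions for this example.
    with netCDF4.Dataset('model.nc', 'r') as ds:
        ds.set_auto_mask(False)
        char_array = ds.groups['results'].variables['solution'][:]
        # collapse the character dimension and strip the space padding added on write
        names = [row.decode('utf-8').strip() for row in netCDF4.chartostring(char_array)]
        print(names)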
     
…
  def make_NetCDF(filename: str):
      # If file already exists delete / rename it
-     if path.exists(filename):
+     if os.path.exists(filename):
          print('File {} allready exist'.format(filename))
…
          if newname == 'delete':
-             remove(filename)
+             os.remove(filename)
          else:
              print(('New file name is {}'.format(newname)))
…
      # Iterate over first layer of model_var attributes and assume this first layer is only classes
      for group in model_var.__dict__.keys():
-         adress = str(model_name + '.' + str(group))
+         address = str(model_name + '.' + str(group))
          # Recursively walk through subclasses
-         walk_through_subclasses(model_var, adress, model_name)
-
-
-
- def walk_through_subclasses(model_var, adress: str, model_name: str):
+         walk_through_subclasses(model_var, address, model_name)
+
+
+
+ def walk_through_subclasses(model_var, address: str, model_name: str):
      # Iterate over each subclass' attributes
      # Use try/except since it's either a class w/ attributes or it's not, no unknown exceptions
…
          # then compare attributes between models and write to netCDF if they differ
          # if subclass found, walk through it and repeat
-         for child in eval(adress + '.__dict__.keys()'):
+         for child in eval(address + '.__dict__.keys()'):
              # make a string variable so we can send thru this func again
-             adress_of_child = str(adress + '.' + str(child))
+             address_of_child = str(address + '.' + str(child))
              # If the attribute is unchanged, move onto the next layer
-             adress_of_child_in_empty_class = 'empty_model' + adress_of_child.removeprefix(str(model_name))
+             address_of_child_in_empty_class = 'empty_model' + address_of_child.removeprefix(str(model_name))
              # using try/except here because sometimes a model can have class instances/attributes that are not
              # in the framework of an empty model. If this is the case, we move to the except statement
              try:
+                 # if the current object is a results.<solution> object and has the steps attr it needs special treatment
+                 if isinstance(eval(address_of_child), solution) and len(eval(address_of_child + '.steps')) != 0:
+                     create_group(model_var, address_of_child, is_struct = True)
                  # if the variable is an array, assume it has relevant data (this is because the next line cannot evaluate "==" with an array)
-                 if isinstance(eval(adress_of_child), np.ndarray):
-                     create_group(model_var, adress_of_child)
+                 elif isinstance(eval(address_of_child), np.ndarray):
+                     create_group(model_var, address_of_child)
                  # if the attributes are identical we don't need to save anything
-                 elif eval(adress_of_child) == eval(adress_of_child_in_empty_class):
-                     walk_through_subclasses(model_var, adress_of_child, model_name)
+                 elif eval(address_of_child) == eval(address_of_child_in_empty_class):
+                     walk_through_subclasses(model_var, address_of_child, model_name)
                  # If it has been modified, record it in the NetCDF file
                  else:
-                     create_group(model_var, adress_of_child)
-                     walk_through_subclasses(model_var, adress_of_child, model_name)
+                     create_group(model_var, address_of_child)
+                     walk_through_subclasses(model_var, address_of_child, model_name)
              # AttributeError since the empty_model wouldn't have the same attribute as our model
              except AttributeError:
                  # THE ORDER OF THESE LINES IS CRITICAL
-                 walk_through_subclasses(model_var, adress_of_child, model_name)
-                 create_group(model_var, adress_of_child)
+                 try:
+                     walk_through_subclasses(model_var, address_of_child, model_name)
+                     create_group(model_var, address_of_child)
+                 except:
+                     pass
+             except Exception as e: print(e)
      except AttributeError: pass
      except Exception as e: print(e)
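The recursion above boils down to: walk each attribute, compare it with the same attribute on a freshly constructed empty model, and only record what differs; arrays are always recorded because '==' is ambiguous for them. A simplified, self-contained illustration of that idea, with toy classes invented for the example and no eval (this is not the ISSM implementation):

    import numpy as np

    class Settings:
        def __init__(self):
            self.tolerance = 1e-6
            self.name = ''

    class Toy:
        def __init__(self):
            self.settings = Settings()
            self.table = np.array([])

    def changed_attributes(obj, default, prefix=''):
        # collect dotted addresses of attributes that differ from the default object
        changed = []
        for key, value in vars(obj).items():
            address = prefix + '.' + key
            default_value = getattr(default, key, None)
            if hasattr(value, '__dict__'):
                # a nested class instance: recurse, like walk_through_subclasses does
                changed += changed_attributes(value, default_value, address)
            elif isinstance(value, np.ndarray):
                # '==' cannot be evaluated cleanly for arrays, so assume they carry data
                changed.append(address)
            elif value != default_value:
                changed.append(address)
        return changed

    toy = Toy()
    toy.settings.tolerance = 1e-3
    toy.table = np.arange(3)
    print(changed_attributes(toy, Toy(), 'toy'))   # ['toy.settings.tolerance', 'toy.table']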
     
…
- def create_group(model_var, adress_of_child):
-     # start by splitting the adress_of_child into its components
-     levels_of_class = adress_of_child.split('.')
+ def create_group(model_var, address_of_child, is_struct = False):
+     # start by splitting the address_of_child into its components
+     levels_of_class = address_of_child.split('.')

      # Handle the first layer of the group(s)
…
      # Lastly, handle the variable(s)
-     variable_name = levels_of_class[-1]
-     create_var(variable_name, adress_of_child, group)
+     if is_struct:
+         parent_struct_name = address_of_child.split('.')[-1]
+         copy_nested_results_struct(parent_struct_name, address_of_child, group)
+     else:
+         variable_name = levels_of_class[-1]
+         create_var(variable_name, address_of_child, group)
…
      # need to make sure that we have the right inversion class: inversion, m1qn3inversion, taoinversion
      if isinstance(model_var.__dict__['inversion'], m1qn3inversion):
-         write_string_to_netcdf(variable_name=str('inversion_class_name'), adress_of_child=str('m1qn3inversion'), group=NetCDF.groups['inversion'])
+         write_string_to_netcdf(variable_name=str('inversion_class_name'), address_of_child=str('m1qn3inversion'), group=NetCDF.groups['inversion'])
          print('Successfully saved inversion class instance ' + 'm1qn3inversion')
      elif isinstance(model_var.__dict__['inversion'], taoinversion):
-         write_string_to_netcdf(variable_name=str('inversion_class_name'), adress_of_child=str('taoinversion'), group=NetCDF.groups['inversion'])
+         write_string_to_netcdf(variable_name=str('inversion_class_name'), address_of_child=str('taoinversion'), group=NetCDF.groups['inversion'])
          print('Successfully saved inversion class instance ' + 'taoinversion')
      else:
-         write_string_to_netcdf(variable_name=str('inversion_class_name'), adress_of_child=str('inversion'), group=NetCDF.groups['inversion'])
+         write_string_to_netcdf(variable_name=str('inversion_class_name'), address_of_child=str('inversion'), group=NetCDF.groups['inversion'])
          print('Successfully saved inversion class instance ' + 'inversion')

+
+
+
+
+ def copy_nested_results_struct(parent_struct_name, address_of_struct, group):
+     '''
+         This function takes a solution class instance and saves the solutionstep instances from <solution>.steps to the netcdf.
+
+         To do this, we get the number of dimensions (substructs) of the parent struct.
+         Next, we iterate through each substruct and record the data.
+         For each substruct, we create a subgroup of the main struct.
+         For each variable, we create dimensions that are assigned to each subgroup uniquely.
+     '''
+     print("Beginning transfer of nested MATLAB struct to the NetCDF")
+
+     # make a new subgroup to contain all the others:
+     group = group.createGroup(str(parent_struct_name))
+
+     # make sure other systems can flag the nested struct type
+     write_string_to_netcdf('this_is_a_nested', 'struct', group, list=False)
+
+     # other systems know the name of the parent struct because it's covered by the results/qmu functions above
+     address_of_struct_string = address_of_struct
+     address_of_struct = eval(address_of_struct)
+
+     no_of_dims = len(address_of_struct)
+     for substruct in range(0, no_of_dims):
+         # we start by making subgroups with nice names like "TransientSolution_substruct_44"
+         name_of_subgroup = '1x' + str(substruct)
+         subgroup = group.createGroup(str(name_of_subgroup))
+
+         # do some housekeeping to keep track of the current layer
+         current_substruct = address_of_struct[substruct]
+         current_substruct_string = address_of_struct_string + '[' + str(substruct) + ']'
+         substruct_fields = current_substruct.__dict__.keys()
+
+         # now we need to iterate over each variable of the nested struct and save it to this new subgroup
+         for variable in substruct_fields:
+             address_of_child = current_substruct.__dict__[variable]
+             address_of_child_string = current_substruct_string + '.' + str(variable)
+             create_var(variable, address_of_child_string, subgroup)
+
+     print(f'Successfully transferred struct {parent_struct_name} to the NetCDF\n')
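Not part of the changeset: the function above produces one subgroup per step, named '1x0', '1x1', and so on, under a group named after the solution (for example results/TransientSolution/1x0). A small sketch for inspecting that hierarchy with netCDF4; the file name is hypothetical:

    import netCDF4

    def print_tree(group, indent=0):
        # list variables at this level, then recurse into nested groups
        for name, var in group.variables.items():
            print(' ' * indent + f'{name}  dims={var.dimensions}')
        for name, sub in group.groups.items():
            print(' ' * indent + name + '/')
            print_tree(sub, indent + 2)

    with netCDF4.Dataset('model.nc', 'r') as ds:
        print_tree(ds)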
+

- def create_var(variable_name, adress_of_child, group):
+
+ def create_var(variable_name, address_of_child, group):
      # There are lots of different variable types that we need to handle from the model class

      # This first conditional statement will catch numpy arrays of any dimension and save them
-     if isinstance(eval(adress_of_child), np.ndarray):
-         write_numpy_array_to_netcdf(variable_name, adress_of_child, group)
+     if isinstance(eval(address_of_child), np.ndarray):
+         write_numpy_array_to_netcdf(variable_name, address_of_child, group)

      # check if it's an int
-     elif isinstance(eval(adress_of_child), int) or isinstance(eval(adress_of_child), np.integer):
+     elif isinstance(eval(address_of_child), int) or isinstance(eval(address_of_child), np.integer):
          variable = group.createVariable(variable_name, int, ('int',))
-         variable[:] = eval(adress_of_child)
+         variable[:] = eval(address_of_child)

      # or a float
-     elif isinstance(eval(adress_of_child), float) or isinstance(eval(adress_of_child), np.floating):
+     elif isinstance(eval(address_of_child), float) or isinstance(eval(address_of_child), np.floating):
          variable = group.createVariable(variable_name, float, ('float',))
-         variable[:] = eval(adress_of_child)
+         variable[:] = eval(address_of_child)

      # or a string
-     elif isinstance(eval(adress_of_child), str):
-         write_string_to_netcdf(variable_name, adress_of_child, group)
+     elif isinstance(eval(address_of_child), str):
+         write_string_to_netcdf(variable_name, address_of_child, group)

      #or a bool
-     elif isinstance(eval(adress_of_child), bool) or isinstance(eval(adress_of_child), np.bool):
+     elif isinstance(eval(address_of_child), bool) or isinstance(eval(address_of_child), np.bool_):
          # netcdf4 can't handle bool types like True/False so we convert all to int 1/0 and add an attribute named units with value 'bool'
          variable = group.createVariable(variable_name, int, ('int',))
-         variable[:] = int(eval(adress_of_child))
+         variable[:] = int(eval(address_of_child))
          variable.units = "bool"

      # or an empty list
-     elif isinstance(eval(adress_of_child), list) and len(eval(adress_of_child))==0:
+     elif isinstance(eval(address_of_child), list) and len(eval(address_of_child))==0:
          variable = group.createVariable(variable_name, int, ('int',))

      # or a list of strings -- this needs work as it can only handle a list of 1 string
-     elif isinstance(eval(adress_of_child),list) and isinstance(eval(adress_of_child)[0],str):
-         for string in eval(adress_of_child):
+     elif isinstance(eval(address_of_child),list) and isinstance(eval(address_of_child)[0],str):
+         for string in eval(address_of_child):
              write_string_to_netcdf(variable_name, string, group, list=True)

      # or a regular list
-     elif isinstance(eval(adress_of_child), list):
+     elif isinstance(eval(address_of_child), list):
          print('made list w/ unlim dim')
-         variable = group.createVariable(variable_name, type(eval(adress_of_child)[0]), ('Unlim',))
-         variable[:] = eval(adress_of_child)
+         variable = group.createVariable(variable_name, type(eval(address_of_child)[0]), ('Unlim',))
+         variable[:] = eval(address_of_child)

      # anything else... (will likely need to add more cases; ie dict)
      else:
          try:
-             variable = group.createVariable(variable_name, type(eval(adress_of_child)), ('Unlim',))
-             variable[:] = eval(adress_of_child)
+             variable = group.createVariable(variable_name, type(eval(address_of_child)), ('Unlim',))
+             variable[:] = eval(address_of_child)
              print('Used Unlim Dim')
-         except Exception as e:
+         except Exception as e:
+             print(f'There was error with {variable_name} in {address_of_child}')
+             print("The error message is:")
              print(e)
-             print('Datatype given: ' + str(type(eval(adress_of_child))))
-
-     print('Successfully transferred data from ' + adress_of_child + ' to the NetCDF')
-
-
- def write_string_to_netcdf(variable_name, adress_of_child, group, list=False):
+             print('Datatype given: ' + str(type(eval(address_of_child))))
+
+     print('Successfully transferred data from ' + address_of_child + ' to the NetCDF')
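As the comment in create_var notes, booleans are stored as 0/1 integers with a units='bool' attribute. A sketch (not from the changeset) of writing and restoring a flag under that convention; the variable and file names are made up:

    import netCDF4

    # Store int 1/0 plus units='bool', as create_var does, and map it back to a Python bool on read.
    with netCDF4.Dataset('bool_demo.nc', 'w', format='NETCDF4') as ds:
        ds.createDimension('int', 1)
        var = ds.createVariable('flag', int, ('int',))
        var[:] = int(True)
        var.units = 'bool'

    with netCDF4.Dataset('bool_demo.nc', 'r') as ds:
        var = ds.variables['flag']
        value = bool(var[:][0]) if getattr(var, 'units', '') == 'bool' else var[:][0]
        print(value)   # True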
+
+
+
+ def write_string_to_netcdf(variable_name, address_of_child, group, list=False):
      # netcdf and strings dont get along.. we have to do it 'custom':
-     # if we hand it an adress we need to do it this way:
-     try:
-         the_string_to_save = eval(adress_of_child)
-         length_of_the_string = len(the_string_to_save)
-         numpy_datatype = 'S' + str(length_of_the_string)
-         str_out = netCDF4.stringtochar(np.array([the_string_to_save], dtype=numpy_datatype))
-     #otherwise we need to treat it like a string:
-     except:
-         the_string_to_save = adress_of_child
-         length_of_the_string = len(the_string_to_save)
-         numpy_datatype = 'S' + str(length_of_the_string)
-         str_out = netCDF4.stringtochar(np.array([the_string_to_save], dtype=numpy_datatype))
-
-     # we'll need to make a new dimension for the string if it doesn't already exist
-     name_of_dimension = 'char' + str(length_of_the_string)
-     try:
-         group.createDimension(name_of_dimension, length_of_the_string)
-     except: pass
-     # this is another band-aid to the results sub classes...
-     try:
-         if list == True:
-             # now we can make a variable in this dimension:
-             string = group.createVariable(variable_name, 'S1', (name_of_dimension))
-             #finally we can write the variable:
-             string[:] = [str_out]
-         else:
+     # if we hand it an address we need to do it this way:
+     if list == True:
+         """
+         Save a list of strings to a NetCDF file.
+
+         Convert a list of strings to a numpy.char_array with utf-8 encoded elements
+         and size rows x cols with each row the same # of cols and save to NetCDF
+         as char array.
+         """
+         try:
+             strings = address_of_child
+             # get dims of array to save
+             rows = len(strings)
+             cols = len(max(strings, key = len))
+
+             # Define dimensions for the strings
+             rows_name = 'rows' + str(rows)
+             cols_name = 'cols' + str(cols)
+             try:
+                 group.createDimension(rows_name, rows)
+             except: pass
+
+             try:
+                 group.createDimension(cols_name, cols)
+             except: pass
+
+             # Create a variable to store the strings
+             string_var = group.createVariable(str(variable_name), 'S1', (rows_name, cols_name))
+
+             # break the list into a list of lists of words with the same length as the longest word:
+             # make words same sizes by adding spaces
+             modded_strings = [word + ' ' * (len(max(strings, key=len)) - len(word)) for word in strings]
+             # encoded words into list of encoded lists
+             new_list = [[s.encode('utf-8') for s in word] for word in modded_strings]
+
+             # make numpy char array with dims rows x cols
+             arr = np.chararray((rows, cols))
+
+             # fill array with list of encoded lists
+             for i in range(len(new_list)):
+                 arr[i] = new_list[i]
+
+             # save array to netcdf file
+             string_var[:] = arr
+
+             print(f'Saved {len(modded_strings)} strings to {variable_name}')
+
+         except Exception as e:
+             print(f'Error: {e}')
+
+     else:
+         try:
+             the_string_to_save = eval(address_of_child)
+             length_of_the_string = len(the_string_to_save)
+             numpy_datatype = 'S' + str(length_of_the_string)
+             str_out = netCDF4.stringtochar(np.array([the_string_to_save], dtype=numpy_datatype))
+         #otherwise we need to treat it like a string:
+         except:
+             the_string_to_save = address_of_child
+             length_of_the_string = len(the_string_to_save)
+             numpy_datatype = 'S' + str(length_of_the_string)
+             str_out = netCDF4.stringtochar(np.array([the_string_to_save], dtype=numpy_datatype))
+
+         # we'll need to make a new dimension for the string if it doesn't already exist
+         name_of_dimension = 'char' + str(length_of_the_string)
+         try:
+             group.createDimension(name_of_dimension, length_of_the_string)
+         except: pass
+         # this is another band-aid to the results sub classes...
+         try:
              # now we can make a variable in this dimension:
              string = group.createVariable(variable_name, 'S1', (name_of_dimension))
              #finally we can write the variable:
              string[:] = str_out
-     except RuntimeError: pass
-     except Exception:
-         print(Exception)
-
-
- def write_numpy_array_to_netcdf(variable_name, adress_of_child, group):
+         except RuntimeError: pass
+         except Exception:
+             print(Exception)
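A compact sketch of the same rows x cols char-array layout built in the list=True branch above, written with netCDF4.stringtochar instead of the manual np.chararray loop; the file, group, and variable names are invented for the example. Reading it back works like the read-back sketch shown after the results hunk near the top (netCDF4.chartostring plus stripping the padding):

    import numpy as np
    import netCDF4

    names = ['TransientSolution', 'StressbalanceSolution']

    with netCDF4.Dataset('strings_demo.nc', 'w', format='NETCDF4') as ds:
        grp = ds.createGroup('results')
        rows, cols = len(names), len(max(names, key=len))
        rows_name, cols_name = 'rows' + str(rows), 'cols' + str(cols)
        grp.createDimension(rows_name, rows)
        grp.createDimension(cols_name, cols)
        var = grp.createVariable('solution', 'S1', (rows_name, cols_name))
        # pad every name with spaces to the longest length, as the new code does
        padded = [n.ljust(cols) for n in names]
        # stringtochar turns an array of 'S<cols>' strings into a rows x cols array of 'S1'
        var[:] = netCDF4.stringtochar(np.array(padded, dtype='S' + str(cols)))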
+
+
+
+
+
+ def write_numpy_array_to_netcdf(variable_name, address_of_child, group):
      # to make a nested array in netCDF, we have to get the dimensions of the array,
      # create corresponding dimensions in the netCDF file, then we can make a variable
…
      # start by getting the data type at the lowest level in the array:
-     typeis = eval(adress_of_child + '.dtype')
+     typeis = eval(address_of_child + '.dtype')

      # catch boolean arrays here
      if typeis == bool:
          # sometimes an array has just 1 element in it, we account for those cases here:
-         if len(eval(adress_of_child)) == 1:
+         if len(eval(address_of_child)) == 1:
              variable = group.createVariable(variable_name, int, ('int',))
-             variable[:] = int(eval(adress_of_child))
+             variable[:] = int(eval(address_of_child))
              variable.units = "bool"
          else:
              # make the dimensions
              dimensions = []
-             for dimension in np.shape(eval(adress_of_child)):
+             for dimension in np.shape(eval(address_of_child)):
                  dimensions.append(str('dim' + str(dimension)))
                  # if the dimension already exists we can't have a duplicate
…
              variable = group.createVariable(variable_name, int, tuple(dimensions))
              # write the variable:
-             variable[:] = eval(adress_of_child + '.astype(int)')
+             variable[:] = eval(address_of_child + '.astype(int)')
              variable.units = "bool"

-
-
      # handle all other datatypes here
      else:
          # sometimes an array has just 1 element in it, we account for those cases here:
-         if len(eval(adress_of_child)) == 1:
+         if len(eval(address_of_child)) == 1:
              if typeis is np.dtype('float64'):
                  variable = group.createVariable(variable_name, typeis, ('float',))
-                 variable[:] = eval(adress_of_child)
+                 variable[:] = eval(address_of_child + '[0]')
              elif typeis is np.dtype('int64'):
                  variable = group.createVariable(variable_name, typeis, ('int',))
-                 variable[:] = eval(adress_of_child)
+                 variable[:] = eval(address_of_child + '[0]')
              else:
                  print('Encountered single datatype that was not float64 or int64, saving under unlimited dimension, may cause errors.')
                  variable = group.createVariable(variable_name, typeis, ('Unlim',))
-                 variable[:] = eval(adress_of_child)
+                 variable[:] = eval(address_of_child + '[0]')

          # This catches all arrays/lists:
…
              # make the dimensions
              dimensions = []
-             for dimension in np.shape(eval(adress_of_child)):
+             for dimension in np.shape(eval(address_of_child)):
                  dimensions.append(str('dim' + str(dimension)))
                  # if the dimension already exists we can't have a duplicate
…
              # write the variable:
-             variable[:] = eval(adress_of_child)
- [33 trailing blank lines removed]
+             variable[:] = eval(address_of_child)
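Not part of the changeset: write_numpy_array_to_netcdf names one netCDF dimension per array axis after its length ('dim2', 'dim3', ...) and reuses a dimension if it already exists. A minimal sketch of that convention with invented file and variable names:

    import numpy as np
    import netCDF4

    data = np.arange(6.0).reshape(2, 3)

    with netCDF4.Dataset('array_demo.nc', 'w', format='NETCDF4') as ds:
        dims = []
        for length in np.shape(data):
            name = 'dim' + str(length)
            if name not in ds.dimensions:      # avoid creating a duplicate dimension
                ds.createDimension(name, length)
            dims.append(name)
        var = ds.createVariable('thickness', data.dtype, tuple(dims))
        var[:] = data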
+ [11 trailing blank lines added]