# parser-eLabFTW-NFFA-DI/src/main.py

import os, json, requests, h5py
from getpass import getpass
from APIHandler import APIHandler
from classes import *


def call_entrypoint_from_elabid(elabid):
    '''
    Fetches an entrypoint sample ("items" entry) from eLabFTW using its elabid and returns it as an object of the Entrypoint class.

    Parameters:
        elabid: identifier of the eLabFTW resource to be used as entrypoint.

    Returns:
        An Entrypoint-class object built from the fetched data.

    Raises:
        ValueError: if the entry is not a sample (category_title not matching exactly "Sample").
        ConnectionError: if a ConnectionError occurs while fetching or building the entry; the original exception is attached as the cause.

    Note: reads the module-level variable `apikey`, which must be set before calling.
    '''
    try:
        sample_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="items")
        if sample_data.get("category_title") != "Sample":
            raise ValueError("The resource you selected is not a sample, therefore it can't be used as an entrypoint.")
        return Entrypoint(sample_data) # Entrypoint-class object
    except ConnectionError as e:
        # Explicit chaining: the original failure is reported as the direct cause.
        raise ConnectionError(e) from e

def call_material_from_elabid(elabid):
    '''
    Fetches a material ("items" entry) from eLabFTW using its elabid and returns it as a Target or Substrate object.

    The category_title of the entry decides the class: "PLD Target" gives a Target, "Substrate" gives a Substrate.
    Because of an old typo, the value "Subtrate" (second 's' is missing) is also accepted as a substrate.

    Parameters:
        elabid: identifier of the eLabFTW resource holding the material.

    Returns:
        A Target-class or Substrate-class object built from the fetched data.

    Raises:
        ValueError: if category_title is none of the accepted values (the category found is printed first).
        ConnectionError: if a ConnectionError occurs while fetching or building the entry; the original exception is attached as the cause.

    Note: reads the module-level variable `apikey`, which must be set before calling.
    '''
    try:
        material_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="items")
        material_category = material_data.get("category_title")
        # TO-DO: correct this typo on elabftw: Subtrate → Substrate.
        if material_category not in ("PLD Target", "Substrate", "Subtrate"):
            print(f"Category of the resource: {material_category}.")
            raise ValueError(f"The referenced resource (elabid = {elabid}) is not a material.")
        if material_category == "PLD Target":
            return Target(material_data)
        # Any remaining accepted category is a substrate batch.
        return Substrate(material_data)
    except ConnectionError as e:
        # Explicit chaining: the original failure is reported as the direct cause.
        raise ConnectionError(e) from e

def call_layers_from_list(elabid_list):
    '''
    Fetches a list of (PLD deposition) experiments from eLabFTW using their elabid and returns them as a list of Layer-class objects.

    Entries whose category_title does not match exactly "PLD Deposition" are skipped, with no error raised.

    Parameters:
        elabid_list: iterable of elabid values, one per experiment to fetch.

    Returns:
        A list of Layer-class objects, in the order the matching entries were fetched.

    Raises:
        ConnectionError: if a ConnectionError occurs for any entry. Before raising, the layer numbers of the layers
            already processed are printed; the original exception is attached as the cause.

    Note: reads the module-level variables `apikey` and (only when building the error message) `ELABFTW_API_URL`.
    '''
    list_of_layers = []
    for elabid in elabid_list:
        try:
            layer_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="experiments")
            if layer_data.get("category_title") != "PLD Deposition":
                continue
            list_of_layers.append(Layer(layer_data))
        except ConnectionError as e:
            # Report what was fetched successfully so the user knows where the run stopped.
            nums = sorted(layer.layer_number for layer in list_of_layers)
            print("LIST OF THE LAYERS PROCESSED (unordered):\n" + str(nums))
            raise ConnectionError(
                f"An error occurred while fetching the experiment with elabid = {elabid}:\n"
                + str(e) + "\nPlease solve the problem before retrying." + "\n\n"
                + f"Last resource attempted to call: {ELABFTW_API_URL}/experiments/{elabid}"
            ) from e
    return list_of_layers # list of Layer-class objects

def chain_entrypoint_to_batch(sample_object):
    '''
    Resolves the substrate batch of a sample.

    Reads the .batch_elabid attribute of the given Entrypoint-class object and returns the object produced by call_material_from_elabid for that elabid.

    Dependency: call_material_from_elabid.
    '''
    return call_material_from_elabid(sample_object.batch_elabid)

def chain_entrypoint_to_layers(sample_object):
    '''
    Resolves the deposition layers of a sample.

    Reads the .linked_experiments_elabid attribute (list of elabid) of the given Entrypoint-class object, fetches the corresponding layers through call_layers_from_list and returns them as a list of Layer-class objects.

    The returned list is sorted by ascending layer_number attribute.

    Dependency: call_layers_from_list.
    '''
    fetched_layers = call_layers_from_list(sample_object.linked_experiments_elabid)
    return sorted(fetched_layers, key=lambda layer: layer.layer_number)

def chain_layer_to_target(layer_object):
    '''
    Resolves the PLD target of a deposition layer.

    Reads the .target_elabid attribute of the given Layer-class object and returns the object produced by call_material_from_elabid for that elabid.

    Dependency: call_material_from_elabid.
    '''
    return call_material_from_elabid(layer_object.target_elabid)

def deduplicate_instruments_from_layers(layers):
    '''
    Collects the instruments used (laser, depo chamber and RHEED) across a list of Layer-class objects and returns them deduplicated.

    For each layer, get_instruments(apikey) is called and its "laser_system", "deposition_chamber" and "rheed_system" entries are read.
    Duplicates are removed while keeping the order of first appearance, so the output is the same on every run.
    Ideally, each returned list should only contain one element.

    Parameters:
        layers: iterable of Layer-class objects.

    Returns:
        A dictionary with keys "laser_system", "deposition_chamber" and "rheed_system", each mapped to a list of distinct values.

    Note: the instrument values must be hashable. Reads the module-level variable `apikey`.
    '''
    instrument_keys = ("laser_system", "deposition_chamber", "rheed_system")
    collected = {key: [] for key in instrument_keys}
    for lyr in layers:
        instruments = lyr.get_instruments(apikey)
        for key in instrument_keys:
            collected[key].append(instruments[key])
    # dict.fromkeys drops duplicates but, unlike set(), preserves insertion order.
    return {key: list(dict.fromkeys(values)) for key, values in collected.items()}

def _quantity(value, units):
    '''Pairs a value with its units as {"value": ..., "units": ...}.'''
    return {"value": value, "units": units}

def _substrate_section(substrate_object):
    '''Builds the "substrate" section of the schema from a Substrate-class object.'''
    return {
        "name": substrate_object.name,
        "chemical_formula": substrate_object.get_compound_formula(apikey),
        "orientation": substrate_object.orientation,
        "miscut_angle": _quantity(substrate_object.miscut_angle, substrate_object.miscut_angle_unit),
        "miscut_direction": substrate_object.miscut_direction,
        "thickness": _quantity(substrate_object.thickness, substrate_object.thickness_unit),
        "dimensions": substrate_object.dimensions,
        "surface_treatment": substrate_object.surface_treatment,
        "manufacturer": substrate_object.manufacturer,
        "batch_id": substrate_object.batch_id,
    }

def _target_section(target_object):
    '''Builds the "target" section of a layer from a Target-class object.'''
    return {
        "name": target_object.name,
        "chemical_formula": target_object.get_compound_formula(apikey),
        "description": target_object.description,
        "shape": target_object.shape,
        "dimensions": target_object.dimensions,
        "thickness": _quantity(target_object.thickness, target_object.thickness_unit),
        "solid_form": target_object.solid_form,
        "manufacturer": target_object.manufacturer,
        # TO-DO: batch id currently not available, the target name is used instead.
        "batch_id": target_object.name,
    }

def _layer_section(layer, target_dict):
    '''Builds the schema entry of a single deposition layer from a Layer-class object and its target section.'''
    return {
        "target": target_dict,
        "start_time": layer.start_time,
        "operator": layer.operator,
        "description": layer.description,
        "number_of_pulses": layer.number_of_pulses,
        "deposition_time": _quantity(layer.deposition_time, layer.deposition_time_unit),
        "temperature": _quantity(layer.temperature, layer.temperature_unit),
        "heating_method": layer.heating_method,
        "layer_thickness": _quantity(layer.layer_thickness, layer.layer_thickness_unit),
        "buffer_gas": layer.buffer_gas,
        "process_pressure": _quantity(layer.process_pressure, layer.process_pressure_unit),
        "heater_target_distance": _quantity(layer.heater_target_distance, layer.heater_target_distance_unit),
        "repetition_rate": _quantity(layer.repetition_rate, layer.repetition_rate_unit),
        "laser_fluence": _quantity(layer.laser_fluence, layer.laser_fluence_unit),
        "laser_spot_area": _quantity(layer.laser_spot_area, layer.laser_spot_area_unit),
        "laser_energy": _quantity(layer.laser_energy, layer.laser_energy_unit),
        "laser_rastering": {
            "geometry": layer.laser_rastering_geometry,
            "positions": layer.laser_rastering_positions,
            "velocities": layer.laser_rastering_velocities,
        },
        "pre_annealing": {
            "ambient_gas": layer.pre_annealing_ambient_gas,
            "pressure": _quantity(layer.pre_annealing_pressure, layer.pre_annealing_pressure_unit),
            "temperature": _quantity(layer.pre_annealing_temperature, layer.pre_annealing_temperature_unit),
            "duration": _quantity(layer.pre_annealing_duration, layer.pre_annealing_duration_unit),
        },
        "post_annealing": {
            "ambient_gas": layer.post_annealing_ambient_gas,
            "pressure": _quantity(layer.post_annealing_pressure, layer.post_annealing_pressure_unit),
            "temperature": _quantity(layer.post_annealing_temperature, layer.post_annealing_temperature_unit),
            "duration": _quantity(layer.post_annealing_duration, layer.post_annealing_duration_unit),
        },
    }

def make_nexus_schema_dictionary(substrate_object, layers):
    '''
    Main function, combines the other functions to reconstruct the full dataset.

    Parameters:
        substrate_object: Substrate-class object (output of the chain_entrypoint_to_batch() function).
        layers: list of Layer-class objects (output of the chain_entrypoint_to_layers() function).

    Returns:
        A dictionary following the schema of the NeXus standard for PLD fabrications, with:
        - "sample" -> "substrate": data on the substrate batch;
        - "sample" -> "multilayer": one "layer_<layer_number>" entry per layer, each including its target
          (fetched through chain_layer_to_target);
        - "instruments_used": output of deduplicate_instruments_from_layers(layers).

    Note: reads the module-level variable `apikey`.
    '''
    pld_fabrication = {
        "sample": {
            "substrate": _substrate_section(substrate_object),
            "multilayer": {},
        },
        "instruments_used": deduplicate_instruments_from_layers(layers),
    }
    multilayer = pld_fabrication["sample"]["multilayer"]
    for layer in layers:
        # f-string instead of "+" so that a numeric layer_number does not raise TypeError.
        name = f"layer_{layer.layer_number}"
        target_object = chain_layer_to_target(layer)
        multilayer[name] = _layer_section(layer, _target_section(target_object))
    return pld_fabrication

def build_nexus_file(pld_fabrication, output_path):
    '''
    Writes the skeleton of a NeXus file for a PLD fabrication at output_path (opened with h5py in "w" mode).

    Work in progress: only three empty groups are created and tagged with their NX_class attribute;
    the content of pld_fabrication is not written to the file yet.

    NOTE(review): the "substrate" group is created directly under the entry group, while the schema
    dictionary nests the substrate under "sample" - confirm which layout is intended.
    '''
    # NOTE: look at the mail attachment from Emiliano...
    with h5py.File(output_path, "w") as nexus_file:
        entry_group = nexus_file.create_group("pld_fabrication")
        entry_group.attrs["NX_class"] = "NXentry"

        # Sample section
        entry_group.create_group("sample").attrs["NX_class"] = "NXsample"

        # Substrate section
        entry_group.create_group("substrate").attrs["NX_class"] = "NXsubentry"

if __name__=="__main__":
    # Script entry: asks for the API key and the elabid of the starting sample, rebuilds the
    # full dataset and dumps it as JSON into output/sample-<sample name>.json.
    #
    # ELABFTW_API_URL and apikey are deliberately module-level: the functions above read them as globals.
    # TO-DO: place the API base URL somewhere else.
    ELABFTW_API_URL = "https://elabftw.fisica.unina.it/api/v2"
    apikey = getpass("Paste API key here: ")
    # Whitespace-only input counts as empty, so the default is used.
    elabid = input("Enter elabid of your starting sample [default= 1111]: ").strip() or 1111
    # NOTE(review): this bypasses call_entrypoint_from_elabid and its "Sample" category check - confirm this is intended.
    data = APIHandler(apikey).get_entry_from_elabid(elabid)
    sample = Entrypoint(data)
    sample_name = sample.name.strip().replace(" ","_")
    substrate_object = chain_entrypoint_to_batch(sample) # Substrate-class object
    layers = chain_entrypoint_to_layers(sample) # list of Layer-class objects
    result = make_nexus_schema_dictionary(substrate_object, layers)
    # The output directory may not exist on a fresh checkout.
    os.makedirs("output", exist_ok=True)
    with open(f"output/sample-{sample_name}.json", "w", encoding="utf-8") as f:
        json.dump(result, f, indent=3)