Compare commits

...

5 Commits

Author SHA256 Message Date
1523c973f4 another attempt at parsing RWA - seems to work better 2026-03-20 15:02:12 +01:00
5cf67648af adds mod. suggested by ClaudeAI - still doesn't work
original code is commented below, rows 517-545
2026-03-18 15:15:31 +01:00
839799a13f adds new function to analyze rheed data, doesn't really work atm
thanks DeepSeek
2026-03-16 12:51:05 +01:00
10c68bf260 reworks how instruments are recorded in the nx file according to the new version
the instruments_used group is still present outside the multilayer group,
but a new instruments_used sub-group is now also created in each
layer-specific group

instruments used to deposit a single layer are in
/sample/multilayer/layer_N/instruments_used, with only one value
for each category (rheed, laser, chamber);
in /instruments_used (root), each category holds a list of every
(unique) instrument involved in the full deposition process
2026-03-13 15:11:53 +01:00
bab5e958cb NOT WORKING: starts changing the structure of function "deduplicate..." 2026-03-11 15:43:11 +01:00

View File

@@ -108,52 +108,58 @@ def deduplicate_instruments_from_layers(layers):
lasers = [] lasers = []
chambers = [] chambers = []
rheeds = [] rheeds = []
elegant_dict = {}
for lyr in layers: for lyr in layers:
instruments = lyr.get_instruments(apikey) instruments = lyr.get_instruments(apikey)
lasers.append(instruments["laser_system"]) lasers.append(instruments["laser_system"])
chambers.append(instruments["deposition_chamber"]) chambers.append(instruments["deposition_chamber"])
rheeds.append(instruments["rheed_system"]) rheeds.append(instruments["rheed_system"])
elegant_dict[f"layer_{lyr.layer_number}"] = {
"laser_system": instruments["laser_system"],
"deposition_chamber": instruments["deposition_chamber"],
"rheed_system": instruments["rheed_system"],
}
ded_lasers = list( set( lasers ) ) ded_lasers = list( set( lasers ) )
ded_chambers = list( set( chambers ) ) ded_chambers = list( set( chambers ) )
ded_rheeds = list( set( rheeds ) ) ded_rheeds = list( set( rheeds ) )
elegant_dict = { elegant_dict["multilayer"] = {
# Keep key names human readable since they're used in the messages of the following errors # Keep key names human readable since they're used in the messages of the following errors
"Laser Systems": ded_lasers, "laser_system": ", ".join(ded_lasers),
"Deposition Chamber": ded_chambers, "deposition_chamber": ", ".join(ded_chambers),
"RHEED Systems": ded_rheeds "rheed_system": ", ".join(ded_rheeds)
} # dictionary's name's a joke } # dictionary's name is a joke
updated_dict = {} # use this for containing the final dataset # updated_dict = {} # use this for containing the final dataset
for ded in elegant_dict: # for ded in elegant_dict:
if len(elegant_dict[ded]) == 0: # if len(elegant_dict[ded]) == 0:
# if len of list is 0 - empty list - raise error # # if len of list is 0 - empty list - raise error
raise IndexError(f"Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample. Fix this on eLabFTW before retrying. Affected list: {ded}.") # raise IndexError(f"Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample. Fix this on eLabFTW before retrying. Affected list: {ded}.")
elif len(elegant_dict[ded]) > 1: # elif len(elegant_dict[ded]) > 1:
# if len of list is > 1 - too many values - allow the user to pick one # # if len of list is > 1 - too many values - allow the user to pick one
print("Warning: different instruments have been used for different layers - which is currently not allowed.") # print("Warning: different instruments have been used for different layers - which is currently not allowed.")
# there's a better way to do this but I can't remember now for the life of me... # # there's a better way to do this but I can't remember now for the life of me...
i = 0 # i = 0
while i < len(elegant_dict[ded]): # while i < len(elegant_dict[ded]):
print(f"{i} - {elegant_dict[ded][i]}") # print(f"{i} - {elegant_dict[ded][i]}")
i += 1 # i += 1
ans = None # ans = None
while not type(ans) == int or not ans in range(0, len(elegant_dict[ded])): # while not type(ans) == int or not ans in range(0, len(elegant_dict[ded])):
ans = input("Please pick one of the previous (0, 1, ...) [default = 0]: ") or "0" # ans = input("Please pick one of the previous (0, 1, ...) [default = 0]: ") or "0"
if ans.isdigit(): # if ans.isdigit():
ans = int(ans) # ans = int(ans)
continue # unnecessary? # continue # unnecessary?
updated_dict[ded] = elegant_dict[ded][ans] # updated_dict[ded] = elegant_dict[ded][ans]
elif elegant_dict[ded][0] in ["", 0, None]: # elif elegant_dict[ded][0] in ["", 0, None]:
# if len is 1 BUT value is "", 0 or None raise error # # if len is 1 BUT value is "", 0 or None raise error
raise ValueError(f"Missing data: a Laser System, Chamber and/or RHEED System which is specified across all the Deposition-type experiments related to this sample is either empty or invalid. Fix this on eLabFTW before retrying. Affected list: {ded}.") # raise ValueError(f"Missing data: a Laser System, Chamber and/or RHEED System which is specified across all the Deposition-type experiments related to this sample is either empty or invalid. Fix this on eLabFTW before retrying. Affected list: {ded}.")
else: # else:
# if none of the previous (only 1 value), that single value is used # # if none of the previous (only 1 value), that single value is used
updated_dict[ded] = elegant_dict[ded][0] # updated_dict[ded] = elegant_dict[ded][0]
instruments_used_dict = { # instruments_used_dict = {
"laser_system": updated_dict["Laser Systems"], # "laser_system": updated_dict["Laser Systems"],
"deposition_chamber": updated_dict["Deposition Chamber"], # "deposition_chamber": updated_dict["Deposition Chamber"],
"rheed_system": updated_dict["RHEED Systems"], # "rheed_system": updated_dict["RHEED Systems"],
} # }
return instruments_used_dict return elegant_dict
### OLD CODE ### OLD CODE
# if 0 in [ len(i) for i in elegant_list ]: # if 0 in [ len(i) for i in elegant_list ]:
@@ -173,10 +179,51 @@ def deduplicate_instruments_from_layers(layers):
# "rheed_system": rheeds, # "rheed_system": rheeds,
# } # }
def analyse_rheed_data(data):
    '''
    Split a Realtime Window Analysis (RWA) table into time and RHEED intensities.

    The input is the content of a tsv file: a 2D array whose first column is a
    timestamp and whose next three columns are RHEED intensities, with an
    unspecified number of rows.
    -----
    Time    Layer1_Int1    Layer1_Int2    Layer1_Int3
    -----
    Time is expressed in seconds, intensities are normalized (adimensional).

    Parameters:
    - data: 2D array-like with at least 4 columns (e.g. the output of np.loadtxt).

    Returns a dictionary with two keys:
    - "time": 1D float64 array taken from column 0;
    - "intensity": 2D float32 array of shape (n_rows, 3) taken from columns 1, 2, 3.

    Distinct ValueErrors are raised if:
    - The array is not 2-dimensional;
    - The total number of columns is lower than 1+3 (= 4).
    If there are more than 4 columns a warning is printed and the extra
    columns are ignored.

    Written with help from DeepSeek.
    '''
    # Accept any array-like (nested lists included); an ndarray passes through untouched.
    data = np.asarray(data)

    # Verifying the format of the input:
    if data.ndim != 2:
        raise ValueError(f"Unexpected trace format: expected 2D array, got ndim = {data.ndim}.")

    n_cols = data.shape[1]  # 0 = rows, 1 = columns
    if n_cols < 4:
        raise ValueError(f"Insufficient number of columns: expected 4, got n_cols = {n_cols}.")
    if n_cols > 4:
        # Extra columns are tolerated on purpose: only the first four are used below.
        print(f"Warning! The input file (for Realtime Window Analysis) has {n_cols - 4} more column(s) than needed.\nOnly 4 columns will be considered - with the first representing time and the others representing RHEED intensities.")

    # copy=False avoids duplicating the column when it already has the target dtype.
    time = data[:, 0].astype(np.float64, copy=False)         # all rows of col 0
    intensities = data[:, 1:4].astype(np.float32, copy=False)  # all rows of cols 1, 2, 3

    return {
        "time": time,
        "intensity": intensities,
    }
def make_nexus_schema_dictionary(substrate_object, layers): def make_nexus_schema_dictionary(substrate_object, layers):
''' '''
Main function, takes all the other functions to reconstruct the full dataset. Takes a Substrate-class object (output of the chain_entrypoint_to_batch() function) and a list of Layer-class objects (output of the chain_entrypoint_to_layers() function), returns dictionary with the same schema as the NeXus standard for PLD fabrications. Main function, takes all the other functions to reconstruct the full dataset. Takes a Substrate-class object (output of the chain_entrypoint_to_batch() function) and a list of Layer-class objects (output of the chain_entrypoint_to_layers() function), returns dictionary with the same schema as the NeXus standard for PLD fabrications.
''' '''
instruments = deduplicate_instruments_from_layers(layers)
pld_fabrication = { pld_fabrication = {
"sample": { "sample": {
"substrate": { "substrate": {
@@ -199,7 +246,7 @@ def make_nexus_schema_dictionary(substrate_object, layers):
}, },
"multilayer": {}, "multilayer": {},
}, },
"instruments_used": deduplicate_instruments_from_layers(layers), "instruments_used": instruments["multilayer"],
} }
multilayer = pld_fabrication["sample"]["multilayer"] multilayer = pld_fabrication["sample"]["multilayer"]
for layer in layers: for layer in layers:
@@ -299,6 +346,7 @@ def make_nexus_schema_dictionary(substrate_object, layers):
"units": layer.post_annealing_duration_unit, "units": layer.post_annealing_duration_unit,
}, },
}, },
"instruments_used": instruments[name],
} }
return pld_fabrication return pld_fabrication
@@ -359,6 +407,9 @@ def build_nexus_file(pld_fabrication, output_path, rheed_osc=None):
nx_post_annealing = nx_layer.create_group("post_annealing") nx_post_annealing = nx_layer.create_group("post_annealing")
nx_post_annealing.attrs["NX_class"] = "NXprocess" nx_post_annealing.attrs["NX_class"] = "NXprocess"
post_ann_dict = layer_dict["post_annealing"] post_ann_dict = layer_dict["post_annealing"]
nx_layer_instruments = nx_layer.create_group("instruments_used")
nx_layer_instruments.attrs["NX_class"] = "NXinstrument"
layer_instruments_dict = layer_dict["instruments_used"]
## Target metadata ## Target metadata
try: try:
@@ -429,6 +480,12 @@ def build_nexus_file(pld_fabrication, output_path, rheed_osc=None):
nx_post_annealing["duration"].attrs["units"] = post_ann_dict["duration"]["units"] nx_post_annealing["duration"].attrs["units"] = post_ann_dict["duration"]["units"]
except TypeError as te: except TypeError as te:
raise TypeError(te) raise TypeError(te)
try:
nx_layer_instruments.create_dataset("laser_system", data = layer_instruments_dict["laser_system"])
nx_layer_instruments.create_dataset("deposition_chamber", data = layer_instruments_dict["deposition_chamber"])
nx_layer_instruments.create_dataset("rheed_system", data = layer_instruments_dict["rheed_system"])
except TypeError as te:
raise TypeError(te)
# Instruments used section # Instruments used section
nx_instruments = nx_pld_entry.create_group("instruments_used") nx_instruments = nx_pld_entry.create_group("instruments_used")
@@ -440,9 +497,41 @@ def build_nexus_file(pld_fabrication, output_path, rheed_osc=None):
nx_instruments.create_dataset("rheed_system", data = instruments_dict["rheed_system"]) nx_instruments.create_dataset("rheed_system", data = instruments_dict["rheed_system"])
except TypeError as te: except TypeError as te:
raise TypeError(te) raise TypeError(te)
nx_rheed = nx_pld_entry.create_group("rheed_data")
nx_rheed.attrs["NX_class"] = "NXdata" # RHEED data section
nx_rheed.create_dataset("intensity", data=rheed_osc) if rheed_osc is not None:
nx_rheed = nx_pld_entry.create_group("rheed_data")
nx_rheed.attrs["NX_class"] = "NXdata"
# Asse temporale
t_ds = nx_rheed.create_dataset("time", data=rheed_osc["time"])
t_ds.attrs["units"] = "s"
t_ds.attrs["long_name"] = "Time"
# Intensità: shape (n_layers, n_timepoints, 3)
i_ds = nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
i_ds.attrs["units"] = "a.u."
i_ds.attrs["long_name"] = "RHEED Intensity"
# Attributi NXdata — notazione NeXus 3.x corretta
nx_rheed.attrs["signal"] = "intensity"
nx_rheed.attrs["axes"] = [".", "time", "."] # solo l'asse 1 (time) è denominato
nx_rheed.attrs["time_indices"] = np.array([1], dtype=np.int32)
# ###########
# nx_rheed = nx_pld_entry.create_group("rheed_data")
# nx_rheed.attrs["NX_class"] = "NXdata"
# nx_rheed.create_dataset("time", data=rheed_osc["time"])
# nx_rheed["time"].attrs["units"] = "s"
# nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
# #nx_rheed["intensity"].attrs["units"] = "counts"
# nx_rheed["intensity"].attrs["long_name"] = "RHEED intensity"
# nx_rheed.attrs["signal"] = "intensity"
# nx_rheed.attrs["axes"] = "layer:time:channel"
# nx_rheed.attrs["layer_indices"] = [0] # asse layer
# nx_rheed.attrs["time_indices"] = [1] # asse tempo
# nx_rheed.attrs["channel_indices"] = [2]
return return
if __name__=="__main__": if __name__=="__main__":
@@ -455,10 +544,19 @@ if __name__=="__main__":
sample_name = sample.name.strip().replace(" ","_") sample_name = sample.name.strip().replace(" ","_")
substrate_object = chain_entrypoint_to_batch(sample) # Substrate-class object substrate_object = chain_entrypoint_to_batch(sample) # Substrate-class object
layers = chain_entrypoint_to_layers(sample) # list of Layer-class objects layers = chain_entrypoint_to_layers(sample) # list of Layer-class objects
n_layers = len(layers) # total number of layers on the sample
result = make_nexus_schema_dictionary(substrate_object, layers) result = make_nexus_schema_dictionary(substrate_object, layers)
# print(make_nexus_schema_dictionary(substrate_object, layers)) # debug # print(make_nexus_schema_dictionary(substrate_object, layers)) # debug
with open (f"output/sample-{sample_name}.json", "w") as f: with open (f"output/sample-{sample_name}.json", "w") as f:
json.dump(result, f, indent=3) json.dump(result, f, indent=3)
# TO-DO: remove the hard-coded path of the RWA file
# ideally the script should download a TXT/CSV file from each layer
# (IF PRESENT ←→ also handle missing file error)
# and merge all data in a single file to analyse it
with open(f"tests/Realtime_Window_Analysis.txt", "r") as o: with open(f"tests/Realtime_Window_Analysis.txt", "r") as o:
osc = np.loadtxt(o) osc = np.loadtxt(o, delimiter="\t")
build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5", rheed_osc=osc) try:
rheed_osc = analyse_rheed_data(data=osc) or None # analyze rheed data first, build the file later
except ValueError as ve:
raise ValueError(f"Error with function analyse_rheed_data. {ve}\nPlease make sure the Realtime Window Analysis file is exactly 4 columns wide - where the first column represents time and the others are RHEED intensities.")
build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5", rheed_osc=rheed_osc)