Compare commits

..

8 Commits

Author SHA256 Message Date
1523c973f4 another attempt at parsing RWA - seems to work better 2026-03-20 15:02:12 +01:00
5cf67648af adds mod. suggested by ClaudeAI - still doesn't work
original code is commented below, rows 517-545
2026-03-18 15:15:31 +01:00
839799a13f adds new function to analyze rheed data, doesn't really work atm
thanks DeepSeek
2026-03-16 12:51:05 +01:00
10c68bf260 reworks how instruments are recorded in the nx file according to new ver
the instruments_used group is still present outside the multilayer group
but currently a new instruments_used sub-group is created in the
layer-specific group

instruments used to deposit a single layer are in
/sample/multilayer/layer_N/instruments_used and there's only one value
for each category (rheed, laser, chamber)
in /instruments_used (root) for each category there's a list of every
(unique) instrument involved in the full deposition process
2026-03-13 15:11:53 +01:00
bab5e958cb NOT WORKING: starts changing the structure of function "deduplicate..." 2026-03-11 15:43:11 +01:00
fc150be724 main now turns content of realtime window analysis into nx dataset
the data is not parsed or analysed, it's written as text (well, tsv
technically) - this is only for testing and first attempts
2026-03-11 15:01:04 +01:00
aa3bf531f9 adds example realtime windows analysis 2026-03-11 15:00:15 +01:00
3f97ccee25 removes functions.py 2026-02-17 16:20:08 +01:00
3 changed files with 38075 additions and 102 deletions

View File

@@ -1,62 +0,0 @@
"""
Currently unused!
"""
import json, requests
from APIHandler import APIHandler
def get_entry_from_elabid(elabid, entryType="items"):
    '''
    Fetch an eLabFTW entry by its elabid and return the decoded JSON payload
    as a dictionary.

    Parameters:
        elabid: identifier of the entry on the eLabFTW server.
        entryType: API collection to query ("items", "experiments", ...).

    Raises:
        ConnectionError: when the HTTP status code is not 2xx/3xx.

    NOTE(review): relies on module-level globals `apikey` and
    `ELABFTW_API_URL` being defined elsewhere in this file.
    '''
    request_headers = APIHandler(apikey).dump
    response = requests.get(
        url=f"{ELABFTW_API_URL}/{entryType}/{elabid}",
        headers=request_headers,
        verify=True,
    )
    # Both 2xx and 3xx status families are treated as success.
    if response.status_code // 100 not in (2, 3):
        raise ConnectionError(f"HTTP request failed with status code: {response.status_code}.")
    return response.json()
def get_sample_layers_data(elabid):
    '''
    Return the following data from every eLabFTW experiment linked
    to a certain sample, identified by elabid:
    - Title of the experiment
    - Category (should check it's "PLD Deposition")
    - Layer number - if present (PLD depositions)
    - Deposition time - raises an error if not present
    - Repetition rate - raises an error if not present

    NOTE(review): HTTP status codes are not checked here, so a failed
    request surfaces as a JSON/KeyError instead of a ConnectionError —
    consider routing through get_entry_from_elabid() for uniform handling.
    '''
    # Build the auth header the same way get_entry_from_elabid() does.
    # (Bug fix: the original referenced an undefined `header` variable
    # because the dict that built it had been commented out.)
    header = APIHandler(apikey).dump
    sample_data = requests.get(
        headers=header,
        url=f"https://elabftw.fisica.unina.it/api/v2/items/{elabid}",
        verify=True
    ).json()
    related_experiments = sample_data["related_experiments_links"]
    result = []
    for exp in related_experiments:
        # Bug fix: single quotes inside the f-string — reusing double
        # quotes here was a syntax error on Python < 3.12.
        experiment_data = requests.get(
            headers=header,
            url=f"https://elabftw.fisica.unina.it/api/v2/experiments/{exp.get('entityid')}",
            verify=True
        ).json()
        extra = experiment_data["metadata_decoded"]["extra_fields"]
        result.append(
            {"title": exp.get("title"),
             # Layer number is optional per the docstring, so fall back to
             # None instead of raising AttributeError when the field is absent.
             "layer_number": extra.get("Layer Progressive Number", {}).get("value"),
             "category": exp.get("category_title"),
             # These two are mandatory: a missing field raises (AttributeError
             # on None), matching the documented "error if not present".
             "deposition_time": extra.get("Duration").get("value"),
             "repetition_rate": extra.get("Repetition rate").get("value")}
        )
    return result
if __name__ == "__main__":
    # This module is a library of helpers — warn anyone executing it directly.
    print("Warning: you're not supposed to be running this as the main program.")

View File

@@ -1,4 +1,5 @@
import os, json, requests, h5py
import numpy as np
from getpass import getpass
from APIHandler import APIHandler
from classes import *
@@ -107,52 +108,58 @@ def deduplicate_instruments_from_layers(layers):
lasers = []
chambers = []
rheeds = []
elegant_dict = {}
for lyr in layers:
instruments = lyr.get_instruments(apikey)
lasers.append(instruments["laser_system"])
chambers.append(instruments["deposition_chamber"])
rheeds.append(instruments["rheed_system"])
elegant_dict[f"layer_{lyr.layer_number}"] = {
"laser_system": instruments["laser_system"],
"deposition_chamber": instruments["deposition_chamber"],
"rheed_system": instruments["rheed_system"],
}
ded_lasers = list( set( lasers ) )
ded_chambers = list( set( chambers ) )
ded_rheeds = list( set( rheeds ) )
elegant_dict = {
elegant_dict["multilayer"] = {
# Keep key names human readable since they're used in the messages of the following errors
"Laser Systems": ded_lasers,
"Deposition Chamber": ded_chambers,
"RHEED Systems": ded_rheeds
} # dictionary's name's a joke
updated_dict = {} # use this for containing the final dataset
for ded in elegant_dict:
if len(elegant_dict[ded]) == 0:
# if len of list is 0 - empty list - raise error
raise IndexError(f"Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample. Fix this on eLabFTW before retrying. Affected list: {ded}.")
elif len(elegant_dict[ded]) > 1:
# if len of list is > 1 - too many values - allow the user to pick one
print("Warning: different instruments have been used for different layers - which is currently not allowed.")
# there's a better way to do this but I can't remember now for the life of me...
i = 0
while i < len(elegant_dict[ded]):
print(f"{i} - {elegant_dict[ded][i]}")
i += 1
ans = None
while not type(ans) == int or not ans in range(0, len(elegant_dict[ded])):
ans = input("Please pick one of the previous (0, 1, ...) [default = 0]: ") or "0"
if ans.isdigit():
ans = int(ans)
continue # unnecessary?
updated_dict[ded] = elegant_dict[ded][ans]
elif elegant_dict[ded][0] in ["", 0, None]:
# if len is 1 BUT value is "", 0 or None raise error
raise ValueError(f"Missing data: a Laser System, Chamber and/or RHEED System which is specified across all the Deposition-type experiments related to this sample is either empty or invalid. Fix this on eLabFTW before retrying. Affected list: {ded}.")
else:
# if none of the previous (only 1 value), that single value is used
updated_dict[ded] = elegant_dict[ded][0]
instruments_used_dict = {
"laser_system": updated_dict["Laser Systems"],
"deposition_chamber": updated_dict["Deposition Chamber"],
"rheed_system": updated_dict["RHEED Systems"],
}
return instruments_used_dict
"laser_system": ", ".join(ded_lasers),
"deposition_chamber": ", ".join(ded_chambers),
"rheed_system": ", ".join(ded_rheeds)
} # dictionary's name is a joke
# updated_dict = {} # use this for containing the final dataset
# for ded in elegant_dict:
# if len(elegant_dict[ded]) == 0:
# # if len of list is 0 - empty list - raise error
# raise IndexError(f"Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample. Fix this on eLabFTW before retrying. Affected list: {ded}.")
# elif len(elegant_dict[ded]) > 1:
# # if len of list is > 1 - too many values - allow the user to pick one
# print("Warning: different instruments have been used for different layers - which is currently not allowed.")
# # there's a better way to do this but I can't remember now for the life of me...
# i = 0
# while i < len(elegant_dict[ded]):
# print(f"{i} - {elegant_dict[ded][i]}")
# i += 1
# ans = None
# while not type(ans) == int or not ans in range(0, len(elegant_dict[ded])):
# ans = input("Please pick one of the previous (0, 1, ...) [default = 0]: ") or "0"
# if ans.isdigit():
# ans = int(ans)
# continue # unnecessary?
# updated_dict[ded] = elegant_dict[ded][ans]
# elif elegant_dict[ded][0] in ["", 0, None]:
# # if len is 1 BUT value is "", 0 or None raise error
# raise ValueError(f"Missing data: a Laser System, Chamber and/or RHEED System which is specified across all the Deposition-type experiments related to this sample is either empty or invalid. Fix this on eLabFTW before retrying. Affected list: {ded}.")
# else:
# # if none of the previous (only 1 value), that single value is used
# updated_dict[ded] = elegant_dict[ded][0]
# instruments_used_dict = {
# "laser_system": updated_dict["Laser Systems"],
# "deposition_chamber": updated_dict["Deposition Chamber"],
# "rheed_system": updated_dict["RHEED Systems"],
# }
return elegant_dict
### OLD CODE
# if 0 in [ len(i) for i in elegant_list ]:
@@ -172,10 +179,51 @@ def deduplicate_instruments_from_layers(layers):
# "rheed_system": rheeds,
# }
def analyse_rheed_data(data):
    '''
    Parse a Realtime Window Analysis array into time/intensity arrays.

    Parameters
    ----------
    data : 2D numpy array with at least 4 columns, laid out as
        -----
        Time Layer1_Int1 Layer1_Int2 Layer1_Int3
        -----
        Column 0 is a timestamp in seconds; columns 1-3 are RHEED
        intensities (normalized, adimensional). Extra columns are
        ignored with a printed warning.

    Returns
    -------
    dict with:
        "time"      : float64 array, shape (n_rows,)
        "intensity" : float32 array, shape (n_rows, 3)

    Raises
    ------
    ValueError
        If the array is not 2-dimensional, or has fewer than 1+3 (= 4) columns.
    '''
    # Verify the format of the input array first.
    if data.ndim != 2:
        raise ValueError(f"Unexpected trace format: expected 2D array, got ndim = {data.ndim}.")
    n_cols = data.shape[1]  # shape[0] = rows, shape[1] = columns
    if n_cols < 4:
        raise ValueError(f"Insufficient number of columns: expected 4, got n_cols = {n_cols}.")
    if n_cols > 4:
        # Bug fix: the original message was missing the word "columns".
        print(f"Warning! The input file (for Realtime Window Analysis) has {n_cols-4} more columns than needed.\nOnly 4 columns will be considered - with the first representing time and the others representing RHEED intensities.")
    # Time axis (all rows of column 0) as float64; copy=False avoids a
    # copy when the dtype already matches (memory efficiency).
    time = data[:, 0].astype(np.float64, copy=False)
    # Intensities (all rows of columns 1, 2, 3) as float32.
    intensities = data[:, 1:4].astype(np.float32, copy=False)
    return {
        "time": time,
        "intensity": intensities,
    }
def make_nexus_schema_dictionary(substrate_object, layers):
'''
Main function, takes all the other functions to reconstruct the full dataset. Takes a Substrate-class object (output of the chain_entrypoint_to_batch() function) and a list of Layer-class objects (output of the chain_entrypoint_to_layers() function), returns dictionary with the same schema as the NeXus standard for PLD fabrications.
'''
instruments = deduplicate_instruments_from_layers(layers)
pld_fabrication = {
"sample": {
"substrate": {
@@ -198,7 +246,7 @@ def make_nexus_schema_dictionary(substrate_object, layers):
},
"multilayer": {},
},
"instruments_used": deduplicate_instruments_from_layers(layers),
"instruments_used": instruments["multilayer"],
}
multilayer = pld_fabrication["sample"]["multilayer"]
for layer in layers:
@@ -298,10 +346,11 @@ def make_nexus_schema_dictionary(substrate_object, layers):
"units": layer.post_annealing_duration_unit,
},
},
"instruments_used": instruments[name],
}
return pld_fabrication
def build_nexus_file(pld_fabrication, output_path):
def build_nexus_file(pld_fabrication, output_path, rheed_osc=None):
# NOTE: look at the mail attachment from Emiliano...
with h5py.File(output_path, "w") as f:
nx_pld_entry = f.create_group("pld_fabrication")
@@ -358,6 +407,9 @@ def build_nexus_file(pld_fabrication, output_path):
nx_post_annealing = nx_layer.create_group("post_annealing")
nx_post_annealing.attrs["NX_class"] = "NXprocess"
post_ann_dict = layer_dict["post_annealing"]
nx_layer_instruments = nx_layer.create_group("instruments_used")
nx_layer_instruments.attrs["NX_class"] = "NXinstrument"
layer_instruments_dict = layer_dict["instruments_used"]
## Target metadata
try:
@@ -428,6 +480,12 @@ def build_nexus_file(pld_fabrication, output_path):
nx_post_annealing["duration"].attrs["units"] = post_ann_dict["duration"]["units"]
except TypeError as te:
raise TypeError(te)
try:
nx_layer_instruments.create_dataset("laser_system", data = layer_instruments_dict["laser_system"])
nx_layer_instruments.create_dataset("deposition_chamber", data = layer_instruments_dict["deposition_chamber"])
nx_layer_instruments.create_dataset("rheed_system", data = layer_instruments_dict["rheed_system"])
except TypeError as te:
raise TypeError(te)
# Instruments used section
nx_instruments = nx_pld_entry.create_group("instruments_used")
@@ -439,6 +497,41 @@ def build_nexus_file(pld_fabrication, output_path):
nx_instruments.create_dataset("rheed_system", data = instruments_dict["rheed_system"])
except TypeError as te:
raise TypeError(te)
# RHEED data section
if rheed_osc is not None:
nx_rheed = nx_pld_entry.create_group("rheed_data")
nx_rheed.attrs["NX_class"] = "NXdata"
# Asse temporale
t_ds = nx_rheed.create_dataset("time", data=rheed_osc["time"])
t_ds.attrs["units"] = "s"
t_ds.attrs["long_name"] = "Time"
# Intensità: shape (n_layers, n_timepoints, 3)
i_ds = nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
i_ds.attrs["units"] = "a.u."
i_ds.attrs["long_name"] = "RHEED Intensity"
# Attributi NXdata — notazione NeXus 3.x corretta
nx_rheed.attrs["signal"] = "intensity"
nx_rheed.attrs["axes"] = [".", "time", "."] # solo l'asse 1 (time) è denominato
nx_rheed.attrs["time_indices"] = np.array([1], dtype=np.int32)
# ###########
# nx_rheed = nx_pld_entry.create_group("rheed_data")
# nx_rheed.attrs["NX_class"] = "NXdata"
# nx_rheed.create_dataset("time", data=rheed_osc["time"])
# nx_rheed["time"].attrs["units"] = "s"
# nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
# #nx_rheed["intensity"].attrs["units"] = "counts"
# nx_rheed["intensity"].attrs["long_name"] = "RHEED intensity"
# nx_rheed.attrs["signal"] = "intensity"
# nx_rheed.attrs["axes"] = "layer:time:channel"
# nx_rheed.attrs["layer_indices"] = [0] # asse layer
# nx_rheed.attrs["time_indices"] = [1] # asse tempo
# nx_rheed.attrs["channel_indices"] = [2]
return
if __name__=="__main__":
@@ -451,8 +544,19 @@ if __name__=="__main__":
sample_name = sample.name.strip().replace(" ","_")
substrate_object = chain_entrypoint_to_batch(sample) # Substrate-class object
layers = chain_entrypoint_to_layers(sample) # list of Layer-class objects
n_layers = len(layers) # total number of layers on the sample
result = make_nexus_schema_dictionary(substrate_object, layers)
# print(make_nexus_schema_dictionary(substrate_object, layers)) # debug
with open (f"output/sample-{sample_name}.json", "w") as f:
json.dump(result, f, indent=3)
build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5")
# TO-DO: remove the hard-coded path of the RWA file
# ideally the script should download a TXT/CSV file from each layer
# (IF PRESENT ←→ also handle missing file error)
# and merge all data in a single file to analyse it
with open(f"tests/Realtime_Window_Analysis.txt", "r") as o:
osc = np.loadtxt(o, delimiter="\t")
try:
rheed_osc = analyse_rheed_data(data=osc) or None # analyze rheed data first, build the file later
except ValueError as ve:
raise ValueError(f"Error with function analyse_rheed_data. {ve}\nPlease make sure the Realtime Window Analysis file is exactly 4 columns wide - where the first column represents time and the others are RHEED intensities.")
build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5", rheed_osc=rheed_osc)

File diff suppressed because it is too large Load Diff