minimal modifications

Untested: adds methods to the Layer class to fetch the list of attachments.
One method fetches all attachments, one filters textual uploads, and one filters PNG and BMP images.
2026-05-09 00:15:52 +02:00 · 2026-05-08 23:40:14 +02:00 · 2026-05-08 23:31:36 +02:00 · 2026-05-08 18:11:53 +02:00 · 2026-05-08 18:10:15 +02:00 · 2026-05-08 18:09:03 +02:00
13 changed files with 76820 additions and 222 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,10 @@
 # ignores logs of h5tojson, jsontoh5
 *.log
-# ignores output json of main.py
+# ignores any output of main.py
 output/*.json
 output/*.h5
 output/*.nxs
 # ---> Python
 # Byte-compiled / optimized / DLL files
--- a/output/attachments/placeholder
+++ b/output/attachments/placeholder
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
 requests
 asyncio
 h5py
 pillow
 elabapi_python
--- a/src/APIHandler.py
+++ b/src/APIHandler.py
@@ -1,41 +1,150 @@
-import requests
+import os, requests
 import elabapi_python as elabapi
 class APIHandler:
-    '''
+    """
-    Class to standardize the format of the headers of our http requests.
+    Class which handles all interactions with the eLabFTW API.
-    '''
+    It provides methods to retrieve data from the API and download attachments.
    It relies minimally on the elabapi-python library, which is used only for downloading attachments
    (since the API doesn't support downloading attachments AFAIK).
    Args:
        api_key:          A valid API key for the eLabFTW instance where the data is stored, with permissions to access the relevant entries.
                          eLabFTW's API keys are well documented here: https://doc.elabftw.net/docs/usage/api/.
                          If you don't have an API key and are uncapable of creating one, contact your eLabFTW administrator.
                          Or RTFM and create one yourself, it's not that hard.
        ELABFTW_API_URL:  Complete URL of the eLabFTW instance's root for the API endpoints.
                          In full caps because it won't (shouldn't) be changed much.
    """
    # TO-DO: remove static url.
-    def __init__(self, apikey="", ELABFTW_API_URL="https://elabftw.fisica.unina.it/api/v2"):
+    def __init__(
-        '''Init method, apikey suggested but not required (empty by default).'''
+        self, api_key="", ELABFTW_API_URL="https://elabftw.fisica.unina.it/api/v2"
-        self.auth = {"Authorization" : apikey}
+    ):
-        self.content = {"Content-Type" : "application/json"}
+        """Init method, apikey suggested but not required (empty by default)."""
        self.api_key = api_key
        self.auth = {"Authorization": api_key}
        self.content = {"Content-Type": "application/json"}
        self.header = {**self.auth, **self.content}
        self.elaburl = ELABFTW_API_URL
    def get_entry_from_elabid(self, elabid, entryType="items"):
        '''
        Method which returns a resource's raw data (as dictionary) from its elabid and entry type.
-        Entry type can be either "experiments" or "items".
+    def get_entry_from_elabid(self, elabid, entryType="items"):
-        '''
+        """
-        # TO-DO: validation and error handling on entryType value.
+        Returns raw data (as dictionary) from its elabid and entry type.
        args:
            elabid:     elabftw internal id of the selected resource.
            entryType:  Resource type. Anything other than "experiments" or "items" WILL raise an error.
        """
        if entryType not in ["experiments", "items"]:
            raise Exception(
                "You can only download attachments from experiments or items."
            )
        header = self.header
        response = requests.get(
-            headers = header,
+            headers=header, url=f"{self.elaburl}/{entryType}/{elabid}", verify=True
            url = f"{self.elaburl}/{entryType}/{elabid}",
            verify=True
        )
-        if response.status_code // 100 in [1,2,3]:
+
        # Response is 5xx = server error:
        if response.status_code // 100 == 5:
            raise ConnectionError(
                f"There's a problem on the server. Status code: {response.status_code}."
            )
        # Response is 4xx = client error:
        if response.status_code // 100 == 4:
            match response.status_code:
                case 401 | 403:
                    # Forbidden or unauthorized:
                    raise ConnectionError(
                        f"Invalid API key, authentication method or elabid. Check if an item with ID = {elabid} actually exists."
                    )
                case 404:
                    # Lapalissian:
                    raise ConnectionError(
                        f"404: Not Found. This means there's no resource with this elabid (wrong elabid?) on your eLabFTW (wrong endpoint?)."
                    )
                case 400:
                    # I genuinely have no idea:
                    raise ConnectionError(
                        f"400: Bad Request. This means the API endpoint you tried to reach is invalid. Did you tamper with the source code? If not, contact the developer."
                    )
                case _:
                    # For some fucking reason, this is the only error I actually get from the API...
                    raise ConnectionError(
                        f"HTTP request failed with status code: {response.status_code} (NOTE: 4xx means user's fault)."
                    )
        entry_data = response.json()
        return entry_data
-        elif response.status_code // 100 == 4:
+
-            match response.status_code:
+    def download_attachments_data(self, elabid, entryType="experiments"):
-                case 401|403:
+        """
-                    raise ConnectionError(f"Invalid API key or authentication method.")
+        Downloads attachments of a certain eLabFTW experiment (default) or item.
-                case 404:
+        Only returns their binary data. Use method download_attachments_to_disk to save to file.
-                    raise ConnectionError(f"404: Not Found. This means there's no resource with this elabid (wrong elabid?) on your eLabFTW (wrong endpoint?).")
+        NOTE: Output is a dictionary where:
-                case 400:
+            * The keys are the attachments' filenames;
-                    raise ConnectionError(f"400: Bad Request. This means the API endpoint you tried to reach is invalid. Did you tamper with the source code? If not, contact the developer.")
+            * The values are the binary data for those attachments.
-                case _:
+
-                    raise ConnectionError(f"HTTP request failed with status code: {response.status_code} (NOTE: 4xx means user's fault).")
+        Args:
-        else:
+            elabid:     eLabFTW internal ID of the selected resource.
-            raise ConnectionError(f"There's a problem on the server. Status code: {response.status_code}.")
+            entryType:  Resource type. Anything other than "experiments" or "items" WILL raise an error.
        """
        if entryType not in ["experiments", "items"]:
            raise Exception(
                "You can only download attachments from experiments or items."
            )
        config = elabapi.Configuration()
        config.api_key["api_key"] = api_key
        config.api_key_prefix["api_key"] = "Authorization"
        config.host = self.elaburl
        config.debug = False
        api_client = elabapi.ApiClient(config)
        api_client.set_default_header(
            header_name="Authorization", header_value=self.api_key
        )
        uploads_api = elabapi.UploadsApi(api_client)
        # Actual uploads (dictionary):
        uploads = {
            upload.real_name: uploads_api.read_upload(
                entryType, elabid, upload.id, format="binary", _preload_content=False
            ).data
            for upload in uploads_api.read_uploads(entryType, elabid)
        }
        return uploads
    def download_attachments_to_disk(
        self,
        elabid,
        entryType="experiments",
        dump_dir="output/attachments",
        # persistent=True,
    ):
        """
        Saves every attachment of an eLabFTW experiment (default) or item to disk.

        The binary data is obtained through self.download_attachments_data, then each
        attachment is written to dump_dir as "exp<elabid>-<filename>".

        Args:
            elabid:     eLabFTW internal ID of the selected resource.
            entryType:  Resource type. Anything other than "experiments" or "items" WILL raise an error.
            dump_dir:   Directory to which to save the attachments. Default is "output/attachments".
                        It is created if it doesn't exist yet.
            persistent: [Unused, currently commented out of the signature] Decides if the files
                        will stay on disk after all operations are completed.

        Returns:
            None. The only effect is the files written to dump_dir.

        Raises:
            Exception: if entryType is neither "experiments" nor "items".
        """
        if entryType not in ["experiments", "items"]:
            raise Exception(
                "You can only download attachments from experiments or items."
            )
        # The download method lives on the instance: calling it without "self." is a NameError.
        uploads = self.download_attachments_data(elabid, entryType=entryType)
        # Writing into a missing directory would fail, so make sure it exists first.
        os.makedirs(dump_dir, exist_ok=True)
        for filename, raw_data in uploads.items():
            # Filenames come from the server: keep only the last path component
            # so that a name containing separators can't escape dump_dir.
            safe_name = os.path.basename(filename)
            with open(os.path.join(dump_dir, f"exp{elabid}-{safe_name}"), "wb") as f:
                f.write(raw_data)
--- a/src/classes.py
+++ b/src/classes.py
@@ -1,8 +1,9 @@
 import os, json, requests
 from APIHandler import APIHandler
 class Layer:
-    '''
+    """
    Layer(layer_data) - where layer_data is a Python dictionary.
    Meant to be used for eLabFTW Experiments of the "PLD Deposition" category.
@@ -10,22 +11,29 @@ class Layer:
    eLabFTW experiments contain most of the data required by the NeXus file - although every layer is on a different eLab entry;
    unfortunately, some data like the target's chemical formula must be retrieved through additional HTTP requests.
    Attributes 'target_elabid', 'rheed_system_elabid' and 'laser_system_elabid' contain elabid's for these resources, which are all items.
-    '''
+    """
    def __init__(self, layer_data):
        try:
            self.elabid = layer_data["id"]
            self.operator = layer_data["fullname"]
            self.extra = layer_data["metadata_decoded"]["extra_fields"]
-            self.layer_number = self.extra["Layer Progressive Number"]["value"] # integer
+            self.uploads = layer_data["uploads"]  # dict
            self.layer_number = self.extra["Layer Progressive Number"][
                "value"
            ]  # integer
            self.target_elabid = self.extra["Target"]["value"]  # elabid
            self.laser_system_elabid = self.extra["Laser System"]["value"]  # elabid
            self.chamber_elabid = self.extra["Chamber"]["value"]  # elabid
            self.rheed_system_elabid = self.extra["RHEED System"]["value"]  # elabid
            self.start_time = layer_data.get("created_at")
            self.operator = layer_data.get("fullname")
            self.description = layer_data.get("body")
            self.deposition_time = self.extra["Duration"]["value"]
            self.deposition_time_unit = self.extra["Duration"]["unit"]
            self.repetition_rate = self.extra["Repetition rate"]["value"]
            self.repetition_rate_unit = self.extra["Repetition rate"]["unit"]
            try:
-                self.number_of_pulses = (float(self.deposition_time) * float(self.repetition_rate)).__floor__()
+                self.number_of_pulses = (
                    float(self.deposition_time) * float(self.repetition_rate)
                ).__floor__()
            except ValueError:
                # Since number_of_pulses is required, if it can't be calculated raise error:
                raise ValueError("""
@@ -33,16 +41,33 @@ class Layer:
    This has to be an error, since these fields are required by the NeXus standard.
    Please edit your eLabFTW entry and retry.
                """)
-            self.temperature = self.extra["Heater temperature"]["value"] # Note: this field used to have a trailing space in its name
+            self.temperature = self.extra["Heater temperature"][
-            self.process_pressure = self.extra["Process pressure"]["value"] # Note: this field used to have a trailing space in its name
+                "value"
            ]  # Note: this field used to have a trailing space in its name
            self.temperature_unit = self.extra["Heater temperature"]["unit"]
            self.process_pressure = self.extra["Process pressure"][
                "value"
            ]  # Note: this field used to have a trailing space in its name
            self.process_pressure_unit = self.extra["Process pressure"]["unit"]
            self.heating_method = self.extra["Heating Method"]["value"]
            self.layer_thickness = self.extra["Thickness"]["value"]
            self.layer_thickness_unit = self.extra["Thickness"]["unit"]
            self.buffer_gas = self.extra["Buffer gas"]["value"]
            self.heater_target_distance = self.extra["Heater-target distance"]["value"]
-            self.laser_fluence = self.extra["Laser Intensity"]["value"] # here fluence = intensity
+            self.heater_target_distance_unit = self.extra["Heater-target distance"][
                "unit"
            ]
            self.laser_fluence = self.extra["Laser Intensity"][
                "value"
            ]  # here fluence = intensity
            self.laser_fluence_unit = "J/(s cm^2)"
            self.laser_spot_area = self.extra["Spot Area"]["value"]
            self.laser_spot_area_unit = "mm^2"
            try:
-                self.laser_energy = (float(self.laser_fluence) * float(self.laser_spot_area)).__round__(3)
+                self.laser_energy = (
                    float(self.laser_fluence) * float(self.laser_spot_area) / 100
                ).__round__(3)
                self.laser_energy_unit = "J/s"
            except ValueError:
                # Since laser_energy is NOT required, if it can't be calculated warn user but allow the software to continue execution:
                print("""
@@ -51,31 +76,69 @@ class Layer:
    Setting Laser Energy to NoneType.
                """)
                # Placeholder
-                self.laser_energy = None
+                self.laser_energy = "N/A"
                self.laser_energy_unit = "J/s"
            # Laser rasternig section
-            self.laser_rastering_geometry = self.extra["Laser Rastering Geometry"]["value"]
+            self.laser_rastering_geometry = self.extra["Laser Rastering Geometry"][
-            self.laser_rastering_positions = self.extra["Laser Rastering Position"]["value"]
+                "value"
-            self.laser_rastering_velocities = self.extra["Laser Rastering Speed"]["value"]
+            ]
            self.laser_rastering_positions = self.extra["Laser Rastering Position"][
                "value"
            ]
            self.laser_rastering_velocities = self.extra["Laser Rastering Speed"][
                "value"
            ]
            # Pre annealing section
            self.pre_annealing_ambient_gas = self.extra["Buffer gas Pre"]["value"]
            self.pre_annealing_pressure = self.extra["Process pressure Pre"]["value"]
-            self.pre_annealing_temperature = self.extra["Heater temperature Pre"]["value"]
+            self.pre_annealing_temperature = self.extra["Heater temperature Pre"][
                "value"
            ]
            self.pre_annealing_duration = self.extra["Duration Pre"]["value"]
            self.pre_annealing_pressure_unit = self.extra["Process pressure Pre"][
                "unit"
            ]
            self.pre_annealing_temperature_unit = self.extra["Heater temperature Pre"][
                "unit"
            ]
            self.pre_annealing_duration_unit = self.extra["Duration Pre"]["unit"]
            # Post annealing section
            self.post_annealing_ambient_gas = self.extra["Buffer gas PA"]["value"]
            self.post_annealing_pressure = self.extra["Process pressure PA"]["value"]
-            self.post_annealing_temperature = self.extra["Heater temperature PA"]["value"]
+            self.post_annealing_temperature = self.extra["Heater temperature PA"][
                "value"
            ]
            self.post_annealing_duration = self.extra["Duration PA"]["value"]
            self.post_annealing_pressure_unit = self.extra["Process pressure PA"][
                "unit"
            ]
            self.post_annealing_temperature_unit = self.extra["Heater temperature PA"][
                "unit"
            ]
            self.post_annealing_duration_unit = self.extra["Duration PA"]["unit"]
            # Rejected but suggested by the NeXus standard:
-            #self.laser_rastering_coefficients = None
+            # self.laser_rastering_coefficients = None
        except KeyError as k:
            # Some keys are not required and can be called through the .get() method - which is permissive and allows null values;
            # Other keys are required so if they can't be called (invalid or null) raise error and stop execution of the program:
-            raise KeyError(f"The provided dictionary lacks a \"{k}\" key. Check the deposition layer entry on eLabFTW and make sure you used the correct Experiment template.")
+            raise KeyError(
-    def get_instruments(self, apikey):
+                f'The provided dictionary lacks a "{k}" key. Check the deposition layer entry on eLabFTW and make sure you used the correct Experiment template.'
-        raw_lasersys_data = APIHandler(apikey).get_entry_from_elabid(self.laser_system_elabid, entryType="items")
+            )
-        raw_chamber_data = APIHandler(apikey).get_entry_from_elabid(self.chamber_elabid, entryType="items")
+        # Optional
-        raw_rheedsys_data = APIHandler(apikey).get_entry_from_elabid(self.rheed_system_elabid, entryType="items")
+        self.start_time = layer_data.get("created_at") or None
        self.description = layer_data.get("body") or None
    def get_instruments(self, api_key):
        raw_lasersys_data = APIHandler(api_key).get_entry_from_elabid(
            self.laser_system_elabid, entryType="items"
        )
        raw_chamber_data = APIHandler(api_key).get_entry_from_elabid(
            self.chamber_elabid, entryType="items"
        )
        raw_rheedsys_data = APIHandler(api_key).get_entry_from_elabid(
            self.rheed_system_elabid, entryType="items"
        )
        instruments_used = {
            "laser_system": raw_lasersys_data.get("title") or None,
            "deposition_chamber": raw_chamber_data.get("title") or None,
@@ -83,31 +146,96 @@ class Layer:
        }
        return instruments_used
    def list_attachments(self):
        """
        Builds an index of the attachments stored in self.uploads.

        Returns a dictionary keyed by each upload's "id", where every value is a dictionary with:
            * "filename": the upload's "real_name";
            * "hashname": the upload's "long_name";
            * "related_experiment": the upload's "item_id".
        Everything is read from self.uploads, so no API key (and no HTTP request) is needed.
        """
        index = {}
        for upload in self.uploads:
            upload_id = upload["id"]
            index[upload_id] = {
                "filename": upload["real_name"],
                "hashname": upload["long_name"],
                "related_experiment": upload["item_id"],
            }
        return index
    def fetch_textual_uploads(self):
        """
        Filters the attachments returned by list_attachments() and keeps only the textual ones.

        An attachment counts as textual when its filename ends with .txt, .csv or .tsv
        (the check is case-sensitive and based on the filename alone).
        The original author's note: the API (v5.3.11) doesn't provide MIME type or similar
        metadata on the attachments, so the filename is the only available hint.

        Returns:
            A dictionary with the same layout as list_attachments() (attachment id -> details),
            restricted to the textual uploads, so that the user may select which one to import
            into the NeXus file as a dataset.
        """
        attachments = self.list_attachments()
        textual_uploads = {
            # Look the details up per attachment; indexing with the whole dictionary
            # raises "TypeError: unhashable type: 'dict'".
            attachment_id: details
            for attachment_id, details in attachments.items()
            if details["filename"].endswith((".txt", ".csv", ".tsv"))
        }
        return textual_uploads
    def fetch_images(self):
        """
        Filters the attachments returned by list_attachments() and keeps only the PNG or BMP images.

        Hopefully one of them is a RHEED pattern.
        An attachment counts as an image when its filename ends with .png or .bmp
        (the check is case-sensitive and based on the filename alone), so anything with
        another extension won't be retrieved, even if it's an actual image.
        The original author's note: the API (v5.3.11) doesn't provide MIME type or similar
        metadata on the attachments, so the filename is the only available hint.

        Returns:
            A dictionary with the same layout as list_attachments() (attachment id -> details),
            restricted to the images, so that the user may select which one to import
            into the NeXus file as a RHEED acquisition.
        """
        attachments = self.list_attachments()
        pictures = {
            # Look the details up per attachment; indexing with the whole dictionary
            # raises "TypeError: unhashable type: 'dict'".
            attachment_id: details
            for attachment_id, details in attachments.items()
            if details["filename"].endswith((".png", ".bmp"))
        }
        return pictures
 class Entrypoint:
-    '''
+    """
    Entrypoint(sample_data) - where sample_data is a Python dictionary.
    Meant to be used for eLabFTW Resources of the "Sample" category.
    The entrypoint is the starting point of the process of resolving the data chain.
    The entrypoint must be a dictionary containing the data of a sample, created directly from the JSON of the item endpoint on eLabFTW - which can be done through the function get_entry_from_elabid.
-    '''
+    """
    def __init__(self, sample_data):
        try:
            self.extra = sample_data["metadata_decoded"]["extra_fields"]
            self.linked_items = sample_data["items_links"]  # dict
            self.batch_elabid = self.extra["Substrate batch"]["value"]  # elabid
            self.linked_experiments = sample_data["related_experiments_links"]  # dict
-            self.linked_experiments_elabid = [ i["entityid"] for i in self.linked_experiments ] # list of elabid
+            self.linked_experiments_elabid = [
                i["entityid"] for i in self.linked_experiments
            ]  # list of elabid
        except KeyError as k:
            # Some keys are not required and can be called through the .get() method - which is permissive and allows null values;
            # Other keys are required so if they can't be called (invalid or null) raise error and stop execution of the program:
-            raise KeyError(f"The provided dictionary lacks a \"{k}\" key. Check the sample entry on eLabFTW and make sure you used the correct Resource template.")
+            raise KeyError(
                f'The provided dictionary lacks a "{k}" key. Check the sample entry on eLabFTW and make sure you used the correct Resource template.'
            )
        # Non-required attributes:
-        self.name = sample_data.get("title") or None # error prevention is more important than preventing empty fields here
+        self.name = (
            sample_data.get("title") or None
        )  # error prevention is more important than preventing empty fields here
 class Material:
-    '''
+    """
    Material(material_data) - where material_data is a Python dictionary.
    Meant to be used for eLabFTW Resources of either the "PLD Target" or the "Substrate" categories.
@@ -116,64 +244,87 @@ class Material:
        * Name and formula;
        * Shape and dimensions;
        * Misc.
-    '''
+    """
    def __init__(self, material_data):
        try:
            self.name = material_data["title"]  # required
            self.extra = material_data["metadata_decoded"]["extra_fields"]
            self.compound_elabid = self.extra["Compound"]["value"]
-            self.dimensions = self.extra["Size"]["value"]
+            self.dimensions = str(
                self.extra["Size"]["value"]
            )  # strings have a .count() method
            if self.dimensions.count("mm") == 2:
                self.dimensions_unit = "mm x mm"
            elif self.dimensions[-1] == '"':
                self.dimensions_unit = "inches"
            else:
                self.dimensions_unit = None
        except KeyError as k:
            # Some keys are not required and can be called through the .get() method - which is permissive and allows null values;
            # Other keys are required so if they can't be called (invalid or null) raise error and stop execution of the program:
-            raise KeyError(f"The provided dictionary lacks a \"{k}\" key. Check the target/substrate entry on eLabFTW and make sure you used the correct Resource template.")
+            raise KeyError(
                f'The provided dictionary lacks a "{k}" key. Check the target/substrate entry on eLabFTW and make sure you used the correct Resource template.'
            )
    def get_compound_data(self, apikey):
-        raw_compound_data = APIHandler(apikey).get_entry_from_elabid(self.compound_elabid, entryType="items")
+        raw_compound_data = APIHandler(apikey).get_entry_from_elabid(
            self.compound_elabid, entryType="items"
        )
        name = raw_compound_data["title"]
        extra = raw_compound_data["metadata_decoded"]["extra_fields"]
        formula = extra.get("Chemical formula")
-        cas = extra.get("CAS number ") or { "value": None }
+        cas = extra.get("CAS number ") or {"value": None}
        compound_data = {
-            "name" : name,
+            "name": name,
-            "chemical_formula" : formula.get("value"),
+            "chemical_formula": formula.get("value"),
-            "cas_number" : cas.get("value")
+            "cas_number": cas.get("value"),
        }
        return compound_data
    def get_compound_formula(self, apikey):
        """
        Returns the "chemical_formula" entry of the dictionary produced by
        get_compound_data(apikey), or None when that entry is absent.
        """
        return self.get_compound_data(apikey).get("chemical_formula")
 class Substrate(Material):
    def __init__(self, material_data):
        super().__init__(material_data)
        try:
            self.orientation = self.extra["Orientation"]["value"]
            self.miscut_angle = self.extra["Miscut Angle"]["value"]
            self.miscut_angle_unit = self.extra["Miscut Angle"]["unit"]
            self.miscut_direction = self.extra["Miscut Direction"]["value"]
            # Not present (yet) on eLabFTW for Substrates:
-            self.thickness = None #self.extra["Thickness"]["value"]
+            self.thickness = ""  # self.extra["Thickness"]["value"]
            self.thickness_unit = "μm"  # self.extra["Thickness"]["unit"]
            self.surface_treatment = self.extra["Surface treatment"]["value"]
            self.manufacturer = self.extra["Supplier"]["value"]
            self.batch_id = self.extra["Batch ID"]["value"]
        except KeyError as k:
-            raise KeyError(f"The provided dictionary lacks a \"{k}\" key - which is specific for substrates. Check the {self.name} substrate entry on eLabFTW and make sure you used the correct Resource template.")
+            raise KeyError(
                f'The provided dictionary lacks a "{k}" key - which is specific for substrates. Check the {self.name} substrate entry on eLabFTW and make sure you used the correct Resource template.'
            )
 class Target(Material):
    def __init__(self, material_data):
        super().__init__(material_data)
        try:
            self.thickness = self.extra["Thickness"]["value"]
            self.thickness_unit = self.extra["Thickness"]["unit"]
            self.shape = self.extra["shape"]["value"]
            self.solid_form = self.extra["Solid form"]["value"]
            self.manufacturer = self.extra["Supplier"]["value"]
        except KeyError as k:
-            raise KeyError(f"The provided dictionary lacks a \"{k}\" key - which is specific for PLD targets. Check the {self.name} target entry on eLabFTW and make sure you used the correct Resource template.")
+            raise KeyError(
                f'The provided dictionary lacks a "{k}" key - which is specific for PLD targets. Check the {self.name} target entry on eLabFTW and make sure you used the correct Resource template.'
            )
        # Non-required attributes:
        self.description = material_data.get("body") or ""
-
+if __name__ == "__main__":
-if __name__=="__main__":
+    head = APIHandler("MyApiKey-123456789abcdef")
    head = Header("MyApiKey-123456789abcdef")
    print(f"Example header:\n\t{head.header}\n")
    print("Warning: you're not supposed to be running this as the main program.")
--- a/src/functions.py
+++ b/src/functions.py
@@ -1,62 +0,0 @@
 """
 Currently unused!
 """
 import json, requests
 from APIHandler import APIHandler
 def get_entry_from_elabid(elabid, entryType="items"):
    '''
    Function which returns entrypoint data (as dictionary) from its elabid.
    '''
    header = APIHandler(apikey).dump
    response = requests.get(
        headers = header,
        url = f"{ELABFTW_API_URL}/{entryType}/{elabid}",
        verify=True
    )
    if response.status_code // 100 in [2,3]:
        entry_data = response.json()
        return entry_data
    else:
        raise ConnectionError(f"HTTP request failed with status code: {response.status_code}.")
 def get_sample_layers_data(elabid):
    '''
    Return the following data from every eLabFTW experiment linked
    to a certain sample, identified by elabid.
    - Title of the experiment
    - Category (should check it's "PLD Deposition")
    - Layer number - if present (PLD depositions)
    - Deposition time - returns error if not present
    - Repetition rate - returns error if not present
    '''
    # header = {
    #     "Authorization": apikey,
    #     "Content-Type": "application/json"
    # }
    sample_data = requests.get(
        headers = header,
        url = f"https://elabftw.fisica.unina.it/api/v2/items/{elabid}",
        verify=True
    ).json()
    related_experiments = sample_data["related_experiments_links"]
    result = []
    for exp in related_experiments:
        experiment_data = requests.get(
            headers = header,
            url = f"https://elabftw.fisica.unina.it/api/v2/experiments/{exp.get("entityid")}",
            verify=True
        ).json()
        extra = experiment_data["metadata_decoded"]["extra_fields"]
        result.append(
            {"title": exp.get("title"),
             "layer_number": extra.get("Layer Progressive Number").get("value"),
             "category": exp.get("category_title"),
             "deposition_time": extra.get("Duration").get("value"),
             "repetition_rate": extra.get("Repetition rate").get("value")}
        )
    return result
 if __name__=="__main__":
    print("Warning: you're not supposed to be running this as the main program.")
--- a/src/main.py
+++ b/src/main.py
@@ -1,38 +1,51 @@
-import os, json, requests
+#!/usr/bin/env python3
 import os, json, requests, h5py
 import numpy as np
 from getpass import getpass
 from APIHandler import APIHandler
 from classes import *
 from PIL import Image
 # from schema import pld_deposition
 def call_entrypoint_from_elabid(elabid):
-    '''
+    """
    Calls an entrypoint sample from eLabFTW using its elabid, then returns an object of the Entrypoint class.
    If the entry is not a sample (category_title not matching exactly "Sample") raises ValueError.
-    '''
+    """
    try:
-        sample_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="items")
+        sample_data = APIHandler(apikey).get_entry_from_elabid(
            elabid, entryType="items"
        )
        if not sample_data.get("category_title") == "Sample":
-            raise ValueError("The resource you selected is not a sample, therefore it can't be used as an entrypoint.")
+            raise ValueError(
                "The resource you selected is not a sample, therefore it can't be used as an entrypoint."
            )
        sample_object = Entrypoint(sample_data)
    except ConnectionError as e:
        raise ConnectionError(e)
    return sample_object  # Entrypoint-class object
 def call_material_from_elabid(elabid):
-    '''
+    """
    Calls a material from eLabFTW using its elabid, then returns an object of the Material class.
    If the entry is neither a PLD Target nor a Substrate batch, raises ValueError. Such entries always have a category_title key with its value matching exactly "PLD Target" or "Substrate".
    Because of an old typo, the value "Subtrate" (second 's' is missing) is also accepted.
-    '''
+    """
    try:
-        material_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="items")
+        material_data = APIHandler(apikey).get_entry_from_elabid(
            elabid, entryType="items"
        )
        material_category = material_data.get("category_title")
        # TO-DO: correct this typo on elabftw: Subtrate → Substrate.
        if not material_category in ["PLD Target", "Substrate", "Subtrate"]:
            print(f"Category of the resource: {material_category}.")
-            raise ValueError(f"The referenced resource (elabid = {elabid}) is not a material.")
+            raise ValueError(
                f"The referenced resource (elabid = {elabid}) is not a material."
            )
        elif material_category == "PLD Target":
            material_object = Target(material_data)
        else:
@@ -41,80 +54,146 @@ def call_material_from_elabid(elabid):
        raise ConnectionError(e)
    return material_object  # Material-class object
 def call_layers_from_list(elabid_list):
-    '''
+    """
    Calls a list of (PLD deposition) experiments from eLabFTW using their elabid - which means the input must be a list of integers instead of a single one - then returns a list of Layer-class objects.
    If one of the entries is not related to a deposition layer (category_title not matching exactly "PLD Deposition") that entry is skipped, with no error raised.
-    '''
+    """
    list_of_layers = []
    for elabid in elabid_list:
        try:
-            layer_data = APIHandler(apikey).get_entry_from_elabid(elabid, entryType="experiments")
+            layer_data = APIHandler(apikey).get_entry_from_elabid(
                elabid, entryType="experiments"
            )
            if not layer_data.get("category_title") == "PLD Deposition":
                continue
            layer_object = Layer(layer_data)
            list_of_layers.append(layer_object)
        except ConnectionError as e:
-            nums = [ layer.layer_number for layer in list_of_layers ]
+            nums = [layer.layer_number for layer in list_of_layers]
            nums.sort()
            print(f"LIST OF THE LAYERS PROCESSED (unordered):\n" + str(nums))
-            raise ConnectionError(f"An error occurred while fetching the experiment with elabid = {elabid}:\n" +
+            raise ConnectionError(
-                str(e) + f"\nPlease solve the problem before retrying." + "\n\n" +
+                f"An error occurred while fetching the experiment with elabid = {elabid}:\n"
-                f"Last resource attempted to call: {ELABFTW_API_URL}/experiments/{elabid}"
+                + str(e)
                + f"\nPlease solve the problem before retrying."
                + "\n\n"
                + f"Last resource attempted to call: {ELABFTW_API_URL}/experiments/{elabid}"
            )
    return list_of_layers  # list of Layer-class objects
 def chain_entrypoint_to_batch(sample_object):
-    '''
+    """
    Takes an Entrypoint-class object, looks at its .batch_elabid attribute and returns a Material-class object containing data on the substrate batch associated to the starting sample.
    Dependency: call_material_from_elabid.
-    '''
+    """
    material_elabid = sample_object.batch_elabid
    material_object = call_material_from_elabid(material_elabid)
    return material_object
 def chain_entrypoint_to_layers(sample_object):
-    '''
+    """
    Takes an Entrypoint-class object, looks at its .linked_experiments_elabid attribute (list) and returns a list of Layer-class objects containing data on the deposition layers associated to the starting sample - using the function call_layers_from_list.
    The list is sorted by progressive layer number (layer_number attribute).
    Dependency: call_layers_from_list.
-    '''
+    """
-    linked_experiments_elabid = sample_object.linked_experiments_elabid # list of elabid
+    linked_experiments_elabid = (
        sample_object.linked_experiments_elabid
    )  # list of elabid
    layer_object_list = call_layers_from_list(linked_experiments_elabid)
    layer_object_list.sort(key=lambda x: x.layer_number)
    return layer_object_list
 def chain_layer_to_target(layer_object):
-    '''
+    """
    Takes a Layer-class object, looks at its .target_elabid attribute and returns a Material-class object containing data on the PLD target used in the deposition of said layer.
    Dependency: call_material_from_elabid.
-    '''
+    """
    target_elabid = layer_object.target_elabid
    material_object = call_material_from_elabid(target_elabid)
    return material_object
 def deduplicate_instruments_from_layers(layers):
-    '''
+    """
-    Takes a list of Layer-class objects and for each layer gets the instruments used (laser, depo chamber and RHEED), returns deduplicated list. Ideally, the lists should only contain one element.
+    Takes a list of Layer-class objects and for each layer gets the instruments used (laser, depo chamber and RHEED), returns dictionary with one item per category. This means that if more layers share the same instruments it returns a dictionary with just their names as strings (no lists or sub-dictionaries).
-    '''
+
    If different layers have different instruments (e.g. laser systems) the user is prompted to only select one.
    """
    lasers = []
    chambers = []
    rheeds = []
    elegant_dict = {}
    for lyr in layers:
        instruments = lyr.get_instruments(apikey)
        lasers.append(instruments["laser_system"])
        chambers.append(instruments["deposition_chamber"])
        rheeds.append(instruments["rheed_system"])
-    instruments_used_dict = {
+        elegant_dict[f"layer_{lyr.layer_number}"] = {
-        "laser_system": list( set( lasers ) ),
+            "laser_system": instruments["laser_system"],
-        "deposition_chamber": list( set( chambers ) ),
+            "deposition_chamber": instruments["deposition_chamber"],
-        "rheed_system" : list( set( rheeds ) ),
+            "rheed_system": instruments["rheed_system"],
        }
    ded_lasers = list(set(lasers))
    ded_chambers = list(set(chambers))
    ded_rheeds = list(set(rheeds))
    elegant_dict["multilayer"] = {
        # Keep key names human readable since they're used in the messages of the following errors
        "laser_system": ", ".join(ded_lasers),
        "deposition_chamber": ", ".join(ded_chambers),
        "rheed_system": ", ".join(ded_rheeds),
    }  # dictionary's name is a joke
    # updated_dict = {} # use this for containing the final dataset
    # for ded in elegant_dict:
    #     if len(elegant_dict[ded]) == 0:
    #         # if len of list is 0 - empty list - raise error
    #         raise IndexError(f"Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample. Fix this on eLabFTW before retrying. Affected list: {ded}.")
    #     elif len(elegant_dict[ded]) > 1:
    #         # if len of list is > 1 - too many values - allow the user to pick one
    #         print("Warning: different instruments have been used for different layers - which is currently not allowed.")
    #         # there's a better way to do this but I can't remember now for the life of me...
    #         i = 0
    #         while i < len(elegant_dict[ded]):
    #             print(f"{i} - {elegant_dict[ded][i]}")
    #             i += 1
    #         ans = None
    #         while not type(ans) == int or not ans in range(0, len(elegant_dict[ded])):
    #             ans = input("Please pick one of the previous (0, 1, ...) [default = 0]: ") or "0"
    #             if ans.isdigit():
    #                 ans = int(ans)
    #             continue # unnecessary?
    #         updated_dict[ded] = elegant_dict[ded][ans]
    #     elif elegant_dict[ded][0] in ["", 0, None]:
    #         # if len is 1 BUT value is "", 0 or None raise error
    #         raise ValueError(f"Missing data: a Laser System, Chamber and/or RHEED System which is specified across all the Deposition-type experiments related to this sample is either empty or invalid. Fix this on eLabFTW before retrying. Affected list: {ded}.")
    #     else:
    #         # if none of the previous (only 1 value), that single value is used
    #         updated_dict[ded] = elegant_dict[ded][0]
    # instruments_used_dict = {
    #     "laser_system": updated_dict["Laser Systems"],
    #     "deposition_chamber": updated_dict["Deposition Chamber"],
    #     "rheed_system": updated_dict["RHEED Systems"],
    # }
    return elegant_dict
    ### OLD CODE
    # if 0 in [ len(i) for i in elegant_list ]:
    #     # i.e. if length of one of the lists in elegant_list is zero (missing data):
    #     raise IndexError("Missing data: no Laser System, Chamber and/or RHEED System is specified in any of the Deposition-type experiments related to this sample.")
    # if not all([ len(i) == 1 for i in elegant_list ]):
    #     print("Warning: different instruments have been used for different layers - which is currently not allowed.")
    #     # for every element in elegant list check if len > 1 and if it is
    #     print("Selecting the first occurence for every category...")
    ###
    # lasers = { f"layer_{lyr.layer_number}": lyr.laser_system for lyr in layers }
    # chambers = { f"layer_{lyr.layer_number}": lyr.deposition_chamber for lyr in layers }
    # rheeds = { f"layer_{lyr.layer_number}": lyr.rheed_system for lyr in layers }
@@ -123,29 +202,84 @@ def deduplicate_instruments_from_layers(layers):
    #     "deposition_chamber": chambers,
    #     "rheed_system": rheeds,
    # }
-    return instruments_used_dict
+
 def analyse_rheed_data(data):
    """
    Split a RHEED trace table into a time axis and three intensity traces.

    The input is a 2D array (or any array-like numpy can convert, such as a
    nested list) with one row per time point and at least 4 columns:
    -----
    Time    Layer1_Int1     Layer1_Int2     Layer1_Int3
    -----
    Column 0 is read as the timestamp and columns 1-3 as RHEED intensities.
    Any column after the fourth is ignored, and a warning is printed.
    According to the original notes, time is expressed in seconds and
    intensities are adimensional on 8 bits (min. 0, max. 255); neither is
    validated here.

    Args:
        data: 2D array-like of shape (n_time_points, n_cols), n_cols >= 4.

    Returns:
        A dictionary with two entries:
        - "time":      float64 array of shape (n_time_points,)
        - "intensity": float32 array of shape (3, n_time_points), one row
                       per intensity column.

    Raises:
        ValueError: if the array is not 2-dimensional, or if it has fewer
                    than 4 columns.
    """
    # Accept nested lists and other array-likes; an ndarray passes through as is.
    data = np.asarray(data)
    # Verifying the format of the input:
    if data.ndim != 2:
        raise ValueError(
            f"Unexpected trace format: expected 2D array, got ndim = {data.ndim}."
        )
    n_cols = data.shape[1]  # 0 = rows, 1 = columns
    if n_cols < 4:
        raise ValueError(
            f"Insufficient number of columns: expected 4, got n_cols = {n_cols}."
        )
    if n_cols > 4:
        print(
            f"Warning! The input file (for Realtime Window Analysis) has {n_cols - 4} more columns than needed.\nOnly 4 columns will be considered - with the first representing time and the others representing RHEED intensities."
        )
    # copy=False avoids a copy when the data already has the target dtype.
    time = data[:, 0].astype(np.float64, copy=False)
    intensities = data[:, 1:4].astype(np.float32, copy=False)
    return {
        # Transposing the 1D time vector would be a no-op, so it is returned as is.
        "time": time,
        # Transposed so that each row holds one intensity trace.
        "intensity": np.transpose(intensities),
    }
 def make_nexus_schema_dictionary(substrate_object, layers):
-    '''
+    """
    Main function, takes all the other functions to reconstruct the full dataset. Takes a Substrate-class object (output of the chain_entrypoint_to_batch() function) and a list of Layer-class objects (output of the chain_entrypoint_to_layers() function), returns dictionary with the same schema as the NeXus standard for PLD fabrications.
-    '''
+    """
    instruments = deduplicate_instruments_from_layers(layers)
    pld_fabrication = {
        "sample": {
            "substrate": {
                "name": substrate_object.name,
-                "chemical_formula" : substrate_object.get_compound_formula(apikey),
+                "chemical_formula": substrate_object.get_compound_formula(apikey),
-                "orientation" : substrate_object.orientation,
+                "orientation": substrate_object.orientation,
-                "miscut_angle" : substrate_object.miscut_angle,
+                "miscut_angle": {
-                "miscut_direction" : substrate_object.miscut_direction,
+                    "value": substrate_object.miscut_angle,
-                "thickness" : substrate_object.thickness,
+                    "units": substrate_object.miscut_angle_unit,
-                "dimensions" : substrate_object.dimensions,
+                },
-                "surface_treatment" : substrate_object.surface_treatment,
+                "miscut_direction": substrate_object.miscut_direction,
-                "manufacturer" : substrate_object.manufacturer,
+                "thickness": {
-                "batch_id" : substrate_object.batch_id,
+                    "value": substrate_object.thickness,
                    "units": substrate_object.thickness_unit,
                },
                "dimensions": substrate_object.dimensions,
                "surface_treatment": substrate_object.surface_treatment,
                "manufacturer": substrate_object.manufacturer,
                "batch_id": substrate_object.batch_id,
            },
            "multilayer": {},
        },
-        "instruments_used": deduplicate_instruments_from_layers(layers),
+        "instruments_used": instruments["multilayer"],
    }
    multilayer = pld_fabrication["sample"]["multilayer"]
    for layer in layers:
@@ -153,15 +287,18 @@ def make_nexus_schema_dictionary(substrate_object, layers):
        target_object = chain_layer_to_target(layer)
        target_dict = {
            "name": target_object.name,
-            "chemical_formula" : target_object.get_compound_formula(apikey),
+            "chemical_formula": target_object.get_compound_formula(apikey),
-            "description" : target_object.description,
+            "description": target_object.description,
-            "shape" : target_object.shape,
+            "shape": target_object.shape,
-            "dimensions" : target_object.dimensions,
+            "dimensions": target_object.dimensions,
-            "thickness" : target_object.thickness,
+            "thickness": {
-            "solid_form" : target_object.solid_form,
+                "value": target_object.thickness,
-            "manufacturer" : target_object.manufacturer,
+                "units": target_object.thickness_unit,
            },
            "solid_form": target_object.solid_form,
            "manufacturer": target_object.manufacturer,
            "batch_id": target_object.name,
            # TO-DO: currently not available:
            # "batch_id" : target_object.batch_id,
        }
        multilayer[name] = {
            "target": target_dict,
@@ -169,17 +306,44 @@ def make_nexus_schema_dictionary(substrate_object, layers):
            "operator": layer.operator,
            "description": layer.description,
            "number_of_pulses": layer.number_of_pulses,
-            "deposition_time": layer.deposition_time,
+            "deposition_time": {
-            "temperature": layer.temperature,
+                "value": layer.deposition_time,
                "units": layer.deposition_time_unit,
            },
            "temperature": {
                "value": layer.temperature,
                "units": layer.temperature_unit,
            },
            "heating_method": layer.heating_method,
-            "layer_thickness": layer.layer_thickness,
+            "layer_thickness": {
                "value": layer.layer_thickness,
                "units": layer.layer_thickness_unit,
            },
            "buffer_gas": layer.buffer_gas,
-            "process_pressure": layer.process_pressure,
+            "process_pressure": {
-            "heater_target_distance": layer.heater_target_distance,
+                "value": layer.process_pressure,
-            "repetition_rate": layer.repetition_rate,
+                "units": layer.process_pressure_unit,
-            "laser_fluence": layer.laser_fluence,
+            },
-            "laser_spot_area": layer.laser_spot_area,
+            "heater_target_distance": {
-            "laser_energy": layer.laser_energy,
+                "value": layer.heater_target_distance,
                "units": layer.heater_target_distance_unit,
            },
            "repetition_rate": {
                "value": layer.repetition_rate,
                "units": layer.repetition_rate_unit,
            },
            "laser_fluence": {
                "value": layer.laser_fluence,
                "units": layer.laser_fluence_unit,
            },
            "laser_spot_area": {
                "value": layer.laser_spot_area,
                "units": layer.laser_spot_area_unit,
            },
            "laser_energy": {
                "value": layer.laser_energy,
                "units": layer.laser_energy_unit,
            },
            "laser_rastering": {
                "geometry": layer.laser_rastering_geometry,
                "positions": layer.laser_rastering_positions,
@@ -187,29 +351,395 @@ def make_nexus_schema_dictionary(substrate_object, layers):
            },
            "pre_annealing": {
                "ambient_gas": layer.pre_annealing_ambient_gas,
-                "pressure": layer.pre_annealing_pressure,
+                "pressure": {
-                "temperature": layer.pre_annealing_temperature,
+                    "value": layer.pre_annealing_pressure,
-                "duration": layer.pre_annealing_duration,
+                    "units": layer.pre_annealing_pressure_unit,
                },
                "temperature": {
                    "value": layer.pre_annealing_temperature,
                    "units": layer.pre_annealing_temperature_unit,
                },
                "duration": {
                    "value": layer.pre_annealing_duration,
                    "units": layer.pre_annealing_duration_unit,
                },
            },
            "post_annealing": {
                "ambient_gas": layer.post_annealing_ambient_gas,
-                "pressure": layer.post_annealing_pressure,
+                "pressure": {
-                "temperature": layer.post_annealing_temperature,
+                    "value": layer.post_annealing_pressure,
-                "duration": layer.post_annealing_duration,
+                    "units": layer.post_annealing_pressure_unit,
                },
                "temperature": {
                    "value": layer.post_annealing_temperature,
                    "units": layer.post_annealing_temperature_unit,
                },
                "duration": {
                    "value": layer.post_annealing_duration,
                    "units": layer.post_annealing_duration_unit,
                },
            },
            "instruments_used": instruments[name],
        }
    return pld_fabrication
-if __name__=="__main__":
+
 def build_nexus_file(pld_fabrication, output_path, rheed_osc=None, heatmap_matrix=None):
    # NOTE: look at the mail attachment from Emiliano...
    with h5py.File(output_path, "w") as f:
        nx_pld_entry = f.create_group("pld_fabrication")
        nx_pld_entry.attrs["NX_class"] = "NXentry"
        # Sample section
        nx_sample = nx_pld_entry.create_group("sample")
        nx_sample.attrs["NX_class"] = "NXsample"
        sample_dict = pld_fabrication["sample"]
        # Substrate sub-section
        nx_substrate = nx_sample.create_group("substrate")
        nx_substrate.attrs["NX_class"] = "NXsubentry"
        substrate_dict = sample_dict["substrate"]
        try:
            # Substrate fields (datasets)
            nx_substrate.create_dataset("name", data=substrate_dict["name"])
            nx_substrate.create_dataset(
                "chemical_formula", data=substrate_dict["chemical_formula"]
            )
            nx_substrate.create_dataset(
                "orientation", data=substrate_dict["orientation"]
            )
            nx_substrate.create_dataset(
                "miscut_angle", data=substrate_dict["miscut_angle"]["value"]
            )  # float
            nx_substrate["miscut_angle"].attrs["units"] = substrate_dict[
                "miscut_angle"
            ]["units"]
            nx_substrate.create_dataset(
                "miscut_direction", data=substrate_dict["miscut_direction"]
            )
            nx_substrate.create_dataset(
                "thickness", data=substrate_dict["thickness"]["value"]
            )  # float/int
            nx_substrate["thickness"].attrs["units"] = substrate_dict["thickness"][
                "units"
            ]
            nx_substrate.create_dataset("dimensions", data=substrate_dict["dimensions"])
            nx_substrate.create_dataset(
                "surface_treatment", data=substrate_dict["surface_treatment"]
            )
            nx_substrate.create_dataset(
                "manufacturer", data=substrate_dict["manufacturer"]
            )
            nx_substrate.create_dataset("batch_id", data=substrate_dict["batch_id"])
        except TypeError as te:
            # sooner or later I'll handle this too - not today tho
            raise TypeError(te)
        # Multilayer sub-section
        nx_multilayer = nx_sample.create_group("multilayer")
        nx_multilayer.attrs["NX_class"] = "NXsubentry"
        multilayer_dict = sample_dict["multilayer"]
        # Repeat FOR EACH LAYER:
        for layer in multilayer_dict:
            nx_layer = nx_multilayer.create_group(layer)
            nx_layer.attrs["NX_class"] = "NXsubentry"
            layer_dict = multilayer_dict[layer]
            # Sub-groups of a layer
            ## Target
            nx_target = nx_layer.create_group("target")
            nx_target.attrs["NX_class"] = "NXsample"
            target_dict = layer_dict["target"]
            ## Rastering and Annealing
            nx_laser_rastering = nx_layer.create_group("laser_rastering")
            nx_laser_rastering.attrs["NX_class"] = "NXprocess"
            rastering_dict = layer_dict["laser_rastering"]
            nx_pre_annealing = nx_layer.create_group("pre_annealing")
            nx_pre_annealing.attrs["NX_class"] = "NXprocess"
            pre_ann_dict = layer_dict["pre_annealing"]
            nx_post_annealing = nx_layer.create_group("post_annealing")
            nx_post_annealing.attrs["NX_class"] = "NXprocess"
            post_ann_dict = layer_dict["post_annealing"]
            nx_layer_instruments = nx_layer.create_group("instruments_used")
            nx_layer_instruments.attrs["NX_class"] = "NXinstrument"
            layer_instruments_dict = layer_dict["instruments_used"]
            ## Target metadata
            try:
                nx_target.create_dataset("name", data=target_dict["name"])
                nx_target.create_dataset(
                    "chemical_formula", data=target_dict["chemical_formula"]
                )
                nx_target.create_dataset("description", data=target_dict["description"])
                nx_target.create_dataset("shape", data=target_dict["shape"])
                nx_target.create_dataset("dimensions", data=target_dict["dimensions"])
                nx_target.create_dataset(
                    "thickness", data=target_dict["thickness"]["value"]
                )  # float/int
                nx_target["thickness"].attrs["units"] = target_dict["thickness"][
                    "units"
                ]
                nx_target.create_dataset("solid_form", data=target_dict["solid_form"])
                nx_target.create_dataset(
                    "manufacturer", data=target_dict["manufacturer"]
                )
                nx_target.create_dataset("batch_id", data=target_dict["batch_id"])
            except TypeError as te:
                raise TypeError(te)
            ## Other layer-specific metadata
            try:
                nx_layer.create_dataset("start_time", data=layer_dict["start_time"])
                nx_layer.create_dataset("operator", data=layer_dict["operator"])
                nx_layer.create_dataset(
                    "number_of_pulses", data=layer_dict["number_of_pulses"]
                )
                nx_layer.create_dataset(
                    "deposition_time", data=layer_dict["deposition_time"]["value"]
                )
                nx_layer["deposition_time"].attrs["units"] = layer_dict[
                    "deposition_time"
                ]["units"]
                nx_layer.create_dataset(
                    "repetition_rate", data=layer_dict["repetition_rate"]["value"]
                )
                nx_layer["repetition_rate"].attrs["units"] = layer_dict[
                    "repetition_rate"
                ]["units"]
                nx_layer.create_dataset(
                    "temperature", data=layer_dict["temperature"]["value"]
                )
                nx_layer["temperature"].attrs["units"] = layer_dict["temperature"][
                    "units"
                ]
                nx_layer.create_dataset(
                    "heating_method", data=layer_dict["heating_method"]
                )
                nx_layer.create_dataset(
                    "layer_thickness", data=layer_dict["layer_thickness"]["value"]
                )
                nx_layer["layer_thickness"].attrs["units"] = layer_dict[
                    "layer_thickness"
                ]["units"]
                nx_layer.create_dataset("buffer_gas", data=layer_dict["buffer_gas"])
                nx_layer.create_dataset(
                    "process_pressure", data=layer_dict["process_pressure"]["value"]
                )
                nx_layer["process_pressure"].attrs["units"] = layer_dict[
                    "process_pressure"
                ]["units"]
                nx_layer.create_dataset(
                    "heater_target_distance",
                    data=layer_dict["heater_target_distance"]["value"],
                )
                nx_layer["heater_target_distance"].attrs["units"] = layer_dict[
                    "heater_target_distance"
                ]["units"]
                nx_layer.create_dataset(
                    "laser_fluence", data=layer_dict["laser_fluence"]["value"]
                )
                nx_layer["laser_fluence"].attrs["units"] = layer_dict["laser_fluence"][
                    "units"
                ]
                nx_layer.create_dataset(
                    "laser_spot_area", data=layer_dict["laser_spot_area"]["value"]
                )
                nx_layer["laser_spot_area"].attrs["units"] = layer_dict[
                    "laser_spot_area"
                ]["units"]
                nx_layer.create_dataset(
                    "laser_energy", data=layer_dict["laser_energy"]["value"]
                )
                nx_layer["laser_energy"].attrs["units"] = layer_dict["laser_energy"][
                    "units"
                ]
            except TypeError as te:
                raise TypeError(te)
            ## Rastering metadata
            try:
                nx_laser_rastering.create_dataset(
                    "geometry", data=rastering_dict["geometry"]
                )
                nx_laser_rastering.create_dataset(
                    "positions", data=rastering_dict["positions"]
                )
                nx_laser_rastering.create_dataset(
                    "velocities", data=rastering_dict["velocities"]
                )
            except TypeError as te:
                raise TypeError(te)
            ## Annealing metadata
            try:
                nx_pre_annealing.create_dataset(
                    "ambient_gas", data=pre_ann_dict["ambient_gas"]
                )
                nx_pre_annealing.create_dataset(
                    "pressure", data=pre_ann_dict["pressure"]["value"]
                )
                nx_pre_annealing["pressure"].attrs["units"] = pre_ann_dict["pressure"][
                    "units"
                ]
                nx_pre_annealing.create_dataset(
                    "temperature", data=pre_ann_dict["temperature"]["value"]
                )
                nx_pre_annealing["temperature"].attrs["units"] = pre_ann_dict[
                    "temperature"
                ]["units"]
                nx_pre_annealing.create_dataset(
                    "duration", data=pre_ann_dict["duration"]["value"]
                )
                nx_pre_annealing["duration"].attrs["units"] = pre_ann_dict["duration"][
                    "units"
                ]
            except TypeError as te:
                raise TypeError(te)
            try:
                nx_post_annealing.create_dataset(
                    "ambient_gas", data=post_ann_dict["ambient_gas"]
                )
                nx_post_annealing.create_dataset(
                    "pressure", data=post_ann_dict["pressure"]["value"]
                )
                nx_post_annealing["pressure"].attrs["units"] = post_ann_dict[
                    "pressure"
                ]["units"]
                nx_post_annealing.create_dataset(
                    "temperature", data=post_ann_dict["temperature"]["value"]
                )
                nx_post_annealing["temperature"].attrs["units"] = post_ann_dict[
                    "temperature"
                ]["units"]
                nx_post_annealing.create_dataset(
                    "duration", data=post_ann_dict["duration"]["value"]
                )
                nx_post_annealing["duration"].attrs["units"] = post_ann_dict[
                    "duration"
                ]["units"]
            except TypeError as te:
                raise TypeError(te)
            try:
                nx_layer_instruments.create_dataset(
                    "laser_system", data=layer_instruments_dict["laser_system"]
                )
                nx_layer_instruments.create_dataset(
                    "deposition_chamber",
                    data=layer_instruments_dict["deposition_chamber"],
                )
                nx_layer_instruments.create_dataset(
                    "rheed_system", data=layer_instruments_dict["rheed_system"]
                )
            except TypeError as te:
                raise TypeError(te)
        # Instruments used section
        nx_instruments = nx_pld_entry.create_group("instruments_used")
        nx_instruments.attrs["NX_class"] = "NXinstrument"
        instruments_dict = pld_fabrication["instruments_used"]
        try:
            nx_instruments.create_dataset(
                "laser_system", data=instruments_dict["laser_system"]
            )
            nx_instruments.create_dataset(
                "deposition_chamber", data=instruments_dict["deposition_chamber"]
            )
            nx_instruments.create_dataset(
                "rheed_system", data=instruments_dict["rheed_system"]
            )
        except TypeError as te:
            raise TypeError(te)
        # RHEED data section
        nx_rheed = nx_pld_entry.create_group("rheed_data")
        nx_rheed.attrs["NX_class"] = "NXdata"
        if rheed_osc is not None:
            # Asse temporale
            t_ds = nx_rheed.create_dataset("time", data=rheed_osc["time"])
            t_ds.attrs["units"] = "s"
            t_ds.attrs["long_name"] = "Time"
            # Intensità: shape (n_layers, n_timepoints, 3)
            i_ds = nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
            i_ds.attrs["units"] = "a.u."
            i_ds.attrs["long_name"] = "RHEED Intensity"
            # Attributi NXdata — notazione NeXus 3.x corretta
            nx_rheed.attrs["signal"] = "intensity"
            nx_rheed.attrs["axes"] = [
                ".",
                "time",
                ".",
            ]  # solo l'asse 1 (time) è denominato
            nx_rheed.attrs["time_indices"] = np.array([1], dtype=np.int32)
            # ###########
            # nx_rheed = nx_pld_entry.create_group("rheed_data")
            # nx_rheed.attrs["NX_class"] = "NXdata"
            # nx_rheed.create_dataset("time", data=rheed_osc["time"])
            # nx_rheed["time"].attrs["units"] = "s"
            # nx_rheed.create_dataset("intensity", data=rheed_osc["intensity"])
            # #nx_rheed["intensity"].attrs["units"] = "counts"
            # nx_rheed["intensity"].attrs["long_name"] = "RHEED intensity"
            # nx_rheed.attrs["signal"] = "intensity"
            # nx_rheed.attrs["axes"] = "layer:time:channel"
            # nx_rheed.attrs["layer_indices"] = [0]  # asse layer
            # nx_rheed.attrs["time_indices"] = [1]   # asse tempo
            # nx_rheed.attrs["channel_indices"] = [2]
        if heatmap_matrix is not None:
            heatmap = nx_rheed.create_dataset("diffraction_image", data=heatmap_matrix)
            heatmap.attrs["long_name"] = "Diffraction Image"
            heatmap.attrs["units"] = "a.u."
            # this is of my own initiative. good???
            heatmap.attrs["interpretation"] = "spectrum"
            # suggested by DeepSeek, useful? probably not.
            # heatmap.attrs["suggested_colormap"] = "inferno"
            # heatmap.attrs["scale_min"] = 0.0
            # heatmap.attrs["scale_max"] = 1.0
    return
 if __name__ == "__main__":
    # TO-DO: place the API base URL somewhere else.
    ELABFTW_API_URL = "https://elabftw.fisica.unina.it/api/v2"
    apikey = getpass("Paste API key here: ")
-    elabid = input("Enter elabid of your starting sample [default= 1111]: ") or 1111
+    elabid = input("Enter elabid of your starting sample [default = 1111]: ") or 1111
    data = APIHandler(apikey).get_entry_from_elabid(elabid)
    sample = Entrypoint(data)
    sample_name = sample.name.strip().replace(" ", "_")
    substrate_object = chain_entrypoint_to_batch(sample)  # Substrate-class object
    layers = chain_entrypoint_to_layers(sample)  # list of Layer-class objects
    n_layers = len(layers)  # total number of layers on the sample
    result = make_nexus_schema_dictionary(substrate_object, layers)
    # print(make_nexus_schema_dictionary(substrate_object, layers)) # debug
-    with open (f"output/sample-{elabid}.json", "w") as f:
+    with open(f"output/sample-{sample_name}.json", "w") as f:
        json.dump(result, f, indent=3)
    # TO-DO: remove the hard-coded path of the RWA file
    # ideally the script should download a TXT/CSV file from each layer
    # (IF PRESENT ←→ also handle missing file error)
    # and merge all data in a single file to analyse it
    # WARNING: fails if file is missing
    with open("tests/Realtime_Window_Analysis.txt", "r") as o:
        osc = np.loadtxt(o, delimiter="\t")
    try:
        rheed_osc = (
            analyse_rheed_data(data=osc) or None
        )  # analyze rheed data first, build the file later
    except ValueError as ve:
        raise ValueError(
            f"Error with function analyse_rheed_data. {ve}\nPlease make sure the Realtime Window Analysis file is exactly 4 columns wide - where the first column represents time and the others are RHEED intensities."
        )
    # This one tries to open a png image.
    # Emiliano said to keep it to one image per layer tops.
    # In this test I will only consider one image.
    # TO-DO: make it format-agnostic. If not possible, make it PNG-only.
    if os.path.isfile("tests/LAO_16min50s_736C_STO.bmp"):  # if BMP
        # if os.path.isfile("tests/LAO_16min50s_736C_STO.png"): # if PNG
        img = Image.open("tests/LAO_16min50s_736C_STO.bmp").convert("L")
        mx = np.array(img, dtype=np.uint8)
        # mx = mx.astype(np.float32) / 255.0  # consider deleting???
    build_nexus_file(
        result,
        output_path=f"output/sample-{sample_name}-nexus.h5",
        rheed_osc=rheed_osc,
        heatmap_matrix=mx,
    )
--- a/src/schema/init.py
+++ b/src/schema/init.py
--- a/src/schema/pld_deposition.py
+++ b/src/schema/pld_deposition.py
@@ -0,0 +1,3 @@
 class Prova:
    """Placeholder class: every instance carries a fixed greeting string."""

    def __init__(self) -> None:
        # The greeting is stored per instance, not on the class.
        greeting = "Hello world"
        self.hello = greeting
--- a/tests/Image10.bmp
+++ b/tests/Image10.bmp
--- a/tests/LAO_16min50s_736C_STO.bmp
+++ b/tests/LAO_16min50s_736C_STO.bmp
--- a/tests/Realtime_Window_Analysis.txt
+++ b/tests/Realtime_Window_Analysis.txt
--- a/tests/Realtime_Window_Analysis_Noise.txt
+++ b/tests/Realtime_Window_Analysis_Noise.txt
Author	SHA256	Message	Date
PioApocalypse	c5b17bb3f8	minimal modifications	2026-05-09 00:15:52 +02:00
PioApocalypse	865f5cab6b	untested: adds methods to Layer class to fetch attachments list one method fetches all one filters textual uploads one filters png and bmp images	2026-05-08 23:40:14 +02:00
PioApocalypse	0102bb282e	improves documentation, tabbing and error handling in APIHandler class Claude Code helped with autocompletion, the rest is my work	2026-05-08 23:31:36 +02:00
PioApocalypse	1ef944288e	creates APIHandler methods for downloading attachments method 'download_attachments_data" works with elabapi.UploadsApi() class to download binary data and other metadata of our files. CURRENTLY it downloads every single attachment which is not intended and it's only for testing purposes "download_attachments_to_disk" saves binary data to "output/attachments"	2026-05-08 18:11:53 +02:00
PioApocalypse	8e7a424320	adds new bmp RHEED picture for testing	2026-05-08 18:10:15 +02:00
PioApocalypse	008bcff826	LazyVim tab fix + new unused Layer-class methods to fetch uploads	2026-05-08 18:09:03 +02:00
PioApocalypse	51b8ea7dd7	adds elabapi_python to requirements	2026-05-08 17:52:32 +02:00
PioApocalypse	8c616dee2c	adds a randomly generated RWA. RWA_Noise has 4 columns: time and 3 intensities. The RWA is generated through python-random starting from the original RWA, so that every value is its counterpart in the original file times a random float between 0.8 and 1.2 (noise)	2026-05-08 15:27:45 +02:00
PioApocalypse	bb1ea8f1c3	proposed: schemas are placed in src/schema (module) separating schemas from main.py might be a good idea since the parser will support more fabrication methods, but since every method has its dictionary is it even possible?	2026-05-08 11:20:10 +02:00
PioApocalypse	207de511fa	transposes rheed intensities, adds shebang to main.py	2026-05-08 10:05:47 +02:00
PioApocalypse	aa5c114b3b	matrix no more normalized	2026-05-05 12:15:57 +02:00
PioApocalypse	b26433d7ec	test image	2026-05-05 12:15:45 +02:00
PioApocalypse	7a871a9f6d	adds useless attrs suggested by DeepSeek; leaving this here as a memento that LLMs hallucinate	2026-05-05 12:11:27 +02:00
PioApocalypse	a278119be4	diffraction image successfully loaded in nexus file	2026-05-05 12:02:39 +02:00
PioApocalypse	707ce28156	lazy vim auto clean + starting point for image analysis	2026-05-05 11:40:57 +02:00
PioApocalypse	173ae24aa8	adds pillow (PIL) to requirements for image processing	2026-04-27 15:23:18 +02:00
PioApocalypse	1d8fd5af15	handles absence of laser energy value	2026-04-27 15:09:52 +02:00
PioApocalypse	038f1920ba	error message includes missing item case	2026-04-24 10:37:10 +02:00
PioApocalypse	1523c973f4	another attempt at parsing RWA - seems to work better	2026-03-20 15:02:12 +01:00
PioApocalypse	5cf67648af	adds mod. suggested by ClaudeAI - still doesn't work original code is commented below, rows 517-545	2026-03-18 15:15:31 +01:00
PioApocalypse	839799a13f	adds new function to analyze rheed data, doesn't really work atm thanks DeepSeek	2026-03-16 12:51:05 +01:00
PioApocalypse	10c68bf260	reworks how instruments are recorded in the nx file according to new ver the instruments_used group is still present outside the multilayer group but currently a new instruments_used sub-group is created in the layer-specific group instruments used to deposit a single layer are in /sample/multilayer/layer_N/instruments_used and there's only one value for each category (rheed, laser, chamber) in /instruments_used (root) for each category there's a list of every (unique) instrument involved in the full deposition process	2026-03-13 15:11:53 +01:00
PioApocalypse	bab5e958cb	NOT WORKING: starts changing the structure of function "deduplicate..."	2026-03-11 15:43:11 +01:00
PioApocalypse	fc150be724	main now turns content of realtime window analysis into nx dataset the data is not parsed or analysed, it's written as text (well, tsv technically) - this is only for testing and first attempts	2026-03-11 15:01:04 +01:00
PioApocalypse	aa3bf531f9	adds example realtime windows analysis	2026-03-11 15:00:15 +01:00
PioApocalypse	3f97ccee25	removes functions.py	2026-02-17 16:20:08 +01:00
PioApocalypse	3ae6b86b8e	more elegant solution for deduplicating instruments also edits help for deduplicate_instruments... to better explain what it does; also fixes small typo ('default=' instead of 'default ='), row 448	2026-02-17 16:15:17 +01:00
PioApocalypse	d83873c763	raises IndexError if no laser, rheed sys. or chamber is ever specified i.e. if one or more of these fields aren't specified thru all layers	2026-02-17 14:54:33 +01:00
PioApocalypse	de401b5474	adds instruments metadata to h5 file	2026-02-17 14:39:04 +01:00
PioApocalypse	fde2615107	changes method of instrument list deduplication picks first occurrence in every set (ded_lasers, ded_chambers, ded_rheeds) and eventually warns user if duplicates exist	2026-02-17 14:37:35 +01:00
PioApocalypse	59e173c54f	adds rastering and annealing metadata incl. UoM's	2026-02-16 19:40:23 +01:00
PioApocalypse	712cbc4788	cleans code	2026-02-16 19:40:09 +01:00
PioApocalypse	207d166227	adds most of the required metadata to function build_nexus_file the file is generated into the "output" folder w/ .h5 extension the most has been done already (probably)	2026-02-16 15:43:07 +01:00
PioApocalypse	74b8c9cfae	extends pld_fabrication dictionary with UoM's now keys with numeric values are sub-dictionaries with a "value" and a "units" key - unitS not unit to comply directly with NeXus format, which turned out to be a good idea to avoid confusion since eLabFTW uses the word "units" for the list of accepted units and "unit" for the selected one... NOTE: UoM = Unit of Measurement	2026-02-16 15:39:32 +01:00
PioApocalypse	1b1834d4e6	some attributes don't default to NoneType anymore Target.description defaults to "" (empty str) Substrate.thickness defaults to "" (empty str) Substrate.thickness_unit is now hardcoded to "μm" did you know? apparently h5py does NOT like null values	2026-02-16 15:35:22 +01:00
PioApocalypse	dfd3c07d2f	ignores h5 and nxs files	2026-02-16 11:50:44 +01:00
PioApocalypse	d094a60725	replaces elabid with sample name in the names of output files	2026-02-16 11:49:48 +01:00
PioApocalypse	41ff025098	adds units of measurement (UoM) in Material class and children	2026-02-16 11:30:08 +01:00
PioApocalypse	ca2cdbfded	adds units of measurement in Layer class plus moves around fullname/operator, created_at and description/body so that operator is required while the others aren't	2026-02-16 11:28:17 +01:00
PioApocalypse	b4d7373933	starts working on nexus file creation	2026-02-13 16:23:42 +01:00
PioApocalypse	2f4985c443	adds h5py to requirements	2026-02-13 16:23:24 +01:00