{ "cells": [ { "cell_type": "markdown", "id": "dacba8a3-91fe-45ad-af7b-f5082466b969", "metadata": {}, "source": [ "# Basic JSON file parsing\n", "## Info gathered by the scientist on Experiment 41 \"NEW PLD Deposition Layer\"\n", "### General info\n", "* Date and time of creation\n", "* Category\n", "* Full name of the scientist\n", "* Related items (sample, PLD target)\n", "\n", "### Instrument\n", "* Chamber\n", "* Laser system\n", "* RHEED system\n", "\n", "### Process\n", "* Sample\n", "* Layer progressive number\n", "* Target\n", "* Heater temperature\n", "* Heater target distance\n", "* Buffer gas\n", "* Process pressure\n", "* Heating method\n", "* Laser intensity\n", "* Duration\n", "* Repetition rate\n", "* Thickness\n", "\n", "### Post annealing\n", "* Buffer gas used in PA\n", "* Process pressure of PA\n", "* Heater temperature of PA\n", "* Duration of PA\n", "\n", "## Basic parser\n", "Let's start by loading and printing the contents of Experiment 41's JSON as downloaded from eLabFTW." ] }, { "cell_type": "code", "execution_count": 1, "id": "dfa122ad-6de1-4282-bb67-8fcd877e6678", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"access_key\": null,\n", " \"body\": \"\",\n", " \"body_html\": \"\",\n", " \"canread\": \"{\\\"base\\\": 40, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n", " \"canread_is_immutable\": 0,\n", " \"canwrite\": \"{\\\"base\\\": 20, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n", " \"canwrite_is_immutable\": 0,\n", " \"category\": 2,\n", " \"category_color\": \"8b8d43\",\n", " \"category_title\": \"Deposition\",\n", " \"comments\": [],\n", " \"compounds\": [],\n", " \"containers\": [],\n", " \"content_type\": 1,\n", " \"created_at\": \"2026-01-07 13:22:35\",\n", " \"custom_id\": null,\n", " \"date\": \"2026-01-07\",\n", " \"elabid\": \"20260107-6884c5639c2573cd4104a3f3b93bcd5a16e4c63e\",\n", " \"events_start\": null,\n", " \"events_start_itemid\": null,\n", " \"exclusive_edit_mode\": null,\n", " \"experiments_links\": [],\n", " \"firstname\": \"Emiliano\",\n", " \"fullname\": \"Emiliano Di Gennaro\",\n", " \"id\": 41,\n", " \"is_pinned\": 0,\n", " \"items_links\": [\n", " {\n", " \"entityid\": 855,\n", " \"title\": \"Na-26-001\",\n", " \"custom_id\": null,\n", " \"elabid\": \"20260107-e83642d2b806e5db5ebb0d6309d874f4b4461114\",\n", " \"link_state\": 1,\n", " \"is_bookable\": 0,\n", " \"page\": \"database.php\",\n", " \"type\": \"items\",\n", " \"category_title\": \"NEW_Sample\",\n", " \"category_color\": \"29aeb9\",\n", " \"status_title\": \"Available\",\n", " \"status_color\": \"6a7753\"\n", " },\n", " {\n", " \"entityid\": 466,\n", " \"title\": \"Target di prova\",\n", " \"custom_id\": null,\n", " \"elabid\": \"20250224-58dbf45c5bb2f6c4f6745889da7b05a32645f83b\",\n", " \"link_state\": 1,\n", " \"is_bookable\": 0,\n", " \"page\": \"database.php\",\n", " \"type\": \"items\",\n", " \"category_title\": \"PLD Target\",\n", " \"category_color\": \"1a5fb4\",\n", " \"status_title\": null,\n", " \"status_color\": null\n", " }\n", " ],\n", " \"lastchangeby\": 2,\n", " \"lastname\": \"Di Gennaro\",\n", " \"locked\": 0,\n", " \"locked_at\": null,\n", " \"lockedby\": null,\n", " \"metadata\": \"{\\\"elabftw\\\": {\\\"extra_fields_groups\\\": [{\\\"id\\\": 4, \\\"name\\\": \\\"Process\\\"}, {\\\"id\\\": 6, \\\"name\\\": \\\"Post Annealing\\\"}, {\\\"id\\\": 3, \\\"name\\\": \\\"Instruments\\\"}]}, \\\"extra_fields\\\": {\\\"Sample\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 855, \\\"group_id\\\": 4, \\\"position\\\": 0}, \\\"Target\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 466, \\\"group_id\\\": 4, \\\"position\\\": 2, \\\"required\\\": true}, \\\"Chamber\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 72, \\\"group_id\\\": 3, \\\"position\\\": 0}, \\\"Duration\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"365\\\", \\\"group_id\\\": 4, \\\"position\\\": 9}, \\\"Thickness\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"u.c.\\\", \\\"units\\\": [\\\"u.c.\\\", \\\"s\\\"], \\\"value\\\": \\\"4\\\", \\\"group_id\\\": 4, \\\"position\\\": 11}, \\\"Buffer gas\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 4, \\\"position\\\": 5}, \\\"Duration PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Laser System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"excimer 1\\\", \\\"group_id\\\": 3}, \\\"RHEED System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"staib 2\\\", \\\"group_id\\\": 3}, \\\"Buffer gas PA\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 6, \\\"position\\\": 3}, \\\"Heating Method\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"Radiative Heater\\\", \\\"options\\\": [\\\"Radiative Heater\\\", \\\"Laser Heater\\\"], \\\"group_id\\\": 4, \\\"position\\\": 7}, \\\"Laser Intensity\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"J/(s cm^2)\\\", \\\"units\\\": [\\\"J/(s cm^2)\\\"], \\\"value\\\": \\\"1.5\\\", \\\"group_id\\\": 4, \\\"position\\\": 8}, \\\"Repetition rate\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"Hz\\\", \\\"units\\\": [\\\"Hz\\\"], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 4, \\\"position\\\": 10}, \\\"Process pressure \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"1e-3\\\", \\\"group_id\\\": 4, \\\"position\\\": 6}, \\\"Heater temperature \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"650\\\", \\\"group_id\\\": 4, \\\"position\\\": 3}, \\\"Process pressure PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6, \\\"position\\\": 4}, \\\"Heater temperature PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Heater-target distance\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mm\\\", \\\"units\\\": [\\\"mm\\\"], \\\"value\\\": \\\"4\\\", \\\"group_id\\\": 4, \\\"position\\\": 4}, \\\"Layer Progressive Number\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\\", \\\"units\\\": [], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 4, \\\"position\\\": 1, \\\"required\\\": true}}}\",\n", " \"metadata_decoded\": {\n", " \"elabftw\": {\n", " \"extra_fields_groups\": [\n", " {\n", " \"id\": 4,\n", " \"name\": \"Process\"\n", " },\n", " {\n", " \"id\": 6,\n", " \"name\": \"Post Annealing\"\n", " },\n", " {\n", " \"id\": 3,\n", " \"name\": \"Instruments\"\n", " }\n", " ]\n", " },\n", " \"extra_fields\": {\n", " \"Sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"Target\": {\n", " \"type\": \"items\",\n", " \"value\": 466,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " },\n", " \"Chamber\": {\n", " \"type\": \"items\",\n", " \"value\": 72,\n", " \"group_id\": 3,\n", " \"position\": 0\n", " },\n", " \"Duration\": {\n", " \"type\": \"number\",\n", " \"unit\": \"s\",\n", " \"units\": [\n", " \"s\",\n", " \"min\"\n", " ],\n", " \"value\": \"365\",\n", " \"group_id\": 4,\n", " \"position\": 9\n", " },\n", " \"Thickness\": {\n", " \"type\": \"number\",\n", " \"unit\": \"u.c.\",\n", " \"units\": [\n", " \"u.c.\",\n", " \"s\"\n", " ],\n", " \"value\": \"4\",\n", " \"group_id\": 4,\n", " \"position\": 11\n", " },\n", " \"Buffer gas\": {\n", " \"type\": \"select\",\n", " \"value\": \"O2\",\n", " \"options\": [\n", " \"O2\",\n", " \"N2\",\n", " \"Ar\",\n", " \"\"\n", " ],\n", " \"group_id\": 4,\n", " \"position\": 5\n", " },\n", " \"Duration PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"s\",\n", " \"units\": [\n", " \"s\",\n", " \"min\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6\n", " },\n", " \"Laser System\": {\n", " \"type\": \"text\",\n", " \"value\": \"excimer 1\",\n", " \"group_id\": 3\n", " },\n", " \"RHEED System\": {\n", " \"type\": \"text\",\n", " \"value\": \"staib 2\",\n", " \"group_id\": 3\n", " },\n", " \"Buffer gas PA\": {\n", " \"type\": \"select\",\n", " \"value\": \"O2\",\n", " \"options\": [\n", " \"O2\",\n", " \"N2\",\n", " \"Ar\",\n", " \"\"\n", " ],\n", " \"group_id\": 6,\n", " \"position\": 3\n", " },\n", " \"Heating Method\": {\n", " \"type\": \"select\",\n", " \"value\": \"Radiative Heater\",\n", " \"options\": [\n", " \"Radiative Heater\",\n", " \"Laser Heater\"\n", " ],\n", " \"group_id\": 4,\n", " \"position\": 7\n", " },\n", " \"Laser Intensity\": {\n", " \"type\": \"number\",\n", " \"unit\": \"J/(s cm^2)\",\n", " \"units\": [\n", " \"J/(s cm^2)\"\n", " ],\n", " \"value\": \"1.5\",\n", " \"group_id\": 4,\n", " \"position\": 8\n", " },\n", " \"Repetition rate\": {\n", " \"type\": \"number\",\n", " \"unit\": \"Hz\",\n", " \"units\": [\n", " \"Hz\"\n", " ],\n", " \"value\": \"1\",\n", " \"group_id\": 4,\n", " \"position\": 10\n", " },\n", " \"Process pressure \": {\n", " \"type\": \"number\",\n", " \"unit\": \"mbar\",\n", " \"units\": [\n", " \"mbar\"\n", " ],\n", " \"value\": \"1e-3\",\n", " \"group_id\": 4,\n", " \"position\": 6\n", " },\n", " \"Heater temperature \": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"650\",\n", " \"group_id\": 4,\n", " \"position\": 3\n", " },\n", " \"Process pressure PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mbar\",\n", " \"units\": [\n", " \"mbar\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6,\n", " \"position\": 4\n", " },\n", " \"Heater temperature PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6\n", " },\n", " \"Heater-target distance\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mm\",\n", " \"units\": [\n", " \"mm\"\n", " ],\n", " \"value\": \"4\",\n", " \"group_id\": 4,\n", " \"position\": 4\n", " },\n", " \"Layer Progressive Number\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\",\n", " \"units\": [],\n", " \"value\": \"1\",\n", " \"group_id\": 4,\n", " \"position\": 1,\n", " \"required\": true\n", " }\n", " }\n", " },\n", " \"modified_at\": \"2026-01-07 13:24:45\",\n", " \"next_step\": \"add process data\",\n", " \"orcid\": \"0000-0003-4231-9776\",\n", " \"page\": \"experiments\",\n", " \"rating\": 0,\n", " \"recent_comment\": null,\n", " \"related_experiments_links\": [\n", " {\n", " \"entityid\": 43,\n", " \"title\": \"NEW PLD Deposition Layer I\",\n", " \"custom_id\": null,\n", " \"link_state\": 1,\n", " \"page\": \"experiments.php\",\n", " \"type\": \"experiments\",\n", " \"category_title\": \"Deposition\",\n", " \"category_color\": \"8b8d43\",\n", " \"status_title\": null,\n", " \"status_color\": null\n", " }\n", " ],\n", " \"related_items_links\": [],\n", " \"sharelink\": \"https://elabftw.fisica.unina.it:8080/experiments.php?mode=view&id=41\",\n", " \"state\": 1,\n", " \"status\": 1,\n", " \"status_color\": \"29AEB9\",\n", " \"status_title\": \"Running\",\n", " \"steps\": [\n", " {\n", " \"id\": 23,\n", " \"item_id\": 41,\n", " \"body\": \"add process data\",\n", " \"ordering\": 1,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " },\n", " {\n", " \"id\": 24,\n", " \"item_id\": 41,\n", " \"body\": \"add RHEED data\",\n", " \"ordering\": 2,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " },\n", " {\n", " \"id\": 25,\n", " \"item_id\": 41,\n", " \"body\": \"add RHEED images\",\n", " \"ordering\": 3,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " }\n", " ],\n", " \"tags\": null,\n", " \"tags_id\": null,\n", " \"team\": 1,\n", " \"team_name\": \"Default team\",\n", " \"timestamped\": 0,\n", " \"timestamped_at\": null,\n", " \"timestampedby\": null,\n", " \"title\": \"NEW PLD Deposition Layer\",\n", " \"type\": \"experiments\",\n", " \"uploads\": [],\n", " \"userid\": 2\n", "}\n" ] } ], "source": [ "import json\n", "\n", "with open(\"../tests/objects/experiment_41_elab.json\", \"r\") as f:\n", " x = json.load(f)\n", " print(json.dumps(x,indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "a2e857a5-efdb-4177-a5cc-063270985531", "metadata": {}, "source": [ "For testing purposes now we'll create and print a simplified dictionary containing a sample of the harvested data for each group of metadata." ] }, { "cell_type": "code", "execution_count": 2, "id": "dae4b791-fd08-4f41-ba42-82edcf4e3cde", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"instrument\": {\n", " \"deposition_chamber\": 72,\n", " \"laser_system\": \"excimer 1\",\n", " \"rheed_system\": \"staib 2\"\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": 855,\n", " \"temperature\": \"650\",\n", " \"target\": 466\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_41_elab.json\", \"r\") as f:\n", " rawdata = json.load(f)\n", " extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n", " ordered = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": rawdata[\"fullname\"],\n", " \"sample\": extra[\"Sample\"][\"value\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"][\"value\"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"][\"value\"] # ID of associated resource (PLD target) - useless as is!\n", " }\n", " },\n", " }\n", " print(json.dumps(ordered,indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "c9992a47-ce3c-47ec-94bd-26fbec020962", "metadata": {}, "source": [ "Some issues rise here:\n", "* First of all the fields \"deposition_chamber\", \"sample\" and \"target\" **refer to the value of the eLabFTW ID of the associated resource** which is useless as is since it does not contain any relevant data on these objects;\n", "* Second, the same sample can have two different eLab Experiments associated to it, each representing **a different layer** of the deposition.\n", "\n", "> Note: a layer progressive number is tracked by the scientist, and it can be found in the JSON dictionary under `metadata_decoded -> extra_fields -> Layer Progressive Number -> value`.\n", "\n", "### Multiple layers from known sources\n", "One problem at a time: first of all I can create an \"ordered\" dictionary with an empty \"multilayer\" key and append the layer-specific value of every layer later using the *dict().update()* method." ] }, { "cell_type": "code", "execution_count": 3, "id": "b3afa42e-e982-4dd3-9ea6-cf7918dc276f", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"instrument\": {\n", " \"deposition_chamber\": 72,\n", " \"laser_system\": \"excimer 1\",\n", " \"rheed_system\": \"staib 2\"\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"650\",\n", " \"group_id\": 4,\n", " \"position\": 3\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 466,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_41_elab.json\", \"r\") as f:\n", " rawdata = json.load(f)\n", " extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n", " layers = {\n", " \"layer_1\": {\n", " \"operator\": rawdata[\"fullname\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " }\n", "\n", " ordered = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {\n", " },\n", " }\n", " for l in layers:\n", " ordered[\"multilayer\"].update(\n", " {l: layers[l]}\n", " )\n", " print(json.dumps(ordered, indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "4a7ff14f-d2fc-4485-a174-a23248791a6f", "metadata": {}, "source": [ "Now entering the second layer: Experiment 43.\n", "\n", "If I were to create a \"layers\" dictionary with the same info from the two different experiments it would look like this:" ] }, { "cell_type": "code", "execution_count": 4, "id": "9212afba-9868-467f-ac4d-8cbce0f1537a", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"550\",\n", " \"group_id\": 4,\n", " \"position\": 3\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 858,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " },\n", " \"layer_2\": \"\",\n", " \"layer_0\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"650\",\n", " \"group_id\": 4,\n", " \"position\": 3\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 466,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_41_elab.json\", \"r\") as L01file, open(\"../tests/objects/experiment_43_elab.json\", \"r\") as L02file:\n", " raw01 = json.load(L01file)\n", " raw02 = json.load(L02file)\n", " layer_list = [raw01, raw02]\n", "\n", " layers = { \"layer_\" + str(index + 1) : \"\" for index in range(len(layer_list)) }\n", " for i,layer in enumerate(layer_list):\n", " extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n", " layers.update({\n", " f\"layer_{i}\": {\n", " \"operator\": layer[\"fullname\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " })\n", "\n", " print(json.dumps(layers, indent=2))\n", "\n", " L01file.close()\n", " L02file.close()" ] }, { "cell_type": "markdown", "id": "82700735-73fb-4b0a-aa97-3072c6330a48", "metadata": {}, "source": [ "But that only works because I know exactly how many layers there are and in which order they're stored.\n", "\n", "How we're storing and downloading the experiment data related to a same sample is still subject of discussion. The parser should be able to associate different JSON files to their own sample and group the files related to the same experiment; each file has a *Layer Progressive Number* which associates the data saved in the file to a specific layer, and it's imperative that the parser:\n", "* Recognises the absence of a layer (e.g. [1, 2, 4], returns that no 3rd layer exists);\n", "* Names every layer \"layer_X\" where X is the progressive number starting from 1 (not 0).\n", "\n", "### Multiple layers from uncategorized files\n", "Supposing I don't know that files *experiment_41_elab.json* and *experiment_43_elab.json* contain data of layers 1 and 2 of the same sample NA-26-001 I can always load every file in the folder indiscriminately and:\n", "* Filter out every non-eLabFTW file (by some recognition pattern).\n", "* Group the data by the sample it's associated to.\n", "\n", "\n", "#### Filter out non-eLabFTW files using the key \"elabid\" as challenge\n", "If the key *elabid* is present in the root of a JSON file then assume the file is an eLabFTW experiment output." ] }, { "cell_type": "code", "execution_count": 5, "id": "d8b83ba0-6b5b-425a-b365-8f5fa6ab4117", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../tests/objects/experiment_41_elab.json\n", "../tests/objects/experiment_43_elab.json\n" ] } ], "source": [ "import os\n", "\n", "elabfiles = []\n", "for filename in os.listdir(\"../tests/objects\"):\n", " if filename.endswith(\".json\"):\n", " try:\n", " with open(os.path.join(\"../tests/objects\", filename), \"r\") as f:\n", " data = json.load(f)\n", " if data.get(\"elabid\"): # insert specific NeXus requirements here later\n", " \n", " elabfiles.append(filename)\n", " f.close()\n", " except json.decoder.JSONDecodeError as e: # invalid files \"masked\" as JSON\n", " #print(f\"wait a moment: {e}\") # just for debug\n", " pass\n", "\n", "for i in elabfiles:\n", " print(os.path.join(\"../tests/objects/\", i))" ] }, { "cell_type": "markdown", "id": "825f32d1-eb8f-4f9a-af94-d7258e897f8f", "metadata": {}, "source": [ "#### Group the data by sample\n", "Lookup the value of the key \"*Sample*\" in the extra fields; two experiments with that same value are associated to the same sample. To obtain this result the best course of action is *probably* to create a dictionary with every unique sample and all the data associated with it:\n", "* The dictionary starts empty.\n", "* The parser then reads the data from the first eLab-compliant file, in particular it reads the ID of the sample associated.\n", "* If the ID (later the name) of the sample is not a key in the root of sample_dict create a new key, otherwise skip.\n", "* Add layer-specific data to new layer in the sample_dict." ] }, { "cell_type": "code", "execution_count": 6, "id": "4b7961e6-817f-44b7-b2de-16d15d9ec26a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{855: {'instrument': {'deposition_chamber': 72, 'laser_system': 'excimer 1', 'rheed_system': 'staib 2'}, 'multilayer': {'layer_1': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-07 13:22:35', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '650', 'group_id': 4, 'position': 3}, 'target': {'type': 'items', 'value': 466, 'group_id': 4, 'position': 2, 'required': True}}, 'layer_2': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-16 16:24:55', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '550', 'group_id': 4, 'position': 3}, 'target': {'type': 'items', 'value': 858, 'group_id': 4, 'position': 2, 'required': True}}}}}\n" ] } ], "source": [ "sample_dict = {}\n", "for filename in elabfiles:\n", " with open(os.path.join(\"../tests/objects/\", filename), \"r\") as f:\n", " layer = json.load(f)\n", " extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n", " sample = extra[\"Sample\"][\"value\"]\n", " lpn = int(extra[\"Layer Progressive Number\"][\"value\"]) # Layer Progressive Number\n", " if not sample_dict.get(sample): # if not existent yet, initialize\n", " sample_dict[sample] = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {}\n", " }\n", " sample_dict[sample][\"multilayer\"][f\"layer_{lpn}\"] = {\n", " \"operator\": layer[\"fullname\"],\n", " \"created_at\": layer[\"created_at\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " \n", "print(sample_dict)" ] }, { "cell_type": "markdown", "id": "27f876e0-291e-43d6-8b9d-3ce533896e5e", "metadata": {}, "source": [ "#### Look out for missing layers\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "6a5281dc-7fc3-4de7-845c-2bc2b54d4bb1", "metadata": {}, "outputs": [], "source": [ "#sample_dict[855][\"multilayer\"][\"layer_4\"] = {} # for debug purposes\n", "\n", "def find_missing(lst):\n", " '''\n", " Finds missing integers in sorted list.\n", " Time complexity is NlogN but since N is at most 10^2 it's not a problem for us.\n", " Source: geekforgeeks.org.\n", " '''\n", " return sorted(set(range(lst[0], lst[-1])) - set(lst))\n", "\n", "for item in sample_dict:\n", " layer_names = list(sample_dict[item].get(\"multilayer\").keys())\n", " numbers = sorted(int(layer.split('_')[1]) for layer in layer_names)\n", " missing = find_missing(numbers)\n", " if missing:\n", " print(\"Warning: some layers appear to be missing.\")\n", " print(f\"The missing layers are: \")\n", " for i in missing:\n", " print(f\"* layer_{i}\")\n" ] }, { "cell_type": "markdown", "id": "e2716da5-ff75-45d1-b765-d8bfb2ecaf71", "metadata": {}, "source": [ "### Names not ID's\n", "> TO-DO: Replace ID's of eLabFTW items with their actual names (might need a working API key)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }