{ "cells": [ { "cell_type": "markdown", "id": "dacba8a3-91fe-45ad-af7b-f5082466b969", "metadata": {}, "source": [ "# Basic JSON file parsing\n", "## Info gathered by the scientist on Experiment 41 \"NEW PLD Deposition Layer\"\n", "### General info\n", "* Date and time of creation\n", "* Category\n", "* Full name of the scientist\n", "* Related items (sample, PLD target)\n", "\n", "### Instrument\n", "* Chamber\n", "* Laser system\n", "* RHEED system\n", "\n", "### Process\n", "* Sample\n", "* Layer progressive number\n", "* Target\n", "* Heater temperature\n", "* Heater target distance\n", "* Buffer gas\n", "* Process pressure\n", "* Heating method\n", "* Laser intensity\n", "* Duration\n", "* Repetition rate\n", "* Thickness\n", "\n", "### Post annealing\n", "* Buffer gas used in PA\n", "* Process pressure of PA\n", "* Heater temperature of PA\n", "* Duration of PA\n", "\n", "## Basic parser\n", "Let's start by loading and printing the contents of Experiment 41's JSON as downloaded from eLabFTW." ] }, { "cell_type": "code", "execution_count": 1, "id": "dfa122ad-6de1-4282-bb67-8fcd877e6678", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"access_key\": null,\n", " \"body\": \"\",\n", " \"body_html\": \"\",\n", " \"canread\": \"{\\\"base\\\": 40, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n", " \"canread_is_immutable\": 0,\n", " \"canwrite\": \"{\\\"base\\\": 20, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n", " \"canwrite_is_immutable\": 0,\n", " \"category\": 2,\n", " \"category_color\": \"8b8d43\",\n", " \"category_title\": \"Deposition\",\n", " \"comments\": [],\n", " \"compounds\": [],\n", " \"containers\": [],\n", " \"content_type\": 1,\n", " \"created_at\": \"2026-01-20 16:11:32\",\n", " \"custom_id\": null,\n", " \"date\": \"2026-01-20\",\n", " \"elabid\": \"20260120-b72ca9659e21e904f5dbd12b2625a007ed97ed57\",\n", " \"events_start\": null,\n", " \"events_start_itemid\": null,\n", " \"exclusive_edit_mode\": null,\n", " \"experiments_links\": [],\n", " \"firstname\": \"Emiliano\",\n", " \"fullname\": \"Emiliano Di Gennaro\",\n", " \"id\": 45,\n", " \"is_pinned\": 0,\n", " \"items_links\": [\n", " {\n", " \"entityid\": 854,\n", " \"title\": \"LAO_single_crystal_01\",\n", " \"custom_id\": null,\n", " \"elabid\": \"20260107-1f8d0a6d9cf61be826b8b35f5516959848815310\",\n", " \"link_state\": 1,\n", " \"is_bookable\": 0,\n", " \"page\": \"database.php\",\n", " \"type\": \"items\",\n", " \"category_title\": \"NEW_PLD Target\",\n", " \"category_color\": \"29aeb9\",\n", " \"status_title\": \"Available\",\n", " \"status_color\": \"6a7753\"\n", " },\n", " {\n", " \"entityid\": 855,\n", " \"title\": \"Na-26-001\",\n", " \"custom_id\": null,\n", " \"elabid\": \"20260107-e83642d2b806e5db5ebb0d6309d874f4b4461114\",\n", " \"link_state\": 1,\n", " \"is_bookable\": 0,\n", " \"page\": \"database.php\",\n", " \"type\": \"items\",\n", " \"category_title\": \"NEW_Sample\",\n", " \"category_color\": \"29aeb9\",\n", " \"status_title\": \"Available\",\n", " \"status_color\": \"6a7753\"\n", " }\n", " ],\n", " \"lastchangeby\": 2,\n", " \"lastname\": \"Di Gennaro\",\n", " \"locked\": 0,\n", " \"locked_at\": null,\n", " \"lockedby\": null,\n", " \"metadata\": \"{\\\"elabftw\\\": {\\\"extra_fields_groups\\\": [{\\\"id\\\": 4, \\\"name\\\": \\\"Process\\\"}, {\\\"id\\\": 7, \\\"name\\\": \\\"Laser\\\"}, {\\\"id\\\": 8, \\\"name\\\": \\\"Pre Annealing\\\"}, {\\\"id\\\": 6, \\\"name\\\": \\\"Post Annealing\\\"}, {\\\"id\\\": 3, \\\"name\\\": \\\"Instruments\\\"}]}, \\\"extra_fields\\\": {\\\"Sample\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 855, \\\"group_id\\\": 4, \\\"position\\\": 0}, \\\"Target\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 854, \\\"group_id\\\": 4, \\\"position\\\": 2, \\\"required\\\": true}, \\\"Chamber\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 72, \\\"group_id\\\": 3, \\\"position\\\": 0}, \\\"Duration\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"340\\\", \\\"group_id\\\": 4, \\\"position\\\": 5}, \\\"Spot Area\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mm^2\\\", \\\"units\\\": [\\\"mm^2\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 7, \\\"position\\\": 1}, \\\"Thickness\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"u.c.\\\", \\\"units\\\": [\\\"u.c.\\\", \\\"s\\\"], \\\"value\\\": \\\"10\\\", \\\"group_id\\\": 4, \\\"position\\\": 6}, \\\"Buffer gas\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 4, \\\"position\\\": 3}, \\\"Duration PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Duration Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8}, \\\"Laser System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"Excimer \\\", \\\"group_id\\\": 3}, \\\"RHEED System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"staib\\\", \\\"group_id\\\": 3}, \\\"Buffer gas PA\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 6, \\\"position\\\": 3}, \\\"Buffer gas Pre\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 8, \\\"position\\\": 3}, \\\"Heating Method\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"Radiative Heater\\\", \\\"options\\\": [\\\"Radiative Heater\\\", \\\"Laser Heater\\\"], \\\"group_id\\\": 4, \\\"position\\\": 9}, \\\"Laser Intensity\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"J/(s cm^2)\\\", \\\"units\\\": [\\\"J/(s cm^2)\\\"], \\\"value\\\": \\\"1.5\\\", \\\"group_id\\\": 7, \\\"position\\\": 0}, \\\"Repetition rate\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"Hz\\\", \\\"units\\\": [\\\"Hz\\\"], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 7, \\\"position\\\": 4}, \\\"Process pressure \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"1e-3\\\", \\\"group_id\\\": 4, \\\"position\\\": 4}, \\\"Heater temperature \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"760\\\", \\\"group_id\\\": 4, \\\"position\\\": 7}, \\\"Process pressure PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6, \\\"position\\\": 4}, \\\"Process pressure Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8, \\\"position\\\": 4}, \\\"Heater temperature PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Laser Rastering Speed\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\\", \\\"units\\\": [], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 7, \\\"position\\\": 3}, \\\"Heater temperature Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8}, \\\"Heater-target distance\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mm\\\", \\\"units\\\": [\\\"mm\\\"], \\\"value\\\": \\\"38\\\", \\\"group_id\\\": 4, \\\"position\\\": 8}, \\\"Laser Rastering Geometry\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"none\\\", \\\"options\\\": [\\\"none\\\", \\\"on a square\\\", \\\"on a rectangle\\\", \\\"on a line\\\", \\\"other\\\"], \\\"group_id\\\": 7, \\\"position\\\": 2}, \\\"Layer Progressive Number\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\\", \\\"units\\\": [], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 4, \\\"position\\\": 1, \\\"required\\\": true}}}\",\n", " \"metadata_decoded\": {\n", " \"elabftw\": {\n", " \"extra_fields_groups\": [\n", " {\n", " \"id\": 4,\n", " \"name\": \"Process\"\n", " },\n", " {\n", " \"id\": 7,\n", " \"name\": \"Laser\"\n", " },\n", " {\n", " \"id\": 8,\n", " \"name\": \"Pre Annealing\"\n", " },\n", " {\n", " \"id\": 6,\n", " \"name\": \"Post Annealing\"\n", " },\n", " {\n", " \"id\": 3,\n", " \"name\": \"Instruments\"\n", " }\n", " ]\n", " },\n", " \"extra_fields\": {\n", " \"Sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"Target\": {\n", " \"type\": \"items\",\n", " \"value\": 854,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " },\n", " \"Chamber\": {\n", " \"type\": \"items\",\n", " \"value\": 72,\n", " \"group_id\": 3,\n", " \"position\": 0\n", " },\n", " \"Duration\": {\n", " \"type\": \"number\",\n", " \"unit\": \"s\",\n", " \"units\": [\n", " \"s\",\n", " \"min\"\n", " ],\n", " \"value\": \"340\",\n", " \"group_id\": 4,\n", " \"position\": 5\n", " },\n", " \"Spot Area\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mm^2\",\n", " \"units\": [\n", " \"mm^2\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 7,\n", " \"position\": 1\n", " },\n", " \"Thickness\": {\n", " \"type\": \"number\",\n", " \"unit\": \"u.c.\",\n", " \"units\": [\n", " \"u.c.\",\n", " \"s\"\n", " ],\n", " \"value\": \"10\",\n", " \"group_id\": 4,\n", " \"position\": 6\n", " },\n", " \"Buffer gas\": {\n", " \"type\": \"select\",\n", " \"value\": \"O2\",\n", " \"options\": [\n", " \"O2\",\n", " \"N2\",\n", " \"Ar\",\n", " \"\"\n", " ],\n", " \"group_id\": 4,\n", " \"position\": 3\n", " },\n", " \"Duration PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"s\",\n", " \"units\": [\n", " \"s\",\n", " \"min\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6\n", " },\n", " \"Duration Pre\": {\n", " \"type\": \"number\",\n", " \"unit\": \"s\",\n", " \"units\": [\n", " \"s\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 8\n", " },\n", " \"Laser System\": {\n", " \"type\": \"text\",\n", " \"value\": \"Excimer \",\n", " \"group_id\": 3\n", " },\n", " \"RHEED System\": {\n", " \"type\": \"text\",\n", " \"value\": \"staib\",\n", " \"group_id\": 3\n", " },\n", " \"Buffer gas PA\": {\n", " \"type\": \"select\",\n", " \"value\": \"O2\",\n", " \"options\": [\n", " \"O2\",\n", " \"N2\",\n", " \"Ar\",\n", " \"\"\n", " ],\n", " \"group_id\": 6,\n", " \"position\": 3\n", " },\n", " \"Buffer gas Pre\": {\n", " \"type\": \"select\",\n", " \"value\": \"O2\",\n", " \"options\": [\n", " \"O2\",\n", " \"N2\",\n", " \"Ar\",\n", " \"\"\n", " ],\n", " \"group_id\": 8,\n", " \"position\": 3\n", " },\n", " \"Heating Method\": {\n", " \"type\": \"select\",\n", " \"value\": \"Radiative Heater\",\n", " \"options\": [\n", " \"Radiative Heater\",\n", " \"Laser Heater\"\n", " ],\n", " \"group_id\": 4,\n", " \"position\": 9\n", " },\n", " \"Laser Intensity\": {\n", " \"type\": \"number\",\n", " \"unit\": \"J/(s cm^2)\",\n", " \"units\": [\n", " \"J/(s cm^2)\"\n", " ],\n", " \"value\": \"1.5\",\n", " \"group_id\": 7,\n", " \"position\": 0\n", " },\n", " \"Repetition rate\": {\n", " \"type\": \"number\",\n", " \"unit\": \"Hz\",\n", " \"units\": [\n", " \"Hz\"\n", " ],\n", " \"value\": \"1\",\n", " \"group_id\": 7,\n", " \"position\": 4\n", " },\n", " \"Process pressure \": {\n", " \"type\": \"number\",\n", " \"unit\": \"mbar\",\n", " \"units\": [\n", " \"mbar\"\n", " ],\n", " \"value\": \"1e-3\",\n", " \"group_id\": 4,\n", " \"position\": 4\n", " },\n", " \"Heater temperature \": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"760\",\n", " \"group_id\": 4,\n", " \"position\": 7\n", " },\n", " \"Process pressure PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mbar\",\n", " \"units\": [\n", " \"mbar\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6,\n", " \"position\": 4\n", " },\n", " \"Process pressure Pre\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mbar\",\n", " \"units\": [\n", " \"mbar\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 8,\n", " \"position\": 4\n", " },\n", " \"Heater temperature PA\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 6\n", " },\n", " \"Laser Rastering Speed\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\",\n", " \"units\": [],\n", " \"value\": \"\",\n", " \"group_id\": 7,\n", " \"position\": 3\n", " },\n", " \"Heater temperature Pre\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"\",\n", " \"group_id\": 8\n", " },\n", " \"Heater-target distance\": {\n", " \"type\": \"number\",\n", " \"unit\": \"mm\",\n", " \"units\": [\n", " \"mm\"\n", " ],\n", " \"value\": \"38\",\n", " \"group_id\": 4,\n", " \"position\": 8\n", " },\n", " \"Laser Rastering Geometry\": {\n", " \"type\": \"select\",\n", " \"value\": \"none\",\n", " \"options\": [\n", " \"none\",\n", " \"on a square\",\n", " \"on a rectangle\",\n", " \"on a line\",\n", " \"other\"\n", " ],\n", " \"group_id\": 7,\n", " \"position\": 2\n", " },\n", " \"Layer Progressive Number\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\",\n", " \"units\": [],\n", " \"value\": \"1\",\n", " \"group_id\": 4,\n", " \"position\": 1,\n", " \"required\": true\n", " }\n", " }\n", " },\n", " \"modified_at\": \"2026-01-20 16:17:59\",\n", " \"next_step\": \"add process data\",\n", " \"orcid\": \"0000-0003-4231-9776\",\n", " \"page\": \"experiments\",\n", " \"rating\": 0,\n", " \"recent_comment\": null,\n", " \"related_experiments_links\": [\n", " {\n", " \"entityid\": 46,\n", " \"title\": \"Na-26-001 deposition test II\",\n", " \"custom_id\": null,\n", " \"link_state\": 1,\n", " \"page\": \"experiments.php\",\n", " \"type\": \"experiments\",\n", " \"category_title\": \"Deposition\",\n", " \"category_color\": \"8b8d43\",\n", " \"status_title\": null,\n", " \"status_color\": null\n", " }\n", " ],\n", " \"related_items_links\": [],\n", " \"sharelink\": \"https://elabftw.fisica.unina.it:8080/experiments.php?mode=view&id=45\",\n", " \"state\": 1,\n", " \"status\": null,\n", " \"status_color\": null,\n", " \"status_title\": null,\n", " \"steps\": [\n", " {\n", " \"id\": 35,\n", " \"item_id\": 45,\n", " \"body\": \"add process data\",\n", " \"ordering\": 1,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " },\n", " {\n", " \"id\": 36,\n", " \"item_id\": 45,\n", " \"body\": \"add RHEED data\",\n", " \"ordering\": 2,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " },\n", " {\n", " \"id\": 37,\n", " \"item_id\": 45,\n", " \"body\": \"add RHEED images\",\n", " \"ordering\": 3,\n", " \"finished\": 0,\n", " \"finished_time\": null,\n", " \"deadline\": null,\n", " \"deadline_notif\": 0\n", " }\n", " ],\n", " \"tags\": null,\n", " \"tags_id\": null,\n", " \"team\": 1,\n", " \"team_name\": \"Default team\",\n", " \"timestamped\": 0,\n", " \"timestamped_at\": null,\n", " \"timestampedby\": null,\n", " \"title\": \"Na-26-001 deposition test I\",\n", " \"type\": \"experiments\",\n", " \"uploads\": [],\n", " \"userid\": 2\n", "}\n" ] } ], "source": [ "import json\n", "\n", "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n", " x = json.load(f)\n", " print(json.dumps(x,indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "a2e857a5-efdb-4177-a5cc-063270985531", "metadata": {}, "source": [ "For testing purposes now we'll create and print a simplified dictionary containing a sample of the harvested data for each group of metadata." ] }, { "cell_type": "code", "execution_count": 2, "id": "dae4b791-fd08-4f41-ba42-82edcf4e3cde", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"instrument\": {\n", " \"deposition_chamber\": 72,\n", " \"laser_system\": \"Excimer \",\n", " \"rheed_system\": \"staib\"\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": 855,\n", " \"temperature\": \"760\",\n", " \"target\": 854\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n", " rawdata = json.load(f)\n", " extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n", " ordered = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": rawdata[\"fullname\"],\n", " \"sample\": extra[\"Sample\"][\"value\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"][\"value\"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"][\"value\"] # ID of associated resource (PLD target) - useless as is!\n", " }\n", " },\n", " }\n", " print(json.dumps(ordered,indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "c9992a47-ce3c-47ec-94bd-26fbec020962", "metadata": {}, "source": [ "Some issues rise here:\n", "* First of all the fields \"deposition_chamber\", \"sample\" and \"target\" **refer to the value of the eLabFTW ID of the associated resource** which is useless as is since it does not contain any relevant data on these objects;\n", "* Second, the same sample can have two different eLab Experiments associated to it, each representing **a different layer** of the deposition.\n", "\n", "> Note: a layer progressive number is tracked by the scientist, and it can be found in the JSON dictionary under `metadata_decoded -> extra_fields -> Layer Progressive Number -> value`.\n", "\n", "### Multiple layers from known sources\n", "One problem at a time: first of all I can create an \"ordered\" dictionary with an empty \"multilayer\" key and append the layer-specific value of every layer later using the *dict().update()* method." ] }, { "cell_type": "code", "execution_count": 3, "id": "b3afa42e-e982-4dd3-9ea6-cf7918dc276f", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"instrument\": {\n", " \"deposition_chamber\": 72,\n", " \"laser_system\": \"Excimer \",\n", " \"rheed_system\": \"staib\"\n", " },\n", " \"multilayer\": {\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"760\",\n", " \"group_id\": 4,\n", " \"position\": 7\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 854,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n", " rawdata = json.load(f)\n", " extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n", " layers = {\n", " \"layer_1\": {\n", " \"operator\": rawdata[\"fullname\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " }\n", "\n", " ordered = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {\n", " },\n", " }\n", " for l in layers:\n", " ordered[\"multilayer\"].update(\n", " {l: layers[l]}\n", " )\n", " print(json.dumps(ordered, indent=2))\n", " f.close()" ] }, { "cell_type": "markdown", "id": "4a7ff14f-d2fc-4485-a174-a23248791a6f", "metadata": {}, "source": [ "Now entering the second layer: Experiment 43.\n", "\n", "If I were to create a \"layers\" dictionary with the same info from the two different experiments it would look like this:" ] }, { "cell_type": "code", "execution_count": 4, "id": "9212afba-9868-467f-ac4d-8cbce0f1537a", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"layer_1\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"500\",\n", " \"group_id\": 4,\n", " \"position\": 7\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 466,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " },\n", " \"layer_2\": \"\",\n", " \"layer_0\": {\n", " \"operator\": \"Emiliano Di Gennaro\",\n", " \"sample\": {\n", " \"type\": \"items\",\n", " \"value\": 855,\n", " \"group_id\": 4,\n", " \"position\": 0\n", " },\n", " \"temperature\": {\n", " \"type\": \"number\",\n", " \"unit\": \"\\u00b0C\",\n", " \"units\": [\n", " \"\\u00b0C\"\n", " ],\n", " \"value\": \"760\",\n", " \"group_id\": 4,\n", " \"position\": 7\n", " },\n", " \"target\": {\n", " \"type\": \"items\",\n", " \"value\": 854,\n", " \"group_id\": 4,\n", " \"position\": 2,\n", " \"required\": true\n", " }\n", " }\n", "}\n" ] } ], "source": [ "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as L01file, open(\"../tests/objects/experiment_46_elab.json\", \"r\") as L02file:\n", " raw01 = json.load(L01file)\n", " raw02 = json.load(L02file)\n", " layer_list = [raw01, raw02]\n", "\n", " layers = { \"layer_\" + str(index + 1) : \"\" for index in range(len(layer_list)) }\n", " for i,layer in enumerate(layer_list):\n", " extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n", " layers.update({\n", " f\"layer_{i}\": {\n", " \"operator\": layer[\"fullname\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " })\n", "\n", " print(json.dumps(layers, indent=2))\n", "\n", " L01file.close()\n", " L02file.close()" ] }, { "cell_type": "markdown", "id": "82700735-73fb-4b0a-aa97-3072c6330a48", "metadata": {}, "source": [ "But that only works because I know exactly how many layers there are and in which order they're stored.\n", "\n", "How we're storing and downloading the experiment data related to a same sample is still subject of discussion. The parser should be able to associate different JSON files to their own sample and group the files related to the same experiment; each file has a *Layer Progressive Number* which associates the data saved in the file to a specific layer, and it's imperative that the parser:\n", "* Recognises the absence of a layer (e.g. [1, 2, 4], returns that no 3rd layer exists);\n", "* Names every layer \"layer_X\" where X is the progressive number starting from 1 (not 0).\n", "\n", "### Multiple layers from uncategorized files\n", "Supposing I don't know that files *experiment_41_elab.json* and *experiment_43_elab.json* contain data of layers 1 and 2 of the same sample NA-26-001 I can always load every file in the folder indiscriminately and:\n", "* Filter out every non-eLabFTW file (by some recognition pattern).\n", "* Group the data by the sample it's associated to.\n", "\n", "\n", "#### Filter out non-eLabFTW files using the key \"elabid\" as challenge\n", "If the key *elabid* is present in the root of a JSON file then assume the file is an eLabFTW experiment output." ] }, { "cell_type": "code", "execution_count": 5, "id": "d8b83ba0-6b5b-425a-b365-8f5fa6ab4117", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../tests/objects/experiment_46_elab.json\n", "../tests/objects/experiment_45_elab.json\n" ] } ], "source": [ "import os\n", "\n", "elabfiles = []\n", "for filename in os.listdir(\"../tests/objects\"):\n", " if filename.endswith(\".json\"):\n", " try:\n", " with open(os.path.join(\"../tests/objects\", filename), \"r\") as f:\n", " data = json.load(f)\n", " if data.get(\"elabid\"): # insert specific NeXus requirements here later\n", " \n", " elabfiles.append(filename)\n", " f.close()\n", " except json.decoder.JSONDecodeError as e: # invalid files \"masked\" as JSON\n", " #print(f\"wait a moment: {e}\") # just for debug\n", " pass\n", "\n", "for i in elabfiles:\n", " print(os.path.join(\"../tests/objects/\", i))" ] }, { "cell_type": "markdown", "id": "825f32d1-eb8f-4f9a-af94-d7258e897f8f", "metadata": {}, "source": [ "#### Group the data by sample\n", "Lookup the value of the key \"*Sample*\" in the extra fields; two experiments with that same value are associated to the same sample. To obtain this result the best course of action is *probably* to create a dictionary with every unique sample and all the data associated with it:\n", "* The dictionary starts empty.\n", "* The parser then reads the data from the first eLab-compliant file, in particular it reads the ID of the sample associated.\n", "* If the ID (later the name) of the sample is not a key in the root of sample_dict create a new key, otherwise skip.\n", "* Add layer-specific data to new layer in the sample_dict." ] }, { "cell_type": "code", "execution_count": 6, "id": "4b7961e6-817f-44b7-b2de-16d15d9ec26a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{855: {'instrument': {'deposition_chamber': 72, 'laser_system': 'Excimer ', 'rheed_system': 'staib'}, 'multilayer': {'layer_2': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-20 16:18:48', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '500', 'group_id': 4, 'position': 7}, 'target': {'type': 'items', 'value': 466, 'group_id': 4, 'position': 2, 'required': True}}, 'layer_1': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-20 16:11:32', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '760', 'group_id': 4, 'position': 7}, 'target': {'type': 'items', 'value': 854, 'group_id': 4, 'position': 2, 'required': True}}}}}\n" ] } ], "source": [ "sample_dict = {}\n", "for filename in elabfiles:\n", " with open(os.path.join(\"../tests/objects/\", filename), \"r\") as f:\n", " layer = json.load(f)\n", " extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n", " sample = extra[\"Sample\"][\"value\"]\n", " lpn = int(extra[\"Layer Progressive Number\"][\"value\"]) # Layer Progressive Number\n", " if not sample_dict.get(sample): # if not existent yet, initialize\n", " sample_dict[sample] = {\n", " \"instrument\": {\n", " \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n", " \"laser_system\": extra[\"Laser System\"][\"value\"],\n", " \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n", " },\n", " \"multilayer\": {}\n", " }\n", " sample_dict[sample][\"multilayer\"][f\"layer_{lpn}\"] = {\n", " \"operator\": layer[\"fullname\"],\n", " \"created_at\": layer[\"created_at\"],\n", " \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n", " \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n", " \"target\": extra[\"Target\"]\n", " }\n", " \n", "print(sample_dict)" ] }, { "cell_type": "markdown", "id": "27f876e0-291e-43d6-8b9d-3ce533896e5e", "metadata": {}, "source": [ "#### Look out for missing layers\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "6a5281dc-7fc3-4de7-845c-2bc2b54d4bb1", "metadata": {}, "outputs": [], "source": [ "#sample_dict[855][\"multilayer\"][\"layer_4\"] = {} # for debug purposes\n", "\n", "def find_missing(lst):\n", " '''\n", " Finds missing integers in sorted list.\n", " Time complexity is NlogN but since N is at most 10^2 it's not a problem for us.\n", " Source: geekforgeeks.org.\n", " '''\n", " return sorted(set(range(lst[0], lst[-1])) - set(lst))\n", "\n", "for item in sample_dict:\n", " layer_names = list(sample_dict[item].get(\"multilayer\").keys())\n", " numbers = sorted(int(layer.split('_')[1]) for layer in layer_names)\n", " missing = find_missing(numbers)\n", " if missing:\n", " print(\"Warning: some layers appear to be missing.\")\n", " print(f\"The missing layers are: \")\n", " for i in missing:\n", " print(f\"* layer_{i}\")\n" ] }, { "cell_type": "markdown", "id": "e2716da5-ff75-45d1-b765-d8bfb2ecaf71", "metadata": {}, "source": [ "### Names not ID's\n", "> TO-DO: Replace ID's of eLabFTW items with their actual names (might need a working API key).\n", "\n", "Since eLab ID's are not relevant to this project we need factual information about the sample itself. For instance, we want to fetch its name, chemical formula and dimensions - all data present on the eLabFTW entry, obtainable on the *items* API endpoint using the eLab ID of the sample." ] }, { "cell_type": "code", "execution_count": 8, "id": "24793e2b-67bb-4e8b-9d93-7802d3af7fca", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdin", "output_type": "stream", "text": [ "Paste API key here: ········\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{'access_key': None, 'available': 1, 'body': '', 'body_html': '', 'book_can_overlap': 1, 'book_cancel_minutes': 0, 'book_is_cancellable': 1, 'book_max_minutes': 0, 'book_max_slots': 0, 'book_users_can_in_past': 0, 'canbook': '{\"base\": 40, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canread': '{\"base\": 40, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canread_is_immutable': 0, 'canwrite': '{\"base\": 30, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canwrite_is_immutable': 0, 'category': 19, 'category_color': '29aeb9', 'category_title': 'NEW_Sample', 'comments': [], 'compounds': [], 'containers': [], 'content_type': 1, 'created_at': '2026-01-07 13:20:02', 'custom_id': None, 'date': '2026-01-07', 'elabid': '20260107-e83642d2b806e5db5ebb0d6309d874f4b4461114', 'events_start': None, 'events_start_itemid': None, 'exclusive_edit_mode': None, 'experiments_links': [], 'firstname': 'Emiliano', 'fullname': 'Emiliano Di Gennaro', 'id': 855, 'is_bookable': 0, 'is_pinned': 0, 'is_procurable': 0, 'items_links': [{'entityid': 853, 'title': 'test_01', 'custom_id': None, 'elabid': '20260107-8a39f3d60b8422a878f14dbc5aa1956a6b939e07', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'NEW_Substrates batch', 'category_color': '29aeb9', 'status_title': 'Available', 'status_color': '6a7753'}, {'entityid': 180, 'title': 'NFFA -01', 'custom_id': None, 'elabid': '20240422-d90c61f98b5a368b877b3c7bcdc5448612037b0e', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'Proposal', 'category_color': 'cdab8f', 'status_title': None, 'status_color': None}, {'entityid': 826, 'title': 'CART - 6', 'custom_id': None, 'elabid': '20250415-b8c364bae6a2f74be82de1d9370c96d6b031c4d4', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'Sample Position', 'category_color': '26a269', 'status_title': None, 'status_color': None}], 'lastchangeby': 11, 'lastname': 'Di Gennaro', 'locked': 0, 'locked_at': None, 'lockedby': None, 'metadata': '{\"extra_fields\": {\"Owner\": {\"type\": \"users\", \"value\": 2, \"position\": 1, \"required\": true}, \"STD-ID\": {\"type\": \"number\", \"unit\": \"\", \"units\": [], \"value\": \"26001\", \"position\": 0, \"required\": true, \"description\": \"This is an internal ID identifier\"}, \"Position\": {\"type\": \"items\", \"value\": 826, \"position\": 4}, \"Proposal\": {\"type\": \"items\", \"value\": 180, \"position\": 5}, \"Subtrate batch\": {\"type\": \"items\", \"value\": 853, \"position\": 2}, \"Substrate Holder\": {\"type\": \"text\", \"value\": \"1\", \"position\": 3}}}', 'metadata_decoded': {'extra_fields': {'Owner': {'type': 'users', 'value': 2, 'position': 1, 'required': True}, 'STD-ID': {'type': 'number', 'unit': '', 'units': [], 'value': '26001', 'position': 0, 'required': True, 'description': 'This is an internal ID identifier'}, 'Position': {'type': 'items', 'value': 826, 'position': 4}, 'Proposal': {'type': 'items', 'value': 180, 'position': 5}, 'Subtrate batch': {'type': 'items', 'value': 853, 'position': 2}, 'Substrate Holder': {'type': 'text', 'value': '1', 'position': 3}}}, 'modified_at': '2026-01-21 22:04:27', 'next_step': None, 'orcid': '0000-0003-4231-9776', 'page': 'database', 'proc_currency': 0, 'proc_pack_qty': 0, 'proc_price_notax': '0.00', 'proc_price_tax': '0.00', 'rating': 0, 'recent_comment': None, 'related_experiments_links': [{'entityid': 41, 'title': 'NEW PLD Deposition Layer', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': 'Running', 'status_color': '29AEB9'}, {'entityid': 43, 'title': 'NEW PLD Deposition Layer I', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}, {'entityid': 45, 'title': 'Na-26-001 deposition test I', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}, {'entityid': 46, 'title': 'Na-26-001 deposition test II', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}], 'related_items_links': [], 'sharelink': 'https://elabftw.fisica.unina.it:8080/database.php?mode=view&id=855', 'state': 1, 'status': 1, 'status_color': '6a7753', 'status_title': 'Available', 'steps': [], 'tags': None, 'tags_id': None, 'team': 1, 'team_name': 'Default team', 'timestamped': 0, 'timestamped_at': None, 'timestampedby': None, 'title': 'Na-26-001', 'type': 'items', 'uploads': [], 'userid': 2}\n" ] } ], "source": [ "import requests\n", "from getpass import getpass # not to leak my key through jupyter + git\n", "\n", "def call_sample(API_KEY, elabid, API_URL=\"https://elabftw.fisica.unina.it/\"):\n", " full_elab_url = f\"{API_URL}api/v2\" # API endpoint root for eLabFTW\n", " items_url = f\"{full_elab_url}/items\" # API endpoint /items\n", " header = {\n", " \"Authorization\": API_KEY,\n", " \"Content-Type\": \"application/json\"\n", " }\n", " sample = requests.get(\n", " headers=header,\n", " url=f\"{items_url}/{elabid}\",\n", " verify=True\n", " )\n", " return sample.json()\n", "\n", "apikey = getpass(\"Paste API key here: \")\n", "testing = call_sample(apikey, 855)\n", "print(testing)" ] }, { "cell_type": "markdown", "id": "89d89d7a-0e13-42c2-83ba-fb03d5a2c39b", "metadata": {}, "source": [ "#### Filtering data\n", "Now let's select only the useful data, which at the moment is just the name." ] }, { "cell_type": "code", "execution_count": 10, "id": "0ffa8e82-2d7e-4dae-9081-a776f1e5ba9f", "metadata": {}, "outputs": [ { "name": "stdin", "output_type": "stream", "text": [ "Paste API key here: ········\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Na-26-001\n" ] } ], "source": [ "resources_ids = [ i for i in sample_dict ]\n", "first_sample = resources_ids[0]\n", "\n", "apikey = getpass(\"Paste API key here: \")\n", "sample_data = call_sample(apikey, first_sample)\n", "sample_title = sample_data[\"title\"]\n", "print(sample_title)" ] }, { "cell_type": "code", "execution_count": null, "id": "ba4f0459-8da0-494d-b0c2-23dae509538c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 5 }