diff --git a/jupyter/parsing.ipynb b/jupyter/parsing.ipynb new file mode 100644 index 0000000..8faa10c --- /dev/null +++ b/jupyter/parsing.ipynb @@ -0,0 +1,834 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dacba8a3-91fe-45ad-af7b-f5082466b969", + "metadata": {}, + "source": [ + "# Basic JSON file parsing\n", + "## Info gathered by the scientist on Experiment 41 \"NEW PLD Deposition Layer\"\n", + "### General info\n", + "* Date and time of creation\n", + "* Category\n", + "* Full name of the scientist\n", + "* Related items (sample, PLD target)\n", + "\n", + "### Instrument\n", + "* Chamber\n", + "* Laser system\n", + "* RHEED system\n", + "\n", + "### Process\n", + "* Sample\n", + "* Layer progressive number\n", + "* Target\n", + "* Heater temperature\n", + "* Heater target distance\n", + "* Buffer gas\n", + "* Process pressure\n", + "* Heating method\n", + "* Laser intensity\n", + "* Duration\n", + "* Repetition rate\n", + "* Thickness\n", + "\n", + "### Post annealing\n", + "* Buffer gas used in PA\n", + "* Process pressure of PA\n", + "* Heater temperature of PA\n", + "* Duration of PA\n", + "\n", + "## Basic parser\n", + "Let's start by loading and printing the contents of Experiment 41's JSON as downloaded from eLabFTW." 
import json

# Load the eLabFTW export of Experiment 41 and pretty-print it for inspection.
# The `with` block closes the file on exit, so no explicit close() is needed.
# UTF-8 is requested explicitly (the JSON interchange encoding, RFC 8259) so
# that decoding does not depend on the platform's default locale.
with open("../tests/objects/experiment_41_elab.json", "r", encoding="utf-8") as f:
    x = json.load(f)

# Only the parsed dictionary is needed from here on, so print after the file is closed.
print(json.dumps(x, indent=2))
# Build a simplified dictionary holding a sample of the harvested data for each
# group of metadata (instrument / multilayer).
# The `with` block closes the file by itself; UTF-8 is stated explicitly so the
# result does not depend on the platform's default encoding.
with open("../tests/objects/experiment_41_elab.json", "r", encoding="utf-8") as f:
    rawdata = json.load(f)

# All the custom eLabFTW fields filled in by the scientist live under this key.
extra = rawdata["metadata_decoded"]["extra_fields"]

ordered = {
    "instrument": {
        "deposition_chamber": extra["Chamber"]["value"],  # ID of associated resource (PLD chamber) - useless as is!
        "laser_system": extra["Laser System"]["value"],
        "rheed_system": extra["RHEED System"]["value"],
    },
    "multilayer": {
        "layer_1": {
            "operator": rawdata["fullname"],
            "sample": extra["Sample"]["value"],  # ID of associated sample - useless as is!
            "temperature": extra["Heater temperature "]["value"],  # space at the end is a config error in eLab!
            "target": extra["Target"]["value"],  # ID of associated resource (PLD target) - useless as is!
        }
    },
}
print(json.dumps(ordered, indent=2))
# Start from an "ordered" dictionary whose "multilayer" entry is empty, then
# merge the layer-specific data into it with dict.update().
# The `with` block closes the file by itself; UTF-8 is stated explicitly so the
# result does not depend on the platform's default encoding.
with open("../tests/objects/experiment_41_elab.json", "r", encoding="utf-8") as f:
    rawdata = json.load(f)

extra = rawdata["metadata_decoded"]["extra_fields"]

# Layer-specific data, keyed by layer name. The whole extra field (type, unit,
# value, ...) is kept here rather than just its "value".
layers = {
    "layer_1": {
        "operator": rawdata["fullname"],
        "sample": extra["Sample"],  # "value" is the ID of associated sample - useless as is!
        "temperature": extra["Heater temperature "],  # space at the end is a config error in eLab!
        "target": extra["Target"],
    }
}

ordered = {
    "instrument": {
        "deposition_chamber": extra["Chamber"]["value"],  # ID of associated resource (PLD chamber) - useless as is!
        "laser_system": extra["Laser System"]["value"],
        "rheed_system": extra["RHEED System"]["value"],
    },
    "multilayer": {},
}

# update() accepts a whole mapping, so every layer is merged in a single call.
ordered["multilayer"].update(layers)
print(json.dumps(ordered, indent=2))
# Build the "layers" dictionary from two experiments that describe two
# successive layers of the same sample.
# Both files are closed automatically when the `with` block exits.
with open("../tests/objects/experiment_41_elab.json", "r", encoding="utf-8") as L01file, \
        open("../tests/objects/experiment_43_elab.json", "r", encoding="utf-8") as L02file:
    raw01 = json.load(L01file)
    raw02 = json.load(L02file)

# Order matters here: position in the list decides the layer number.
layer_list = [raw01, raw02]

layers = {}
# Layers are numbered from 1, not 0. Counting from 0 used to produce a spurious
# "layer_0", overwrite "layer_1" with the second experiment and leave
# "layer_2" as an empty placeholder.
for i, layer in enumerate(layer_list, start=1):
    extra = layer["metadata_decoded"]["extra_fields"]
    layers[f"layer_{i}"] = {
        "operator": layer["fullname"],
        "sample": extra["Sample"],  # "value" is the ID of associated sample - useless as is!
        "temperature": extra["Heater temperature "],  # space at the end is a config error in eLab!
        "target": extra["Target"],
    }

print(json.dumps(layers, indent=2))
import os

# Folder scanned for eLabFTW exports; defined once so that the scan and the
# final listing cannot drift apart.
OBJECTS_DIR = "../tests/objects"

# Names (not paths) of the files recognised as eLabFTW experiment exports.
elabfiles = []
# os.listdir() returns entries in arbitrary order: sort them so that the result
# is the same on every run and on every machine.
for filename in sorted(os.listdir(OBJECTS_DIR)):
    if not filename.endswith(".json"):
        continue
    try:
        with open(os.path.join(OBJECTS_DIR, filename), "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Invalid files "masked" as JSON: not eLabFTW exports, skip them.
        continue
    # A valid JSON file may have a list (or a scalar) at its root, which has no
    # .get(); only a root object carrying a non-empty "elabid" is accepted.
    if isinstance(data, dict) and data.get("elabid"):  # insert specific NeXus requirements here later
        elabfiles.append(filename)

for elab_filename in elabfiles:
    print(os.path.join(OBJECTS_DIR, elab_filename))
# Group the data of every eLabFTW file by the sample it refers to:
#   { sample ID: {"instrument": {...}, "multilayer": {"layer_N": {...}, ...}} }
sample_dict = {}
for filename in elabfiles:
    # The file is only needed for parsing, so it is closed right after loading.
    with open(os.path.join("../tests/objects/", filename), "r", encoding="utf-8") as f:
        layer = json.load(f)

    extra = layer["metadata_decoded"]["extra_fields"]
    sample = extra["Sample"]["value"]
    lpn = int(extra["Layer Progressive Number"]["value"])  # Layer Progressive Number

    if sample not in sample_dict:  # if not existent yet, initialize
        # The instrument section is taken from the first experiment found for the sample.
        sample_dict[sample] = {
            "instrument": {
                "deposition_chamber": extra["Chamber"]["value"],  # ID of associated resource (PLD chamber) - useless as is!
                "laser_system": extra["Laser System"]["value"],
                "rheed_system": extra["RHEED System"]["value"],
            },
            "multilayer": {},
        }

    multilayer = sample_dict[sample]["multilayer"]
    layer_key = f"layer_{lpn}"
    if layer_key in multilayer:
        # Two experiments claim the same layer of the same sample: the later
        # file still wins, but the overwrite is no longer silent.
        print(f"Warning: {layer_key} of sample {sample} is defined more than once; "
              f"data from {filename} replaces the previous one.")
    multilayer[layer_key] = {
        "operator": layer["fullname"],
        "created_at": layer["created_at"],
        "sample": extra["Sample"],  # "value" is the ID of associated sample - useless as is!
        "temperature": extra["Heater temperature "],  # space at the end is a config error in eLab!
        "target": extra["Target"],
    }

print(sample_dict)
# To try out the warning, create a gap first (for debug purposes), e.g.:
# sample_dict[855]["multilayer"]["layer_4"] = {}

def find_missing(lst, start=None):
    '''
    Find the integers missing from a collection of integers.

    Parameters:
        lst: integers that are present; may be empty, unsorted or contain
            duplicates.
        start: first integer that is expected to be present. When None
            (default) the search starts from the smallest element of lst,
            so only gaps between the smallest and the largest element are
            reported.

    Returns:
        Sorted list of the integers between the starting point and the largest
        element of lst that do not appear in lst. Empty when lst is empty or
        nothing is missing.

    Time complexity is at most NlogN but since N is at most 10^2 it's not a
    problem for us.
    Source: geeksforgeeks.org.
    '''
    present = set(lst)
    if not present:
        # Nothing to compare against (and no smallest/largest element to read).
        return []
    first = min(present) if start is None else start
    # The largest element is present by definition, so the range may stop before it.
    return sorted(set(range(first, max(present))) - present)

for item, sample_data in sample_dict.items():
    # A sample without a "multilayer" entry simply has no layers to check.
    layer_names = sample_data.get("multilayer", {})
    numbers = sorted(int(name.split('_')[1]) for name in layer_names)
    # Layers are numbered from 1, so a sample holding only layer_2 and layer_3
    # is missing layer_1 as well.
    missing = find_missing(numbers, start=1)
    if missing:
        print(f"Warning: some layers of sample {item} appear to be missing.")
        print("The missing layers are: ")
        for i in missing:
            print(f"* layer_{i}")
"e2716da5-ff75-45d1-b765-d8bfb2ecaf71", + "metadata": {}, + "source": [ + "### Names, not IDs\n", + "> TO-DO: Replace the IDs of eLabFTW items with their actual names (might need a working API key)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}