parser-eLabFTW-jupyter/jupyter/basic_parser.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "dacba8a3-91fe-45ad-af7b-f5082466b969",
   "metadata": {},
   "source": [
    "# Basic JSON file parsing\n",
    "## Info gathered by the scientist on eLabFTW\n",
    "### Experiment 45 \"NEW PLD Deposition Layer\"\n",
    "#### General info\n",
    "* Date and time of creation\n",
    "* Category\n",
    "* Full name of the scientist\n",
    "* Related items (sample, PLD target)\n",
    "\n",
    "#### Instrument\n",
    "* Chamber (by ID)\n",
    "* Laser system\n",
    "* RHEED system\n",
    "\n",
    "#### Process\n",
    "* Sample (by ID)\n",
    "* Layer progressive number\n",
    "* Target (by ID)\n",
    "* Heater temperature\n",
    "* Heater target distance\n",
    "* Buffer gas\n",
    "* Process pressure\n",
    "* Heating method\n",
    "* Laser intensity\n",
    "* Duration\n",
    "* Repetition rate\n",
    "* Thickness\n",
    "\n",
    "#### Post annealing\n",
    "* Buffer gas used in PA\n",
    "* Process pressure of PA\n",
    "* Heater temperature of PA\n",
    "* Duration of PA\n",
    "\n",
    "### Chamber\n",
    "\n",
    "### Sample\n",
    "\n",
    "### Target"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c6321d97-4c3e-4e73-a3a2-e3f23ae0a733",
   "metadata": {},
   "source": [
    "## Brick by brick\n",
    "Let's start by loading and printing the contents of Experiment 45's JSON as downloaded from eLabFTW."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dfa122ad-6de1-4282-bb67-8fcd877e6678",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"access_key\": null,\n",
      "  \"body\": \"\",\n",
      "  \"body_html\": \"\",\n",
      "  \"canread\": \"{\\\"base\\\": 40, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n",
      "  \"canread_is_immutable\": 0,\n",
      "  \"canwrite\": \"{\\\"base\\\": 20, \\\"teams\\\": [], \\\"users\\\": [], \\\"teamgroups\\\": []}\",\n",
      "  \"canwrite_is_immutable\": 0,\n",
      "  \"category\": 2,\n",
      "  \"category_color\": \"8b8d43\",\n",
      "  \"category_title\": \"Deposition\",\n",
      "  \"comments\": [],\n",
      "  \"compounds\": [],\n",
      "  \"containers\": [],\n",
      "  \"content_type\": 1,\n",
      "  \"created_at\": \"2026-01-20 16:11:32\",\n",
      "  \"custom_id\": null,\n",
      "  \"date\": \"2026-01-20\",\n",
      "  \"elabid\": \"20260120-b72ca9659e21e904f5dbd12b2625a007ed97ed57\",\n",
      "  \"events_start\": null,\n",
      "  \"events_start_itemid\": null,\n",
      "  \"exclusive_edit_mode\": null,\n",
      "  \"experiments_links\": [],\n",
      "  \"firstname\": \"Emiliano\",\n",
      "  \"fullname\": \"Emiliano Di Gennaro\",\n",
      "  \"id\": 45,\n",
      "  \"is_pinned\": 0,\n",
      "  \"items_links\": [\n",
      "    {\n",
      "      \"entityid\": 854,\n",
      "      \"title\": \"LAO_single_crystal_01\",\n",
      "      \"custom_id\": null,\n",
      "      \"elabid\": \"20260107-1f8d0a6d9cf61be826b8b35f5516959848815310\",\n",
      "      \"link_state\": 1,\n",
      "      \"is_bookable\": 0,\n",
      "      \"page\": \"database.php\",\n",
      "      \"type\": \"items\",\n",
      "      \"category_title\": \"NEW_PLD Target\",\n",
      "      \"category_color\": \"29aeb9\",\n",
      "      \"status_title\": \"Available\",\n",
      "      \"status_color\": \"6a7753\"\n",
      "    },\n",
      "    {\n",
      "      \"entityid\": 855,\n",
      "      \"title\": \"Na-26-001\",\n",
      "      \"custom_id\": null,\n",
      "      \"elabid\": \"20260107-e83642d2b806e5db5ebb0d6309d874f4b4461114\",\n",
      "      \"link_state\": 1,\n",
      "      \"is_bookable\": 0,\n",
      "      \"page\": \"database.php\",\n",
      "      \"type\": \"items\",\n",
      "      \"category_title\": \"NEW_Sample\",\n",
      "      \"category_color\": \"29aeb9\",\n",
      "      \"status_title\": \"Available\",\n",
      "      \"status_color\": \"6a7753\"\n",
      "    }\n",
      "  ],\n",
      "  \"lastchangeby\": 2,\n",
      "  \"lastname\": \"Di Gennaro\",\n",
      "  \"locked\": 0,\n",
      "  \"locked_at\": null,\n",
      "  \"lockedby\": null,\n",
      "  \"metadata\": \"{\\\"elabftw\\\": {\\\"extra_fields_groups\\\": [{\\\"id\\\": 4, \\\"name\\\": \\\"Process\\\"}, {\\\"id\\\": 7, \\\"name\\\": \\\"Laser\\\"}, {\\\"id\\\": 8, \\\"name\\\": \\\"Pre Annealing\\\"}, {\\\"id\\\": 6, \\\"name\\\": \\\"Post Annealing\\\"}, {\\\"id\\\": 3, \\\"name\\\": \\\"Instruments\\\"}]}, \\\"extra_fields\\\": {\\\"Sample\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 855, \\\"group_id\\\": 4, \\\"position\\\": 0}, \\\"Target\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 854, \\\"group_id\\\": 4, \\\"position\\\": 2, \\\"required\\\": true}, \\\"Chamber\\\": {\\\"type\\\": \\\"items\\\", \\\"value\\\": 72, \\\"group_id\\\": 3, \\\"position\\\": 0}, \\\"Duration\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"340\\\", \\\"group_id\\\": 4, \\\"position\\\": 5}, \\\"Spot Area\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mm^2\\\", \\\"units\\\": [\\\"mm^2\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 7, \\\"position\\\": 1}, \\\"Thickness\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"u.c.\\\", \\\"units\\\": [\\\"u.c.\\\", \\\"s\\\"], \\\"value\\\": \\\"10\\\", \\\"group_id\\\": 4, \\\"position\\\": 6}, \\\"Buffer gas\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 4, \\\"position\\\": 3}, \\\"Duration PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\", \\\"min\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Duration Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"s\\\", \\\"units\\\": [\\\"s\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8}, \\\"Laser System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"Excimer \\\", \\\"group_id\\\": 3}, \\\"RHEED System\\\": {\\\"type\\\": \\\"text\\\", \\\"value\\\": \\\"staib\\\", \\\"group_id\\\": 3}, \\\"Buffer gas PA\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 6, \\\"position\\\": 3}, \\\"Buffer gas Pre\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"O2\\\", \\\"options\\\": [\\\"O2\\\", \\\"N2\\\", \\\"Ar\\\", \\\"\\\"], \\\"group_id\\\": 8, \\\"position\\\": 3}, \\\"Heating Method\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"Radiative Heater\\\", \\\"options\\\": [\\\"Radiative Heater\\\", \\\"Laser Heater\\\"], \\\"group_id\\\": 4, \\\"position\\\": 9}, \\\"Laser Intensity\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"J/(s cm^2)\\\", \\\"units\\\": [\\\"J/(s cm^2)\\\"], \\\"value\\\": \\\"1.5\\\", \\\"group_id\\\": 7, \\\"position\\\": 0}, \\\"Repetition rate\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"Hz\\\", \\\"units\\\": [\\\"Hz\\\"], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 7, \\\"position\\\": 4}, \\\"Process pressure \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"1e-3\\\", \\\"group_id\\\": 4, \\\"position\\\": 4}, \\\"Heater temperature \\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"760\\\", \\\"group_id\\\": 4, \\\"position\\\": 7}, \\\"Process pressure PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6, \\\"position\\\": 4}, \\\"Process pressure Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mbar\\\", \\\"units\\\": [\\\"mbar\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8, \\\"position\\\": 4}, \\\"Heater temperature PA\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 6}, \\\"Laser Rastering Speed\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\\", \\\"units\\\": [], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 7, \\\"position\\\": 3}, \\\"Heater temperature Pre\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\u00b0C\\\", \\\"units\\\": [\\\"\\u00b0C\\\"], \\\"value\\\": \\\"\\\", \\\"group_id\\\": 8}, \\\"Heater-target distance\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"mm\\\", \\\"units\\\": [\\\"mm\\\"], \\\"value\\\": \\\"38\\\", \\\"group_id\\\": 4, \\\"position\\\": 8}, \\\"Laser Rastering Geometry\\\": {\\\"type\\\": \\\"select\\\", \\\"value\\\": \\\"none\\\", \\\"options\\\": [\\\"none\\\", \\\"on a square\\\", \\\"on a rectangle\\\", \\\"on a line\\\", \\\"other\\\"], \\\"group_id\\\": 7, \\\"position\\\": 2}, \\\"Layer Progressive Number\\\": {\\\"type\\\": \\\"number\\\", \\\"unit\\\": \\\"\\\", \\\"units\\\": [], \\\"value\\\": \\\"1\\\", \\\"group_id\\\": 4, \\\"position\\\": 1, \\\"required\\\": true}}}\",\n",
      "  \"metadata_decoded\": {\n",
      "    \"elabftw\": {\n",
      "      \"extra_fields_groups\": [\n",
      "        {\n",
      "          \"id\": 4,\n",
      "          \"name\": \"Process\"\n",
      "        },\n",
      "        {\n",
      "          \"id\": 7,\n",
      "          \"name\": \"Laser\"\n",
      "        },\n",
      "        {\n",
      "          \"id\": 8,\n",
      "          \"name\": \"Pre Annealing\"\n",
      "        },\n",
      "        {\n",
      "          \"id\": 6,\n",
      "          \"name\": \"Post Annealing\"\n",
      "        },\n",
      "        {\n",
      "          \"id\": 3,\n",
      "          \"name\": \"Instruments\"\n",
      "        }\n",
      "      ]\n",
      "    },\n",
      "    \"extra_fields\": {\n",
      "      \"Sample\": {\n",
      "        \"type\": \"items\",\n",
      "        \"value\": 855,\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 0\n",
      "      },\n",
      "      \"Target\": {\n",
      "        \"type\": \"items\",\n",
      "        \"value\": 854,\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 2,\n",
      "        \"required\": true\n",
      "      },\n",
      "      \"Chamber\": {\n",
      "        \"type\": \"items\",\n",
      "        \"value\": 72,\n",
      "        \"group_id\": 3,\n",
      "        \"position\": 0\n",
      "      },\n",
      "      \"Duration\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"s\",\n",
      "        \"units\": [\n",
      "          \"s\",\n",
      "          \"min\"\n",
      "        ],\n",
      "        \"value\": \"340\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 5\n",
      "      },\n",
      "      \"Spot Area\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"mm^2\",\n",
      "        \"units\": [\n",
      "          \"mm^2\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 7,\n",
      "        \"position\": 1\n",
      "      },\n",
      "      \"Thickness\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"u.c.\",\n",
      "        \"units\": [\n",
      "          \"u.c.\",\n",
      "          \"s\"\n",
      "        ],\n",
      "        \"value\": \"10\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 6\n",
      "      },\n",
      "      \"Buffer gas\": {\n",
      "        \"type\": \"select\",\n",
      "        \"value\": \"O2\",\n",
      "        \"options\": [\n",
      "          \"O2\",\n",
      "          \"N2\",\n",
      "          \"Ar\",\n",
      "          \"\"\n",
      "        ],\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 3\n",
      "      },\n",
      "      \"Duration PA\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"s\",\n",
      "        \"units\": [\n",
      "          \"s\",\n",
      "          \"min\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 6\n",
      "      },\n",
      "      \"Duration Pre\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"s\",\n",
      "        \"units\": [\n",
      "          \"s\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 8\n",
      "      },\n",
      "      \"Laser System\": {\n",
      "        \"type\": \"text\",\n",
      "        \"value\": \"Excimer \",\n",
      "        \"group_id\": 3\n",
      "      },\n",
      "      \"RHEED System\": {\n",
      "        \"type\": \"text\",\n",
      "        \"value\": \"staib\",\n",
      "        \"group_id\": 3\n",
      "      },\n",
      "      \"Buffer gas PA\": {\n",
      "        \"type\": \"select\",\n",
      "        \"value\": \"O2\",\n",
      "        \"options\": [\n",
      "          \"O2\",\n",
      "          \"N2\",\n",
      "          \"Ar\",\n",
      "          \"\"\n",
      "        ],\n",
      "        \"group_id\": 6,\n",
      "        \"position\": 3\n",
      "      },\n",
      "      \"Buffer gas Pre\": {\n",
      "        \"type\": \"select\",\n",
      "        \"value\": \"O2\",\n",
      "        \"options\": [\n",
      "          \"O2\",\n",
      "          \"N2\",\n",
      "          \"Ar\",\n",
      "          \"\"\n",
      "        ],\n",
      "        \"group_id\": 8,\n",
      "        \"position\": 3\n",
      "      },\n",
      "      \"Heating Method\": {\n",
      "        \"type\": \"select\",\n",
      "        \"value\": \"Radiative Heater\",\n",
      "        \"options\": [\n",
      "          \"Radiative Heater\",\n",
      "          \"Laser Heater\"\n",
      "        ],\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 9\n",
      "      },\n",
      "      \"Laser Intensity\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"J/(s cm^2)\",\n",
      "        \"units\": [\n",
      "          \"J/(s cm^2)\"\n",
      "        ],\n",
      "        \"value\": \"1.5\",\n",
      "        \"group_id\": 7,\n",
      "        \"position\": 0\n",
      "      },\n",
      "      \"Repetition rate\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"Hz\",\n",
      "        \"units\": [\n",
      "          \"Hz\"\n",
      "        ],\n",
      "        \"value\": \"1\",\n",
      "        \"group_id\": 7,\n",
      "        \"position\": 4\n",
      "      },\n",
      "      \"Process pressure \": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"mbar\",\n",
      "        \"units\": [\n",
      "          \"mbar\"\n",
      "        ],\n",
      "        \"value\": \"1e-3\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 4\n",
      "      },\n",
      "      \"Heater temperature \": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\\u00b0C\",\n",
      "        \"units\": [\n",
      "          \"\\u00b0C\"\n",
      "        ],\n",
      "        \"value\": \"760\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 7\n",
      "      },\n",
      "      \"Process pressure PA\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"mbar\",\n",
      "        \"units\": [\n",
      "          \"mbar\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 6,\n",
      "        \"position\": 4\n",
      "      },\n",
      "      \"Process pressure Pre\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"mbar\",\n",
      "        \"units\": [\n",
      "          \"mbar\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 8,\n",
      "        \"position\": 4\n",
      "      },\n",
      "      \"Heater temperature PA\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\\u00b0C\",\n",
      "        \"units\": [\n",
      "          \"\\u00b0C\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 6\n",
      "      },\n",
      "      \"Laser Rastering Speed\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\",\n",
      "        \"units\": [],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 7,\n",
      "        \"position\": 3\n",
      "      },\n",
      "      \"Heater temperature Pre\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\\u00b0C\",\n",
      "        \"units\": [\n",
      "          \"\\u00b0C\"\n",
      "        ],\n",
      "        \"value\": \"\",\n",
      "        \"group_id\": 8\n",
      "      },\n",
      "      \"Heater-target distance\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"mm\",\n",
      "        \"units\": [\n",
      "          \"mm\"\n",
      "        ],\n",
      "        \"value\": \"38\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 8\n",
      "      },\n",
      "      \"Laser Rastering Geometry\": {\n",
      "        \"type\": \"select\",\n",
      "        \"value\": \"none\",\n",
      "        \"options\": [\n",
      "          \"none\",\n",
      "          \"on a square\",\n",
      "          \"on a rectangle\",\n",
      "          \"on a line\",\n",
      "          \"other\"\n",
      "        ],\n",
      "        \"group_id\": 7,\n",
      "        \"position\": 2\n",
      "      },\n",
      "      \"Layer Progressive Number\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\",\n",
      "        \"units\": [],\n",
      "        \"value\": \"1\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 1,\n",
      "        \"required\": true\n",
      "      }\n",
      "    }\n",
      "  },\n",
      "  \"modified_at\": \"2026-01-20 16:17:59\",\n",
      "  \"next_step\": \"add process data\",\n",
      "  \"orcid\": \"0000-0003-4231-9776\",\n",
      "  \"page\": \"experiments\",\n",
      "  \"rating\": 0,\n",
      "  \"recent_comment\": null,\n",
      "  \"related_experiments_links\": [\n",
      "    {\n",
      "      \"entityid\": 46,\n",
      "      \"title\": \"Na-26-001 deposition test II\",\n",
      "      \"custom_id\": null,\n",
      "      \"link_state\": 1,\n",
      "      \"page\": \"experiments.php\",\n",
      "      \"type\": \"experiments\",\n",
      "      \"category_title\": \"Deposition\",\n",
      "      \"category_color\": \"8b8d43\",\n",
      "      \"status_title\": null,\n",
      "      \"status_color\": null\n",
      "    }\n",
      "  ],\n",
      "  \"related_items_links\": [],\n",
      "  \"sharelink\": \"https://elabftw.fisica.unina.it:8080/experiments.php?mode=view&id=45\",\n",
      "  \"state\": 1,\n",
      "  \"status\": null,\n",
      "  \"status_color\": null,\n",
      "  \"status_title\": null,\n",
      "  \"steps\": [\n",
      "    {\n",
      "      \"id\": 35,\n",
      "      \"item_id\": 45,\n",
      "      \"body\": \"add process data\",\n",
      "      \"ordering\": 1,\n",
      "      \"finished\": 0,\n",
      "      \"finished_time\": null,\n",
      "      \"deadline\": null,\n",
      "      \"deadline_notif\": 0\n",
      "    },\n",
      "    {\n",
      "      \"id\": 36,\n",
      "      \"item_id\": 45,\n",
      "      \"body\": \"add RHEED data\",\n",
      "      \"ordering\": 2,\n",
      "      \"finished\": 0,\n",
      "      \"finished_time\": null,\n",
      "      \"deadline\": null,\n",
      "      \"deadline_notif\": 0\n",
      "    },\n",
      "    {\n",
      "      \"id\": 37,\n",
      "      \"item_id\": 45,\n",
      "      \"body\": \"add RHEED images\",\n",
      "      \"ordering\": 3,\n",
      "      \"finished\": 0,\n",
      "      \"finished_time\": null,\n",
      "      \"deadline\": null,\n",
      "      \"deadline_notif\": 0\n",
      "    }\n",
      "  ],\n",
      "  \"tags\": null,\n",
      "  \"tags_id\": null,\n",
      "  \"team\": 1,\n",
      "  \"team_name\": \"Default team\",\n",
      "  \"timestamped\": 0,\n",
      "  \"timestamped_at\": null,\n",
      "  \"timestampedby\": null,\n",
      "  \"title\": \"Na-26-001 deposition test I\",\n",
      "  \"type\": \"experiments\",\n",
      "  \"uploads\": [],\n",
      "  \"userid\": 2\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n",
    "    x = json.load(f)\n",
    "    print(json.dumps(x,indent=2))\n",
    "    f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2e857a5-efdb-4177-a5cc-063270985531",
   "metadata": {},
   "source": [
    "For testing purposes now we'll create and print a simplified dictionary containing a sample of the harvested data for each group of metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dae4b791-fd08-4f41-ba42-82edcf4e3cde",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"instrument\": {\n",
      "    \"deposition_chamber\": 72,\n",
      "    \"laser_system\": \"Excimer \",\n",
      "    \"rheed_system\": \"staib\"\n",
      "  },\n",
      "  \"multilayer\": {\n",
      "    \"layer_1\": {\n",
      "      \"operator\": \"Emiliano Di Gennaro\",\n",
      "      \"sample\": 855,\n",
      "      \"temperature\": \"760\",\n",
      "      \"target\": 854\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n",
    "    rawdata = json.load(f)\n",
    "    extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n",
    "    ordered = {\n",
    "        \"instrument\": {\n",
    "            \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n",
    "            \"laser_system\": extra[\"Laser System\"][\"value\"],\n",
    "            \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n",
    "        },\n",
    "        \"multilayer\": {\n",
    "            \"layer_1\": {\n",
    "                \"operator\": rawdata[\"fullname\"],\n",
    "                \"sample\": extra[\"Sample\"][\"value\"], # ID of associated sample - useless as is!\n",
    "                \"temperature\": extra[\"Heater temperature \"][\"value\"], # space at the end is a config error in eLab!\n",
    "                \"target\": extra[\"Target\"][\"value\"] # ID of associated resource (PLD target) - useless as is!\n",
    "            }\n",
    "        },\n",
    "    }\n",
    "    print(json.dumps(ordered,indent=2))\n",
    "    f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9992a47-ce3c-47ec-94bd-26fbec020962",
   "metadata": {},
   "source": [
    "Some issues rise here:\n",
    "* First of all the fields \"deposition_chamber\", \"sample\" and \"target\" **refer to the value of the eLabFTW ID of the associated resource** which is useless as is since it does not contain any relevant data on these objects;\n",
    "* Second, the same sample can have two different eLab Experiments associated to it, each representing **a different layer** of the deposition.\n",
    "\n",
    "> Note: a layer progressive number is tracked by the scientist, and it can be found in the JSON dictionary under `metadata_decoded -> extra_fields -> Layer Progressive Number -> value`.\n",
    "\n",
    "### Multiple layers from known sources\n",
    "One problem at a time: first of all I can create an \"ordered\" dictionary with an empty \"multilayer\" key and append the layer-specific value of every layer later using the *dict().update()* method."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b3afa42e-e982-4dd3-9ea6-cf7918dc276f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"instrument\": {\n",
      "    \"deposition_chamber\": 72,\n",
      "    \"laser_system\": \"Excimer \",\n",
      "    \"rheed_system\": \"staib\"\n",
      "  },\n",
      "  \"multilayer\": {\n",
      "    \"layer_1\": {\n",
      "      \"operator\": \"Emiliano Di Gennaro\",\n",
      "      \"sample\": {\n",
      "        \"type\": \"items\",\n",
      "        \"value\": 855,\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 0\n",
      "      },\n",
      "      \"temperature\": {\n",
      "        \"type\": \"number\",\n",
      "        \"unit\": \"\\u00b0C\",\n",
      "        \"units\": [\n",
      "          \"\\u00b0C\"\n",
      "        ],\n",
      "        \"value\": \"760\",\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 7\n",
      "      },\n",
      "      \"target\": {\n",
      "        \"type\": \"items\",\n",
      "        \"value\": 854,\n",
      "        \"group_id\": 4,\n",
      "        \"position\": 2,\n",
      "        \"required\": true\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as f:\n",
    "    rawdata = json.load(f)\n",
    "    extra = rawdata[\"metadata_decoded\"][\"extra_fields\"]\n",
    "    layers = {\n",
    "        \"layer_1\": {\n",
    "                \"operator\": rawdata[\"fullname\"],\n",
    "                \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n",
    "                \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n",
    "                \"target\": extra[\"Target\"]\n",
    "            }\n",
    "    }\n",
    "\n",
    "    ordered = {\n",
    "        \"instrument\": {\n",
    "            \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n",
    "            \"laser_system\": extra[\"Laser System\"][\"value\"],\n",
    "            \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n",
    "        },\n",
    "        \"multilayer\": {\n",
    "        },\n",
    "    }\n",
    "    for l in layers:\n",
    "        ordered[\"multilayer\"].update(\n",
    "            {l: layers[l]}\n",
    "        )\n",
    "    print(json.dumps(ordered, indent=2))\n",
    "    f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a7ff14f-d2fc-4485-a174-a23248791a6f",
   "metadata": {},
   "source": [
    "Now entering the second layer: Experiment 46.\n",
    "\n",
    "If I were to create a \"layers\" dictionary with the same info from the two different experiments it would look like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9212afba-9868-467f-ac4d-8cbce0f1537a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"layer_1\": {\n",
      "    \"operator\": \"Emiliano Di Gennaro\",\n",
      "    \"sample\": {\n",
      "      \"type\": \"items\",\n",
      "      \"value\": 855,\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 0\n",
      "    },\n",
      "    \"temperature\": {\n",
      "      \"type\": \"number\",\n",
      "      \"unit\": \"\\u00b0C\",\n",
      "      \"units\": [\n",
      "        \"\\u00b0C\"\n",
      "      ],\n",
      "      \"value\": \"500\",\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 7\n",
      "    },\n",
      "    \"target\": {\n",
      "      \"type\": \"items\",\n",
      "      \"value\": 466,\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 2,\n",
      "      \"required\": true\n",
      "    }\n",
      "  },\n",
      "  \"layer_2\": \"\",\n",
      "  \"layer_0\": {\n",
      "    \"operator\": \"Emiliano Di Gennaro\",\n",
      "    \"sample\": {\n",
      "      \"type\": \"items\",\n",
      "      \"value\": 855,\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 0\n",
      "    },\n",
      "    \"temperature\": {\n",
      "      \"type\": \"number\",\n",
      "      \"unit\": \"\\u00b0C\",\n",
      "      \"units\": [\n",
      "        \"\\u00b0C\"\n",
      "      ],\n",
      "      \"value\": \"760\",\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 7\n",
      "    },\n",
      "    \"target\": {\n",
      "      \"type\": \"items\",\n",
      "      \"value\": 854,\n",
      "      \"group_id\": 4,\n",
      "      \"position\": 2,\n",
      "      \"required\": true\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "with open(\"../tests/objects/experiment_45_elab.json\", \"r\") as L01file, open(\"../tests/objects/experiment_46_elab.json\", \"r\") as L02file:\n",
    "    raw01 = json.load(L01file)\n",
    "    raw02 = json.load(L02file)\n",
    "    layer_list = [raw01, raw02]\n",
    "\n",
    "    layers = { \"layer_\" + str(index + 1) : \"\" for index in range(len(layer_list)) }\n",
    "    for i,layer in enumerate(layer_list):\n",
    "        extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n",
    "        layers.update({\n",
    "            f\"layer_{i}\": {\n",
    "                \"operator\": layer[\"fullname\"],\n",
    "                \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n",
    "                \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n",
    "                \"target\": extra[\"Target\"]\n",
    "            }\n",
    "        })\n",
    "\n",
    "    print(json.dumps(layers, indent=2))\n",
    "\n",
    "    L01file.close()\n",
    "    L02file.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "82700735-73fb-4b0a-aa97-3072c6330a48",
   "metadata": {},
   "source": [
    "But that only works because I know exactly how many layers there are and in which order they're stored.\n",
    "\n",
    "How we're storing and downloading the experiment data related to a same sample is still subject of discussion. The parser should be able to associate different JSON files to their own sample and group the files related to the same experiment; each file has a *Layer Progressive Number* which associates the data saved in the file to a specific layer, and it's imperative that the parser:\n",
    "* Recognises the absence of a layer (e.g. [1, 2, 4], returns that no 3rd layer exists);\n",
    "* Names every layer \"layer_X\" where X is the progressive number starting from 1 (not 0).\n",
    "\n",
    "### Multiple layers from uncategorized files\n",
    "Supposing I don't know that files *experiment_45_elab.json* and *experiment_46_elab.json* contain data of layers 1 and 2 of the same sample NA-26-001 I can always load every file in the folder indiscriminately and:\n",
    "* Filter out every non-eLabFTW file (by some recognition pattern).\n",
    "* Group the data by the sample it's associated to.\n",
    "<!--* For every sample, sort the data based on the layer it refers to.-->\n",
    "\n",
    "#### Filter out non-eLabFTW files using the key \"elabid\" as challenge\n",
    "If the key *elabid* is present in the root of a JSON file then assume the file is an eLabFTW experiment output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d8b83ba0-6b5b-425a-b365-8f5fa6ab4117",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "../tests/objects/experiment_45_elab.json\n",
      "../tests/objects/experiment_46_elab.json\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "elabfiles = []\n",
    "for filename in os.listdir(\"../tests/objects\"):\n",
    "    if filename.endswith(\".json\"):\n",
    "        try:\n",
    "            with open(os.path.join(\"../tests/objects\", filename), \"r\") as f:\n",
    "                data = json.load(f)\n",
    "                if data.get(\"elabid\"): # insert specific NeXus requirements here later\n",
    "                    \n",
    "                    elabfiles.append(filename)\n",
    "            f.close()\n",
    "        except json.decoder.JSONDecodeError as e: # invalid files \"masked\" as JSON\n",
    "            #print(f\"wait a moment: {e}\") # just for debug\n",
    "            pass\n",
    "\n",
    "for i in elabfiles:\n",
    "    print(os.path.join(\"../tests/objects/\", i))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "825f32d1-eb8f-4f9a-af94-d7258e897f8f",
   "metadata": {},
   "source": [
    "#### Group the data by sample\n",
    "Lookup the value of the key \"*Sample*\" in the extra fields; two experiments with that same value are associated to the same sample. To obtain this result the best course of action is *probably* to create a dictionary with every unique sample and all the data associated with it:\n",
    "* The dictionary starts empty.\n",
    "* The parser then reads the data from the first eLab-compliant file, in particular it reads the ID of the sample associated.\n",
    "* If the ID (later the name) of the sample is not a key in the root of sample_dict create a new key, otherwise skip.\n",
    "* Add layer-specific data to new layer in the sample_dict."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4b7961e6-817f-44b7-b2de-16d15d9ec26a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{855: {'instrument': {'deposition_chamber': 72, 'laser_system': 'Excimer ', 'rheed_system': 'staib'}, 'multilayer': {'layer_1': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-20 16:11:32', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '760', 'group_id': 4, 'position': 7}, 'target': {'type': 'items', 'value': 854, 'group_id': 4, 'position': 2, 'required': True}}, 'layer_2': {'operator': 'Emiliano Di Gennaro', 'created_at': '2026-01-20 16:18:48', 'sample': {'type': 'items', 'value': 855, 'group_id': 4, 'position': 0}, 'temperature': {'type': 'number', 'unit': '°C', 'units': ['°C'], 'value': '500', 'group_id': 4, 'position': 7}, 'target': {'type': 'items', 'value': 466, 'group_id': 4, 'position': 2, 'required': True}}}}}\n"
     ]
    }
   ],
   "source": [
    "sample_dict = {}\n",
    "for filename in elabfiles:\n",
    "    with open(os.path.join(\"../tests/objects/\", filename), \"r\") as f:\n",
    "        layer = json.load(f)\n",
    "        extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n",
    "        sample = extra[\"Sample\"][\"value\"]\n",
    "        lpn = int(extra[\"Layer Progressive Number\"][\"value\"]) # Layer Progressive Number\n",
    "        if not sample_dict.get(sample): # if not existent yet, initialize\n",
    "            sample_dict[sample] = {\n",
    "                \"instrument\": {\n",
    "                    \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n",
    "                    \"laser_system\": extra[\"Laser System\"][\"value\"],\n",
    "                    \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n",
    "                },\n",
    "                \"multilayer\": {}\n",
    "                }\n",
    "        sample_dict[sample][\"multilayer\"][f\"layer_{lpn}\"] = {\n",
    "            \"operator\": layer[\"fullname\"],\n",
    "            \"created_at\": layer[\"created_at\"],\n",
    "            \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n",
    "            \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n",
    "            \"target\": extra[\"Target\"]\n",
    "        }\n",
    "            \n",
    "print(sample_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27f876e0-291e-43d6-8b9d-3ce533896e5e",
   "metadata": {},
   "source": [
    "#### Look out for missing layers\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6a5281dc-7fc3-4de7-845c-2bc2b54d4bb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "#sample_dict[855][\"multilayer\"][\"layer_4\"] = {} # for debug purposes\n",
    "\n",
    "def find_missing(lst):\n",
    "    '''\n",
    "    Finds missing integers in unsorted list.\n",
    "    Time complexity is NlogN but since N is at most 10^1 it's not a problem for us.\n",
    "    Source: geekforgeeks.org.\n",
    "    '''\n",
    "    lst.sort() # sorts list\n",
    "    return sorted(set(range(lst[0], lst[-1])) - set(lst))\n",
    "\n",
    "for item in sample_dict:\n",
    "    layer_names = list(sample_dict[item].get(\"multilayer\").keys())\n",
    "    numbers = sorted(int(layer.split('_')[1]) for layer in layer_names)\n",
    "    missing = find_missing(numbers)\n",
    "    if missing:\n",
    "        print(\"Warning: some layers appear to be missing.\")\n",
    "        print(f\"The missing layers are: \")\n",
    "        for i in missing:\n",
    "            print(f\"* layer_{i}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "028ac2b1-3389-472d-ba05-de6cfc9a9fda",
   "metadata": {},
   "source": [
    "#### Find duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "14583887-8feb-4507-a06d-ddc557c0a875",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_duplicates(lst): # list of integers\n",
    "    result = []\n",
    "    lst.sort() # sort list just in case\n",
    "    for i in range(len(lst)-1):\n",
    "        #print(lst[i]) # debug\n",
    "        if lst[i] == lst[i+1]:\n",
    "            result.append(lst[i])\n",
    "    return sorted(set(result))\n",
    "\n",
    "for item in sample_dict:\n",
    "    layer_names = list(sample_dict[item].get(\"multilayer\").keys())\n",
    "    numbers = sorted(int(layer.split('_')[1]) for layer in layer_names)\n",
    "    dupes = find_duplicates(numbers)\n",
    "    if dupes:\n",
    "        print(\"Warning: some layers are duplicated.\")\n",
    "        print(f\"The duplicate layers are: \")\n",
    "        for i in dupes:\n",
    "            print(f\"* layer_{i}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2716da5-ff75-45d1-b765-d8bfb2ecaf71",
   "metadata": {},
   "source": [
    "### Names not ID's\n",
    "> TO-DO: Replace ID's of eLabFTW items with their actual names (might need a working API key).\n",
    "\n",
    "Since eLab ID's are not relevant to this project we need factual information about the sample itself. For instance, we want to fetch its name, chemical formula and dimensions - all data present on the eLabFTW entry, obtainable on the *items* API endpoint using the eLab ID of the sample."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "24793e2b-67bb-4e8b-9d93-7802d3af7fca",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "Paste API key here:  ········\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'access_key': None, 'available': 1, 'body': '', 'body_html': '', 'book_can_overlap': 1, 'book_cancel_minutes': 0, 'book_is_cancellable': 1, 'book_max_minutes': 0, 'book_max_slots': 0, 'book_users_can_in_past': 0, 'canbook': '{\"base\": 40, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canread': '{\"base\": 40, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canread_is_immutable': 0, 'canwrite': '{\"base\": 30, \"teams\": [], \"users\": [], \"teamgroups\": []}', 'canwrite_is_immutable': 0, 'category': 19, 'category_color': '29aeb9', 'category_title': 'NEW_Sample', 'comments': [], 'compounds': [], 'containers': [], 'content_type': 1, 'created_at': '2026-01-07 13:20:02', 'custom_id': None, 'date': '2026-01-07', 'elabid': '20260107-e83642d2b806e5db5ebb0d6309d874f4b4461114', 'events_start': None, 'events_start_itemid': None, 'exclusive_edit_mode': None, 'experiments_links': [], 'firstname': 'Emiliano', 'fullname': 'Emiliano Di Gennaro', 'id': 855, 'is_bookable': 0, 'is_pinned': 0, 'is_procurable': 0, 'items_links': [{'entityid': 853, 'title': 'test_01', 'custom_id': None, 'elabid': '20260107-8a39f3d60b8422a878f14dbc5aa1956a6b939e07', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'NEW_Substrates batch', 'category_color': '29aeb9', 'status_title': 'Available', 'status_color': '6a7753'}, {'entityid': 180, 'title': 'NFFA -01', 'custom_id': None, 'elabid': '20240422-d90c61f98b5a368b877b3c7bcdc5448612037b0e', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'Proposal', 'category_color': 'cdab8f', 'status_title': None, 'status_color': None}, {'entityid': 826, 'title': 'CART - 6', 'custom_id': None, 'elabid': '20250415-b8c364bae6a2f74be82de1d9370c96d6b031c4d4', 'link_state': 1, 'is_bookable': 0, 'page': 'database.php', 'type': 'items', 'category_title': 'Sample Position', 'category_color': '26a269', 'status_title': None, 'status_color': None}], 'lastchangeby': 11, 'lastname': 'Di Gennaro', 'locked': 0, 'locked_at': None, 'lockedby': None, 'metadata': '{\"extra_fields\": {\"Owner\": {\"type\": \"users\", \"value\": 2, \"position\": 1, \"required\": true}, \"STD-ID\": {\"type\": \"number\", \"unit\": \"\", \"units\": [], \"value\": \"26001\", \"position\": 0, \"required\": true, \"description\": \"This is an internal ID identifier\"}, \"Position\": {\"type\": \"items\", \"value\": 826, \"position\": 4}, \"Proposal\": {\"type\": \"items\", \"value\": 180, \"position\": 5}, \"Subtrate batch\": {\"type\": \"items\", \"value\": 853, \"position\": 2}, \"Substrate Holder\": {\"type\": \"text\", \"value\": \"1\", \"position\": 3}}}', 'metadata_decoded': {'extra_fields': {'Owner': {'type': 'users', 'value': 2, 'position': 1, 'required': True}, 'STD-ID': {'type': 'number', 'unit': '', 'units': [], 'value': '26001', 'position': 0, 'required': True, 'description': 'This is an internal ID identifier'}, 'Position': {'type': 'items', 'value': 826, 'position': 4}, 'Proposal': {'type': 'items', 'value': 180, 'position': 5}, 'Subtrate batch': {'type': 'items', 'value': 853, 'position': 2}, 'Substrate Holder': {'type': 'text', 'value': '1', 'position': 3}}}, 'modified_at': '2026-01-21 22:04:27', 'next_step': None, 'orcid': '0000-0003-4231-9776', 'page': 'database', 'proc_currency': 0, 'proc_pack_qty': 0, 'proc_price_notax': '0.00', 'proc_price_tax': '0.00', 'rating': 0, 'recent_comment': None, 'related_experiments_links': [{'entityid': 41, 'title': 'NEW PLD Deposition Layer', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': 'Running', 'status_color': '29AEB9'}, {'entityid': 43, 'title': 'NEW PLD Deposition Layer I', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}, {'entityid': 45, 'title': 'Na-26-001 deposition test I', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}, {'entityid': 46, 'title': 'Na-26-001 deposition test II', 'custom_id': None, 'link_state': 1, 'page': 'experiments.php', 'type': 'experiments', 'category_title': 'Deposition', 'category_color': '8b8d43', 'status_title': None, 'status_color': None}], 'related_items_links': [], 'sharelink': 'https://elabftw.fisica.unina.it:8080/database.php?mode=view&id=855', 'state': 1, 'status': 1, 'status_color': '6a7753', 'status_title': 'Available', 'steps': [], 'tags': None, 'tags_id': None, 'team': 1, 'team_name': 'Default team', 'timestamped': 0, 'timestamped_at': None, 'timestampedby': None, 'title': 'Na-26-001', 'type': 'items', 'uploads': [], 'userid': 2}\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "from getpass import getpass # not to leak my key through jupyter + git\n",
    "\n",
    "def call_sample(API_KEY, elabid, API_URL=\"https://elabftw.fisica.unina.it/\"):\n",
    "    full_elab_url = f\"{API_URL}api/v2\" # API endpoint root for eLabFTW\n",
    "    items_url = f\"{full_elab_url}/items\" # API endpoint /items\n",
    "    header = {\n",
    "        \"Authorization\": API_KEY,\n",
    "        \"Content-Type\": \"application/json\"\n",
    "    }\n",
    "    sample = requests.get(\n",
    "        headers=header,\n",
    "        url=f\"{items_url}/{elabid}\",\n",
    "        verify=True\n",
    "    )\n",
    "    return sample.json()\n",
    "\n",
    "apikey = getpass(\"Paste API key here: \")\n",
    "testing = call_sample(apikey, 855)\n",
    "print(testing)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89d89d7a-0e13-42c2-83ba-fb03d5a2c39b",
   "metadata": {},
   "source": [
    "#### Filtering data\n",
    "Now let's select only the useful data, which at the moment is just the name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0ffa8e82-2d7e-4dae-9081-a776f1e5ba9f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "Paste API key here:  ········\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Na-26-001\n"
     ]
    }
   ],
   "source": [
    "resources_ids = [ i for i in sample_dict ]\n",
    "first_sample = resources_ids[0]\n",
    "\n",
    "apikey = getpass(\"Paste API key here: \")\n",
    "sample_data = call_sample(apikey, first_sample)\n",
    "sample_title = sample_data[\"title\"]\n",
    "print(sample_title)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba4f0459-8da0-494d-b0c2-23dae509538c",
   "metadata": {},
   "source": [
    "Now all that's left for us to do is merge the results to create a single dictionary with the name of the sample and its different layers."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d714bde9-73a2-4365-b54e-d129533aa3de",
   "metadata": {},
   "source": [
    "## Basic parser\n",
    "The parser needs:\n",
    "* The code from the section \"*Multiple layers from uncategorized files*\" responsible for fetching and grouping data on the layers.\n",
    "* The `find_missing` and `find_duplicates` functions.\n",
    "* The code from the previous section to collect the names of the samples."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "daa59593-fd40-4b8a-b7f5-5cdbd6482fc3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "Paste API key here:  ········\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "   \"Na-26-001\": {\n",
      "      \"instrument\": {\n",
      "         \"deposition_chamber\": 72,\n",
      "         \"laser_system\": \"Excimer \",\n",
      "         \"rheed_system\": \"staib\"\n",
      "      },\n",
      "      \"multilayer\": {\n",
      "         \"layer_1\": {\n",
      "            \"operator\": \"Emiliano Di Gennaro\",\n",
      "            \"created_at\": \"2026-01-20 16:11:32\",\n",
      "            \"sample\": {\n",
      "               \"type\": \"items\",\n",
      "               \"value\": 855,\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 0\n",
      "            },\n",
      "            \"temperature\": {\n",
      "               \"type\": \"number\",\n",
      "               \"unit\": \"\\u00b0C\",\n",
      "               \"units\": [\n",
      "                  \"\\u00b0C\"\n",
      "               ],\n",
      "               \"value\": \"760\",\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 7\n",
      "            },\n",
      "            \"target\": {\n",
      "               \"type\": \"items\",\n",
      "               \"value\": 854,\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 2,\n",
      "               \"required\": true\n",
      "            }\n",
      "         },\n",
      "         \"layer_2\": {\n",
      "            \"operator\": \"Emiliano Di Gennaro\",\n",
      "            \"created_at\": \"2026-01-20 16:18:48\",\n",
      "            \"sample\": {\n",
      "               \"type\": \"items\",\n",
      "               \"value\": 855,\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 0\n",
      "            },\n",
      "            \"temperature\": {\n",
      "               \"type\": \"number\",\n",
      "               \"unit\": \"\\u00b0C\",\n",
      "               \"units\": [\n",
      "                  \"\\u00b0C\"\n",
      "               ],\n",
      "               \"value\": \"500\",\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 7\n",
      "            },\n",
      "            \"target\": {\n",
      "               \"type\": \"items\",\n",
      "               \"value\": 466,\n",
      "               \"group_id\": 4,\n",
      "               \"position\": 2,\n",
      "               \"required\": true\n",
      "            }\n",
      "         }\n",
      "      }\n",
      "   }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "import os, json, requests\n",
    "from getpass import getpass\n",
    "\n",
    "def valid_elabfiles(path):\n",
    "    '''Lookup directory \"path\" and\n",
    "       returns list of valid eLabFTW\n",
    "       Experiment JSON files.'''\n",
    "    elabfiles = []\n",
    "    for filename in os.listdir(path):\n",
    "        if filename.endswith(\".json\"):\n",
    "            try:\n",
    "                with open(os.path.join(path, filename), \"r\") as f:\n",
    "                    data = json.load(f)\n",
    "                    if data.get(\"elabid\"): # insert specific NeXus requirements here later\n",
    "                        \n",
    "                        elabfiles.append(filename)\n",
    "                f.close()\n",
    "            except json.decoder.JSONDecodeError as e: # invalid files \"masked\" as JSON\n",
    "                #print(f\"wait a moment: {e}\") # just for debug\n",
    "                pass\n",
    "    return elabfiles\n",
    "\n",
    "def call_sample(apikey, elabid, SERVER_URL=\"https://elabftw.fisica.unina.it/\"): # TO-DO: rm default server\n",
    "    '''Queries the Resources (/items) API endpoint\n",
    "       of eLabFTW instance to request data (JSON)\n",
    "       on a certain sample given its eLab-ID.\n",
    "       \n",
    "       Requires an active (RO/RW) API key.\n",
    "       Defaults to elabftw.fisica.unina.it.'''\n",
    "    full_elab_url = f\"{SERVER_URL}api/v2\" # API endpoint root for eLabFTW\n",
    "    items_url = f\"{full_elab_url}/items\" # API endpoint /items\n",
    "    header = {\n",
    "        \"Authorization\": apikey,\n",
    "        \"Content-Type\": \"application/json\"\n",
    "    }\n",
    "    sample = requests.get(\n",
    "        headers=header,\n",
    "        url=f\"{items_url}/{elabid}\",\n",
    "        verify=True\n",
    "    )\n",
    "    return sample.json()\n",
    "\n",
    "def id2sample(apikey, elabid):\n",
    "    '''Fetches sample data (JSON) from eLabFTW\n",
    "       instance (using function \"call_sample()\")\n",
    "       and extracts significant information.\n",
    "       \n",
    "       Currently, it only returns the sample's title.'''\n",
    "    #apikey = getpass(\"Paste API key here: \") # move outside loops\n",
    "    sample_data = call_sample(apikey, elabid)\n",
    "    sample_title = sample_data[\"title\"]\n",
    "    return sample_title\n",
    "\n",
    "def fetch_and_group(path):\n",
    "    '''Fetches experiment data from eLabFTW JSON\n",
    "       files in a given folder, then \n",
    "    '''\n",
    "    sample_dict = {}\n",
    "    apikey = getpass(\"Paste API key here: \")\n",
    "    for filename in valid_elabfiles(path):\n",
    "        with open(os.path.join(path, filename), \"r\") as f:\n",
    "            layer = json.load(f)\n",
    "            extra = layer[\"metadata_decoded\"][\"extra_fields\"]\n",
    "            sample_id = extra[\"Sample\"][\"value\"]\n",
    "            sample_title = id2sample(apikey, sample_id)\n",
    "            lpn = int(extra[\"Layer Progressive Number\"][\"value\"]) # Layer Progressive Number\n",
    "            if not sample_dict.get(sample_title): # if not existent yet, initialize\n",
    "                sample_dict[sample_title] = {\n",
    "                    \"instrument\": {\n",
    "                        \"deposition_chamber\": extra[\"Chamber\"][\"value\"], # ID of associated resource (PLD chamber) - useless as is!\n",
    "                        \"laser_system\": extra[\"Laser System\"][\"value\"],\n",
    "                        \"rheed_system\": extra[\"RHEED System\"][\"value\"]\n",
    "                    },\n",
    "                    \"multilayer\": {}\n",
    "                    }\n",
    "            sample_dict[sample_title][\"multilayer\"][f\"layer_{lpn}\"] = {\n",
    "                \"operator\": layer[\"fullname\"],\n",
    "                \"created_at\": layer[\"created_at\"],\n",
    "                \"sample\": extra[\"Sample\"], # ID of associated sample - useless as is!\n",
    "                \"temperature\": extra[\"Heater temperature \"], # space at the end is a config error in eLab!\n",
    "                \"target\": extra[\"Target\"]\n",
    "            }\n",
    "    return sample_dict\n",
    "\n",
    "\n",
    "sample_dict = fetch_and_group(\"../tests/objects\")\n",
    "print(json.dumps(sample_dict, indent=3))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de1b1870-7fc3-4ee5-8cce-c098e5bf909a",
   "metadata": {},
   "source": [
    "For debug purposes, let's see which info is included in the sample_dict dictionary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "0fc6e88f-881d-413a-bfe1-377213f7dda2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# Info about sample Na-26-001:\n",
      "* The deposition chamber is 72.\n",
      "* The laser system is EXCIMER.\n",
      "* The RHEED system is STAIB.\n",
      "\n",
      "## Layers of Na-26-001:\n",
      "\n",
      "### layer_1\n",
      "* It was created at 2026-01-20 16:11:32.\n",
      "* The operator was Emiliano Di Gennaro.\n",
      "* The deposition temperature was 760 °C.\n",
      "* The target eLabID was 854.\n",
      "\n",
      "### layer_2\n",
      "* It was created at 2026-01-20 16:18:48.\n",
      "* The operator was Emiliano Di Gennaro.\n",
      "* The deposition temperature was 500 °C.\n",
      "* The target eLabID was 466.\n"
     ]
    }
   ],
   "source": [
    "for sample in sample_dict:\n",
    "    print(f\"# Info about sample {sample}:\")\n",
    "    multilayer = sample_dict[sample][\"multilayer\"]\n",
    "    instrument = sample_dict[sample][\"instrument\"]\n",
    "    deposition_chamber = instrument[\"deposition_chamber\"] # integer\n",
    "    laser_system = str(instrument[\"laser_system\"]).strip().upper() # string\n",
    "    rheed_system = str(instrument[\"rheed_system\"]).strip().upper() # string\n",
    "    \n",
    "    print(f\"* The deposition chamber is {deposition_chamber}.\")\n",
    "    print(f\"* The laser system is {laser_system}.\")\n",
    "    print(f\"* The RHEED system is {rheed_system}.\")\n",
    "    print(f\"\\n## Layers of {sample}:\")\n",
    "    for layer in multilayer:\n",
    "        print(f\"\\n### {layer}\")\n",
    "        layerdata = multilayer[layer]\n",
    "        operator = layerdata[\"operator\"]\n",
    "        created_at = layerdata[\"created_at\"]\n",
    "        temperature = layerdata[\"temperature\"][\"value\"]\n",
    "        temperature_unit = layerdata[\"temperature\"][\"unit\"]\n",
    "        target = layerdata[\"target\"][\"value\"]\n",
    "        \n",
    "        print(f\"* It was created at {created_at}.\")\n",
    "        print(f\"* The operator was {operator}.\")\n",
    "        print(f\"* The deposition temperature was {temperature} {temperature_unit}.\")\n",
    "        print(f\"* The target eLabID was {target}.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e16c6f3d-cc3a-45c1-9988-bbf0de3baf08",
   "metadata": {},
   "source": [
    "## To the next level: creating a dictionary with the same hierarchy as the final NeXus file\n",
    "\n",
    "```\n",
    "pld_fabrication\n",
    "|-sample\n",
    "| |-substrate\n",
    "| | |-name\n",
    "| |-multilayer\n",
    "| | |-LAYER\n",
    "| | | |-target\n",
    "| | | | |-name\n",
    "| | | | |-chemical_formula\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a615c496-8cb9-451f-a088-beb4672379bf",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}