Another attempt at parsing the RWA (Realtime Window Analysis) file; this approach seems to work better.

This commit is contained in:
2026-03-20 15:02:12 +01:00
parent 5cf67648af
commit 1523c973f4

View File

@@ -179,19 +179,20 @@ def deduplicate_instruments_from_layers(layers):
# "rheed_system": rheeds, # "rheed_system": rheeds,
# } # }
def analyse_rheed_data(data, n_layers: int): def analyse_rheed_data(data):
''' '''
Takes the content of a tsv file and returns a dictionary with timestamps and intensities. Takes the content of a tsv file and returns a dictionary with timestamps and intensities.
The file should contain a 2D array composed of 3N+1 columns - where N is the total number of layers in a given sample - and an unspecified number of rows. The file should contain a 2D array composed of 4 columns - where the first column is a timestamp and the other three are RHEED intensities - and an unspecified number of rows.
----- -----
Time Layer1_Int1 Layer1_Int2 Layer1_Int3 (repeat...) Time Layer1_Int1 Layer1_Int2 Layer1_Int3
----- -----
Distinct ValueErrors are raised if: Distinct ValueErrors are raised if:
- The array is not 2-dimensional; - The array is not 2-dimensional;
- The number of (intensity) columns is not a multiple of 3; - The total number of columns does not equate exactly 1+3 (= 4).
- The total number of columns does not equate exactly 3N+1.
Time is expressed in seconds, intensities are normalized (adimensional).
# TO-DO: complete this description... # TO-DO: complete this description...
Written with help from DeepSeek. Written with help from DeepSeek.
@@ -200,29 +201,17 @@ def analyse_rheed_data(data, n_layers: int):
if data.ndim != 2: if data.ndim != 2:
raise ValueError(f"Unexpected trace format: expected 2D array, got ndim = {data.ndim}.") raise ValueError(f"Unexpected trace format: expected 2D array, got ndim = {data.ndim}.")
n_cols = data.shape[1] # 0 = rows, 1 = columns n_cols = data.shape[1] # 0 = rows, 1 = columns
if (n_cols - 1) % 3 != 0: if n_cols > 4:
raise ValueError(f"Unexpected number of columns: expected 3N+1 columns, got {n_cols}.") print(f"Warning! The input file (for Realtime Window Analysis) has {n_cols-4} more than needed.\nOnly 4 columns will be considered - with the first representing time and the others representing RHEED intensities.")
if (n_cols - 1) // 3 != n_layers: if n_cols < 4:
exp = n_layers * 3 + 1 raise ValueError(f"Insufficient number of columns: expected 4, got n_cols = {n_cols}.")
raise ValueError(f"Unexpected volume of data: found {n_layers} layers, expected {exp} (3N+1) columns, got {n_cols}.")
n_time_points = data.shape[0] n_time_points = data.shape[0]
# Get time (all rows of col 0) as Float64: # Get time (all rows of col 0) as Float64:
time = data[:, 0].astype(np.float64, copy=False) # copy=False suggested by LLM for mem. eff. time = data[:, 0].astype(np.float64, copy=False) # copy=False suggested by LLM for mem. eff.
# Empty 3D array for intensities: # Get intensities (all rows of cols 1,2,3) as Float32:
intensities = np.zeros( intensities = data[:, 1:4].astype(np.float32, copy=False)
(n_layers, n_time_points, 3)
)
# Loop through layers:
for layer_index in range(n_layers):
layer_name = f"layer_{layer_index + 1}"
# Columns for this layer are from 3i+1 to 3i+3 incl. (= 3i+4 excl.)
start_col = 1 + layer_index * 3
end_col = start_col + 3 # remember this gets excluded!
# Get layer-specific intensities (all rows of columns start_col:end_col) as Float32:
intensities[layer_index, :, :] = data[:, start_col:end_col].astype(np.float32, copy=False)
return { return {
"time": time, "time": time,
@@ -566,5 +555,8 @@ if __name__=="__main__":
# and merge all data in a single file to analyse it # and merge all data in a single file to analyse it
with open(f"tests/Realtime_Window_Analysis.txt", "r") as o: with open(f"tests/Realtime_Window_Analysis.txt", "r") as o:
osc = np.loadtxt(o, delimiter="\t") osc = np.loadtxt(o, delimiter="\t")
rheed_osc = analyse_rheed_data(data=osc, n_layers=n_layers) or None # analyze rheed data first, build the file later try:
rheed_osc = analyse_rheed_data(data=osc) or None # analyze rheed data first, build the file later
except ValueError as ve:
raise ValueError(f"Error with function analyse_rheed_data. {ve}\nPlease make sure the Realtime Window Analysis file is exactly 4 columns wide - where the first column represents time and the others are RHEED intensities.")
build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5", rheed_osc=rheed_osc) build_nexus_file(result, output_path=f"output/sample-{sample_name}-nexus.h5", rheed_osc=rheed_osc)