Coverage for tests/utility/test_data.py: 100%
349 statements
coverage.py v7.10.5, created at 2025-08-28 09:13 +0000
1"""Test module for the functions in the `utility/data.py` module.
3This module contains unit tests for the functions implemented in the `data.py` module. The purpose of these tests is to
4ensure the correct functionality of each function in different scenarios and to validate that the expected outputs are
5returned.
7Tests should cover various edge cases, valid inputs, and any other conditions that are necessary to confirm the
8robustness of the functions."""

import os
import tempfile

import pytest
from bs4 import BeautifulSoup

from app.utility.data import *
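# NOTE: `np` (numpy), `pd` (pandas), `base64`, and `st` (streamlit) are used below without explicit
# imports; they are assumed to be re-exported by the star import from `app.utility.data` above.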


class TestMatrixToString:
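    """Tests for matrix_to_string, which formats a list of column arrays (plus an optional header) as comma-separated text in scientific notation."""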

    def test_basic_conversion(self) -> None:
        """Test basic conversion of two columns with a header."""
        arrays = [np.array([1.2, 2, 5]), np.array([1.6, 2])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00\n5.00000E+00,"
        assert result == expected

    def test_no_header(self) -> None:
        """Test conversion without a header."""
        arrays = [np.array([1.2, 2, 5]), np.array([1.6, 2])]
        result = matrix_to_string(arrays)
        expected = "1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00\n5.00000E+00,"
        assert result == expected

    def test_single_column(self) -> None:
        """Test conversion of a single column."""
        arrays = [np.array([1.2, 2, 5])]
        result = matrix_to_string(arrays, ["A"])
        expected = "A\n1.20000E+00\n2.00000E+00\n5.00000E+00"
        assert result == expected

    def test_mixed_lengths(self) -> None:
        """Test columns of different lengths; the shorter column is padded with an empty field."""
        arrays = [np.array([1.2, 2]), np.array([1.6])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,"
        assert result == expected

    def test_all_empty(self) -> None:
        """Test with all-empty columns."""
        arrays = [np.array([]), np.array([])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        assert result == "A,B\n"

    def test_no_trailing_comma(self) -> None:
        """Test that equal-length columns produce no trailing comma."""
        arrays = [np.array([1.2, 2]), np.array([1.6, 2])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00"
        assert result == expected


class TestGenerateDownloadLink:
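    """Tests for generate_download_link, which builds an HTML download link containing the data as a base64-encoded CSV."""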

    def test_basic_functionality(self) -> None:
        """Test basic functionality of generate_download_link."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        text = "Download Data"
        result = generate_download_link((x_data, y_data), header, text)
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Data" in result

    def test_no_header(self) -> None:
        """Test when no header is provided."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        text = "Download Data"
        result = generate_download_link((x_data, y_data), None, text)
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Data" in result

    def test_with_special_characters(self) -> None:
        """Test if the function handles special characters in the header and text."""
        x_data = [np.array([1, 2])]
        y_data = [np.array([3, 4])]
        header = ["Col@1", "Col#2"]
        text = "Download with Special Characters"
        result = generate_download_link((x_data, y_data), header, text)

        # Extract the base64 string from the result
        base64_string = result.split("base64,")[1].split('"')[0]

        # Decode the base64 string to get the original string
        decoded_string = base64.b64decode(base64_string).decode()

        # Now check if the decoded string contains the header with special characters
        assert "Col@1" in decoded_string
        assert "Col#2" in decoded_string
        assert "Download with Special Characters" in result

    def test_no_text_provided(self) -> None:
        """Test if no text is provided (empty string)."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        result = generate_download_link((x_data, y_data), header, "")
        assert '<a href="data:text/csv;base64,' in result
        assert 'href="data:text/csv;base64,' in result
        assert "Download" not in result  # Should not have any text if empty

    def test_large_data(self) -> None:
        """Test with large data to check performance (no specific checks)."""
        x_data = [np.random.rand(100)]
        y_data = [np.random.rand(100)]
        header = [f"Col{i}" for i in range(100)]
        result = generate_download_link((x_data, y_data), header, "Download Large Data")
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Large Data" in result

    def test_b64_encoding(self) -> None:
        """Test to ensure base64 encoding is correct."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        result = generate_download_link((x_data, y_data), header, "Test Encoding")
        # Check if base64 encoding exists within the result
        assert "base64," in result


class TestProcessData:
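    """Tests for process_data, which reduces and shifts the x data and optionally normalises the y data."""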

    @pytest.fixture
    def gaussian_data(self) -> tuple[list[np.ndarray], list[np.ndarray]]:
        """Fixture to generate a Gaussian dataset with 10 data points."""
        # Generate Gaussian data with mean 0 and standard deviation 1
        n_points = 10
        x_data = np.linspace(-10, 5, n_points)  # x values from -10 to 5
        y_data = np.exp(-0.5 * (x_data**2))  # Gaussian function: e^(-x^2/2)
        return [x_data, x_data], [y_data, y_data * 2]

    def test_no_normalisation(self, gaussian_data) -> None:
        """Test with no normalisation."""
        xs_data, ys_data = gaussian_data

        # Process data without normalisation
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=False)

        # Check if x values are reduced and shifted, and y values are reduced
        expected_xs = [np.array([0.0, 1.66666667, 3.33333333, 5.0]), np.array([0.0, 1.66666667, 3.33333333, 5.0])]
        expected_ys = [
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
            np.array([2.00000000e00, 4.98704418e-01, 7.73184028e-03, 7.45330634e-06]),
        ]

        assert np.allclose(result_xs, expected_xs)
        assert np.allclose(result_ys, expected_ys)

    def test_with_normalisation(self, gaussian_data) -> None:
        """Test with normalisation."""
        xs_data, ys_data = gaussian_data

        # Process data with normalisation
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=True)

        # Check if x values are reduced and shifted, and y values are normalised
        expected_xs = [np.array([0.0, 1.66666667, 3.33333333, 5.0]), np.array([0.0, 1.66666667, 3.33333333, 5.0])]
        expected_ys = [
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
        ]

        assert np.allclose(result_xs, expected_xs)
        assert np.allclose(result_ys, expected_ys)

    def test_edge_case_empty_data(self) -> None:
        """Test with empty input data."""
        xs_data = []
        ys_data = []

        # Process empty data
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=False)

        # Check that the result is also empty
        assert result_xs == []
        assert result_ys == []


class TestGetDataIndex:
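    """Tests for get_data_index, which returns the index of the first line containing numeric data, or None if there is none."""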

    def test_no_delimiter(self) -> None:
        """Test without a delimiter (default None)."""
        content = ["header", "data starts here", "1 2 3", "4 5 6"]
        result = get_data_index(content)
        assert result == 2  # the first line with float data is at index 2

    def test_with_delimiter(self) -> None:
        """Test with a specified delimiter."""
        content = ["header", "data starts here", "1,2,3", "4,5,6"]
        result = get_data_index(content, delimiter=",")
        assert result == 2  # the first line with float data is at index 2

    def test_no_data(self) -> None:
        """Test case when there are no float data lines."""
        content = ["header", "some text", "more text"]
        result = get_data_index(content)
        assert result is None  # No line contains float data

    def test_empty_list(self) -> None:
        """Test with an empty list."""
        content = []
        result = get_data_index(content)
        assert result is None  # No data in the list

    def test_mixed_data(self) -> None:
        """Test with mixed data (some numeric and some non-numeric)."""
        content = ["header", "text", "1 2 3", "text again", "4 5 6"]
        result = get_data_index(content)
        assert result == 2  # the first line with numeric data is at index 2

    def test_non_matching_delimiter(self) -> None:
        """Test with a delimiter that doesn't match any line."""
        content = ["header", "text", "1 2 3", "4 5 6"]
        result = get_data_index(content, delimiter=",")
        assert result is None  # no lines with comma as delimiter


class TestLoadData:
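    """Tests for load_data, which parses raw byte content into lists of x and y arrays according to a delimiter and data format."""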

    def test_x_y1_y2_y3_format(self) -> None:
        """Test the X/Y1/Y2/Y3... format where all columns have the same length."""
        content = "1 2 3 4\n5 6 7 8\n9 10 11 12\n"  # X/Y1/Y2/Y3 data format
        delimiter = " "
        data_format = "X/Y1/Y2/Y3..."

        # Simulate loading data from a file
        xs_data, ys_data = load_data(content.encode(), delimiter, data_format)

        # Expected results
        expected_xs = [np.array([1.0, 5.0, 9.0]), np.array([1.0, 5.0, 9.0]), np.array([1.0, 5.0, 9.0])]
        expected_ys = [np.array([2.0, 6.0, 10.0]), np.array([3.0, 7.0, 11.0]), np.array([4.0, 8.0, 12.0])]

        assert np.array_equal(xs_data, expected_xs)
        assert np.array_equal(ys_data, expected_ys)

    def test_x1_y1_x2_y2_format(self) -> None:
        """Test the X1/Y1/X2/Y2... format where the column pairs have different lengths."""
        content = "1,2,3,4\n5,6,7,8\n9,10,,\n"  # X1/Y1/X2/Y2 data format; the second pair is shorter
        delimiter = ","
        data_format = "X1/Y1/X2/Y2..."

        # Simulate loading data from a file
        xs_data, ys_data = load_data(content.encode(), delimiter, data_format)

        # Expected results
        expected_xs = [np.array([1.0, 5.0, 9.0]), np.array([3.0, 7.0])]
        expected_ys = [np.array([2.0, 6.0, 10.0]), np.array([4.0, 8.0])]

        for x_array, expected_x_array in zip(xs_data, expected_xs):
            assert np.array_equal(x_array, expected_x_array)
        for y_array, expected_y_array in zip(ys_data, expected_ys):
            assert np.array_equal(y_array, expected_y_array)


class TestComparisonFunctions:
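    """Tests for are_identical and are_close, which compare (possibly nested) values, containers, and numpy arrays."""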

    def test_identical_simple_values(self) -> None:
        """Test identical simple values."""
        assert are_identical(5, 5)
        assert are_identical("test", "test")
        assert are_identical(None, None)
        assert not are_identical(5, 6)
        assert not are_identical("test", "different")

    def test_identical_lists(self) -> None:
        """Test identical lists."""
        assert are_identical([1, 2, 3], [1, 2, 3])
        assert are_identical([], [])
        assert are_identical([1, [2, 3]], [1, [2, 3]])
        assert not are_identical([1, 2, 3], [1, 2, 4])
        assert not are_identical([1, 2, 3], [1, 2])
        assert not are_identical([1, 2], [1, 2, 3])

    def test_identical_tuples(self) -> None:
        """Test identical tuples."""
        assert are_identical((1, 2, 3), (1, 2, 3))
        assert are_identical((), ())
        assert are_identical((1, (2, 3)), (1, (2, 3)))
        assert not are_identical((1, 2, 3), (1, 2, 4))

    def test_identical_dicts(self) -> None:
        """Test identical dictionaries."""
        assert are_identical({"a": 1, "b": 2}, {"a": 1, "b": 2})
        assert are_identical({}, {})
        assert are_identical({"a": 1, "b": {"c": 3}}, {"a": 1, "b": {"c": 3}})
        assert not are_identical({"a": 1, "b": 2}, {"a": 1, "b": 3})
        assert not are_identical({"a": 1, "b": 2}, {"a": 1, "c": 2})
        assert not are_identical({"a": 1}, {"a": 1, "b": 2})

    def test_identical_nested_structures(self) -> None:
        """Test identical nested structures."""
        nested1 = {"a": [1, 2, {"b": (3, 4)}]}
        nested2 = {"a": [1, 2, {"b": (3, 4)}]}
        different = {"a": [1, 2, {"b": (3, 5)}]}

        assert are_identical(nested1, nested2)
        assert not are_identical(nested1, different)

    def test_identical_numpy_arrays(self) -> None:
        """Test identical numpy arrays."""
        arr1 = np.array([1, 2, 3])
        arr2 = np.array([1, 2, 3])
        arr3 = np.array([1, 2, 4])

        assert are_identical(arr1, arr2)
        assert not are_identical(arr1, arr3)

    def test_identical_with_rtol(self) -> None:
        """Test identical with relative tolerance for floating point values."""
        assert are_identical(1.0, 1.001, rtol=1e-2)
        assert not are_identical(1.0, 1.001, rtol=1e-4)

        arr1 = np.array([1.0, 2.0, 3.0])
        arr2 = np.array([1.001, 2.002, 3.003])

        assert are_identical(arr1, arr2, rtol=1e-2)
        assert not are_identical(arr1, arr2, rtol=1e-4)

    def test_identical_mixed_types(self) -> None:
        """Test identical with mixed types - should use strict equality."""
        assert are_identical(1, 1.0)  # Different types
        assert are_identical(True, 1)  # Different types

    def test_close_simple_values(self) -> None:
        """Test are_close with simple values."""
        assert are_close(1.0, 1.0009)  # Default rtol=1e-3
        assert are_close(1.0, 1.002, rtol=1e-2)
        assert not are_close(1.0, 1.01)  # Default rtol too small

    def test_close_numpy_arrays(self) -> None:
        """Test are_close with numpy arrays."""
        arr1 = np.array([1.0, 2.0, 3.0])
        arr2 = np.array([1.0009, 2.001, 3.0008])
        arr3 = np.array([1.01, 2.01, 3.01])

        assert are_close(arr1, arr2)  # Default rtol=1e-3 is enough
        assert not are_close(arr1, arr3)  # Default rtol too small
        assert are_close(arr1, arr3, rtol=1e-1)  # Larger rtol works

    def test_close_nested_structures(self) -> None:
        """Test are_close with nested structures."""
        nested1 = {"a": [1.0, 2.0, {"b": np.array([3.0, 4.0])}]}
        nested2 = {"a": [1.0009, 2.001, {"b": np.array([3.0008, 4.0009])}]}
        nested3 = {"a": [1.01, 2.01, {"b": np.array([3.01, 4.01])}]}

        assert are_close(nested1, nested2)  # Default rtol=1e-3 is enough
        assert not are_close(nested1, nested3)  # Default rtol too small
        assert are_close(nested1, nested3, rtol=1e-1)  # Larger rtol works

    def test_close_different_structures(self) -> None:
        """Test are_close with different structures - should return False."""
        assert not are_close([1.0, 2.0], [1.0, 2.0, 3.0])
        assert not are_close({"a": 1.0}, {"a": 1.0, "b": 2.0})
        assert not are_close({"a": 1.0, "b": 2.0}, {"a": 1.0, "c": 2.0})

    def test_edge_cases(self) -> None:
        """Test edge cases."""
        # Empty structures are identical
        assert are_close([], [])
        assert are_close({}, {})

        # NaN values
        nan_array1 = np.array([1.0, np.nan, 3.0])
        nan_array2 = np.array([1.0, np.nan, 3.0])
        assert not are_close(nan_array1, nan_array2)  # np.allclose returns False for NaN

        # Infinity values
        inf_array1 = np.array([1.0, np.inf, 3.0])
        inf_array2 = np.array([1.0, np.inf, 3.0])
        assert are_close(inf_array1, inf_array2)

        # Mixed infinity and regular values
        mixed_inf1 = np.array([1.0, np.inf, 3.0])
        mixed_inf2 = np.array([1.0001, np.inf, 3.0001])
        assert are_close(mixed_inf1, mixed_inf2)


class TestRenderImage:
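    """Tests for render_image, which returns an HTML <img> tag (wrapped in <center>) with the image embedded as a base64 data URI."""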

    def test_valid_image_file(self) -> None:
        """Test rendering a valid PNG file with an explicit width."""
        image_data = b"\x89PNG\r\n\x1a\n" + b"fakeimagecontent"
        expected_mime = "image/png"

        # Create a temporary image file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp.write(image_data)
            tmp_path = tmp.name

        html_output = render_image(tmp_path, width=200)

        encoded = base64.b64encode(image_data).decode()
        expected_html = f'<center><img src="data:{expected_mime};base64,{encoded}" width="200px"/></center>'
        assert html_output == expected_html

    def test_unknown_mime_type_raises(self) -> None:
        """Test that a file with an unknown extension raises a ValueError."""
        with tempfile.NamedTemporaryFile(delete=False, suffix=".unknown") as tmp:
            tmp.write(b"Some binary content")
            tmp_path = tmp.name

        with pytest.raises(ValueError, match=r"Could not determine MIME type"):
            render_image(tmp_path)

    def test_default_width(self) -> None:
        """Test that the default width of 100px is used when none is given."""
        image_data = b"GIF87a" + b"fakegifdata"
        with tempfile.NamedTemporaryFile(delete=False, suffix=".gif") as tmp:
            tmp.write(image_data)
            tmp_path = tmp.name

        html_output = render_image(tmp_path)
        assert 'width="100px"' in html_output
        assert "data:image/gif;base64," in html_output


class TestReadTxtFile:
    """Test class for the read_txt_file function."""

    # File path for temporary test file
    TEMP_FILE = "_temp.txt"

    def teardown_method(self) -> None:
        """Teardown method that runs after each test."""

        # Clean up test file after each test
        if os.path.exists(self.TEMP_FILE):
            os.remove(self.TEMP_FILE)

        # Clear the cache
        st.cache_resource.clear()

    def test_read_existing_file(self) -> None:
        """Test reading from an existing file with valid content."""

        # Create file with some content
        with open(self.TEMP_FILE, "w") as f:
            f.write("Hello, World!")

        # Read the content using our function
        content = read_txt_file(self.TEMP_FILE)

        # Assert the content matches what we wrote
        assert content == "Hello, World!"

    def test_read_multiline_file(self) -> None:
        """Test reading from a file with multiple lines."""

        # Create a file with multiline content
        with open(self.TEMP_FILE, "w") as f:
            f.write("Line 1\nLine 2\nLine 3")

        # Read the content using our function
        content = read_txt_file(self.TEMP_FILE)

        # Assert the content matches what we wrote
        assert content == "Line 1\nLine 2\nLine 3"
        # Additional check for line count
        assert len(content.splitlines()) == 3

    def test_nonexistent_file(self) -> None:
        """Test that trying to read a nonexistent file raises an error."""

        # Check that the function raises FileNotFoundError
        with pytest.raises(FileNotFoundError):
            read_txt_file(self.TEMP_FILE)


class TestGenerateHtmlTable:
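    """Tests for generate_html_table, which renders a pandas DataFrame as an HTML table, merging cells in rows where all values are identical."""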

    @pytest.fixture
    def sample_dataframe(self) -> pd.DataFrame:
        """Create a sample dataframe for testing."""
        data = {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [9, 10, 11, 12]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3", "row4"])
        return df

    @pytest.fixture
    def dataframe_with_identical_rows(self) -> pd.DataFrame:
        """Create a dataframe with some identical rows."""
        data = {"A": [1, 2, 2, 3], "B": [5, 2, 2, 7], "C": [9, 2, 2, 11]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3", "row4"])
        return df

    @pytest.fixture
    def dataframe_with_column_name(self) -> pd.DataFrame:
        """Create a dataframe with a column name."""
        data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3"])
        df.columns.name = "Categories"
        return df

    def test_basic_table_generation(self, sample_dataframe) -> None:
        """Test basic HTML table generation."""
        html = generate_html_table(sample_dataframe)

        # Verify structure using BeautifulSoup
        soup = BeautifulSoup(html, "html.parser")

        # Check table exists
        table = soup.find("table")
        assert table is not None

        # Check number of rows (header + data rows)
        rows = table.find_all("tr")
        assert len(rows) == 5  # 1 header + 4 data rows

        # Check header row
        header_cells = rows[0].find_all(["th"])
        assert len(header_cells) == 4  # corner cell + 3 columns

        # Check data cells
        data_rows = rows[1:]
        for i, row in enumerate(data_rows):
            cells = row.find_all("td")
            assert cells[0].text == f"row{i + 1}"  # Check row name

    def test_merged_cells_for_identical_values(self, dataframe_with_identical_rows) -> None:
        """Test that cells are merged when all values in a row are identical."""
        html = generate_html_table(dataframe_with_identical_rows)

        soup = BeautifulSoup(html, "html.parser")
        rows = soup.find_all("tr")

        # Check row2 and row3 have merged cells
        row2 = rows[2]  # index 2 corresponds to row2
        row3 = rows[3]  # index 3 corresponds to row3

        # Check for colspan in row2 and row3
        assert row2.find_all("td")[1].has_attr("colspan")
        assert row2.find_all("td")[1]["colspan"] == "3"
        assert row3.find_all("td")[1].has_attr("colspan")
        assert row3.find_all("td")[1]["colspan"] == "3"

        # Check normal rows don't have merged cells
        row1 = rows[1]  # index 1 corresponds to row1
        row4 = rows[4]  # index 4 corresponds to row4
        assert len(row1.find_all("td")) == 4  # 1 row name + 3 data cells
        assert len(row4.find_all("td")) == 4  # 1 row name + 3 data cells

    def test_column_name_in_corner(self, dataframe_with_column_name) -> None:
        """Test that the column name appears in the corner cell."""
        html = generate_html_table(dataframe_with_column_name)

        soup = BeautifulSoup(html, "html.parser")
        corner_cell = soup.find("tr").find("th")

        assert corner_cell.text == "Categories"

    def test_empty_corner_cell_with_no_column_name(self, sample_dataframe) -> None:
        """Test that the corner cell is empty when no column name is provided."""
        html = generate_html_table(sample_dataframe)

        soup = BeautifulSoup(html, "html.parser")
        corner_cell = soup.find("tr").find("th")

        assert corner_cell.text == ""

    def test_div_wrapper(self, sample_dataframe) -> None:
        """Test that the table is wrapped in a div with correct styling."""
        html = generate_html_table(sample_dataframe)

        soup = BeautifulSoup(html, "html.parser")
        div = soup.find("div")

        assert div is not None
        assert div.has_attr("style")
        assert "margin: auto" in div["style"]
        assert "display: table" in div["style"]