Coverage for tests/utility/test_data.py: 100%
349 statements
coverage.py v7.10.5, created at 2025-08-28 09:13 +0000
1"""Test module for the functions in the `utility/data.py` module.
3This module contains unit tests for the functions implemented in the `data.py` module. The purpose of these tests is to
4ensure the correct functionality of each function in different scenarios and to validate that the expected outputs are
5returned.
7Tests should cover various edge cases, valid inputs, and any other conditions that are necessary to confirm the
8robustness of the functions."""

import os
import tempfile

import pytest
from bs4 import BeautifulSoup

from app.utility.data import *
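# NOTE: `np` (numpy), `pd` (pandas), `base64`, and `st` (streamlit) are used below without explicit
# imports; they are assumed to be re-exported by the star import from `app.utility.data` above.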


class TestMatrixToString:
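    """Tests for matrix_to_string, which formats a list of column arrays (plus an optional header) as comma-separated text in scientific notation."""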

    def test_basic_conversion(self) -> None:
        """Test basic conversion of two columns with a header."""
        arrays = [np.array([1.2, 2, 5]), np.array([1.6, 2])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00\n5.00000E+00,"
        assert result == expected

    def test_no_header(self) -> None:
        """Test conversion without a header."""
        arrays = [np.array([1.2, 2, 5]), np.array([1.6, 2])]
        result = matrix_to_string(arrays)
        expected = "1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00\n5.00000E+00,"
        assert result == expected

    def test_single_column(self) -> None:
        """Test conversion of a single column."""
        arrays = [np.array([1.2, 2, 5])]
        result = matrix_to_string(arrays, ["A"])
        expected = "A\n1.20000E+00\n2.00000E+00\n5.00000E+00"
        assert result == expected

    def test_mixed_lengths(self) -> None:
        """Test columns of different lengths; the shorter column is padded with an empty field."""
        arrays = [np.array([1.2, 2]), np.array([1.6])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,"
        assert result == expected

    def test_all_empty(self) -> None:
        """Test with all-empty columns."""
        arrays = [np.array([]), np.array([])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        assert result == "A,B\n"

    def test_no_trailing_comma(self) -> None:
        """Test that equal-length columns produce no trailing comma."""
        arrays = [np.array([1.2, 2]), np.array([1.6, 2])]
        header = ["A", "B"]
        result = matrix_to_string(arrays, header)
        expected = "A,B\n1.20000E+00,1.60000E+00\n2.00000E+00,2.00000E+00"
        assert result == expected


class TestGenerateDownloadLink:
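    """Tests for generate_download_link, which builds an HTML download link containing the data as a base64-encoded CSV."""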

    def test_basic_functionality(self) -> None:
        """Test basic functionality of generate_download_link."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        text = "Download Data"
        result = generate_download_link((x_data, y_data), header, text)
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Data" in result

    def test_no_header(self) -> None:
        """Test when no header is provided."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        text = "Download Data"
        result = generate_download_link((x_data, y_data), None, text)
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Data" in result

    def test_with_special_characters(self) -> None:
        """Test if the function handles special characters in the header and text."""
        x_data = [np.array([1, 2])]
        y_data = [np.array([3, 4])]
        header = ["Col@1", "Col#2"]
        text = "Download with Special Characters"
        result = generate_download_link((x_data, y_data), header, text)

        # Extract the base64 string from the result
        base64_string = result.split("base64,")[1].split('"')[0]

        # Decode the base64 string to get the original string
        decoded_string = base64.b64decode(base64_string).decode()

        # Now check if the decoded string contains the header with special characters
        assert "Col@1" in decoded_string
        assert "Col#2" in decoded_string
        assert "Download with Special Characters" in result

    def test_no_text_provided(self) -> None:
        """Test if no text is provided (empty string)."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        result = generate_download_link((x_data, y_data), header, "")
        assert '<a href="data:text/csv;base64,' in result
        assert 'href="data:text/csv;base64,' in result
        assert "Download" not in result  # Should not have any text if empty

    def test_large_data(self) -> None:
        """Test with large data to check performance (no specific checks)."""
        x_data = [np.random.rand(100)]
        y_data = [np.random.rand(100)]
        header = [f"Col{i}" for i in range(100)]
        result = generate_download_link((x_data, y_data), header, "Download Large Data")
        assert '<a href="data:text/csv;base64,' in result
        assert "Download Large Data" in result

    def test_b64_encoding(self) -> None:
        """Test to ensure base64 encoding is correct."""
        x_data = [np.array([1, 2, 3])]
        y_data = [np.array([4, 5, 6])]
        header = ["X", "Y"]
        result = generate_download_link((x_data, y_data), header, "Test Encoding")
        # Check if base64 encoding exists within the result
        assert "base64," in result


class TestProcessData:
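    """Tests for process_data, which reduces and shifts the x data and optionally normalises the y data."""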

    @pytest.fixture
    def gaussian_data(self) -> tuple[list[np.ndarray], list[np.ndarray]]:
        """Fixture to generate a Gaussian dataset with 10 data points."""
        # Generate Gaussian data with mean 0 and standard deviation 1
        n_points = 10
        x_data = np.linspace(-10, 5, n_points)  # x values from -10 to 5
        y_data = np.exp(-0.5 * (x_data**2))  # Gaussian function: e^(-x^2/2)
        return [x_data, x_data], [y_data, y_data * 2]

    def test_no_normalisation(self, gaussian_data) -> None:
        """Test with no normalisation."""
        xs_data, ys_data = gaussian_data

        # Process data without normalisation
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=False)

        # Check if x values are reduced and shifted, and y values are reduced
        expected_xs = [np.array([0.0, 1.66666667, 3.33333333, 5.0]), np.array([0.0, 1.66666667, 3.33333333, 5.0])]
        expected_ys = [
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
            np.array([2.00000000e00, 4.98704418e-01, 7.73184028e-03, 7.45330634e-06]),
        ]

        assert np.allclose(result_xs, expected_xs)
        assert np.allclose(result_ys, expected_ys)

    def test_with_normalisation(self, gaussian_data) -> None:
        """Test with normalisation."""
        xs_data, ys_data = gaussian_data

        # Process data with normalisation
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=True)

        # Check if x values are reduced and shifted, and y values are normalised
        expected_xs = [np.array([0.0, 1.66666667, 3.33333333, 5.0]), np.array([0.0, 1.66666667, 3.33333333, 5.0])]
        expected_ys = [
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
            np.array([1.00000000e00, 2.49352209e-01, 3.86592014e-03, 3.72665317e-06]),
        ]

        assert np.allclose(result_xs, expected_xs)
        assert np.allclose(result_ys, expected_ys)

    def test_edge_case_empty_data(self) -> None:
        """Test with empty input data."""
        xs_data = []
        ys_data = []

        # Process empty data
        result_xs, result_ys = process_data(xs_data, ys_data, normalise=False)

        # Check that the result is also empty
        assert result_xs == []
        assert result_ys == []


class TestGetDataIndex:
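    """Tests for get_data_index, which returns the index of the first line containing numeric data, or None if there is none."""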

    def test_no_delimiter(self) -> None:
        """Test without a delimiter (default None)."""
        content = ["header", "data starts here", "1 2 3", "4 5 6"]
        result = get_data_index(content)
        assert result == 2  # the first line with float data is at index 2

    def test_with_delimiter(self) -> None:
        """Test with a specified delimiter."""
        content = ["header", "data starts here", "1,2,3", "4,5,6"]
        result = get_data_index(content, delimiter=",")
        assert result == 2  # the first line with float data is at index 2

    def test_no_data(self) -> None:
        """Test case when there are no float data lines."""
        content = ["header", "some text", "more text"]
        result = get_data_index(content)
        assert result is None  # No line contains float data

    def test_empty_list(self) -> None:
        """Test with an empty list."""
        content = []
        result = get_data_index(content)
        assert result is None  # No data in the list

    def test_mixed_data(self) -> None:
        """Test with mixed data (some numeric and some non-numeric)."""
        content = ["header", "text", "1 2 3", "text again", "4 5 6"]
        result = get_data_index(content)
        assert result == 2  # the first line with numeric data is at index 2

    def test_non_matching_delimiter(self) -> None:
        """Test with a delimiter that doesn't match any line."""
        content = ["header", "text", "1 2 3", "4 5 6"]
        result = get_data_index(content, delimiter=",")
        assert result is None  # no lines with comma as delimiter


class TestLoadData:
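    """Tests for load_data, which parses raw byte content into lists of x and y arrays according to a delimiter and data format."""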

    def test_x_y1_y2_y3_format(self) -> None:
        """Test the X/Y1/Y2/Y3... format where all columns have the same length."""
        content = "1 2 3 4\n5 6 7 8\n9 10 11 12\n"  # X/Y1/Y2/Y3 data format
        delimiter = " "
        data_format = "X/Y1/Y2/Y3..."

        # Simulate loading data from a file
        xs_data, ys_data = load_data(content.encode(), delimiter, data_format)

        # Expected results
        expected_xs = [np.array([1.0, 5.0, 9.0]), np.array([1.0, 5.0, 9.0]), np.array([1.0, 5.0, 9.0])]
        expected_ys = [np.array([2.0, 6.0, 10.0]), np.array([3.0, 7.0, 11.0]), np.array([4.0, 8.0, 12.0])]

        assert np.array_equal(xs_data, expected_xs)
        assert np.array_equal(ys_data, expected_ys)

    def test_x1_y1_x2_y2_format(self) -> None:
        """Test the X1/Y1/X2/Y2... format where the column pairs have different lengths."""
        content = "1,2,3,4\n5,6,7,8\n9,10,,\n"  # X1/Y1/X2/Y2 data format; the second pair is shorter
        delimiter = ","
        data_format = "X1/Y1/X2/Y2..."

        # Simulate loading data from a file
        xs_data, ys_data = load_data(content.encode(), delimiter, data_format)

        # Expected results
        expected_xs = [np.array([1.0, 5.0, 9.0]), np.array([3.0, 7.0])]
        expected_ys = [np.array([2.0, 6.0, 10.0]), np.array([4.0, 8.0])]

        for x_array, expected_x_array in zip(xs_data, expected_xs):
            assert np.array_equal(x_array, expected_x_array)
        for y_array, expected_y_array in zip(ys_data, expected_ys):
            assert np.array_equal(y_array, expected_y_array)


class TestComparisonFunctions:
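    """Tests for are_identical and are_close, which compare (possibly nested) values, containers, and numpy arrays."""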

    def test_identical_simple_values(self) -> None:
        """Test identical simple values."""
        assert are_identical(5, 5)
        assert are_identical("test", "test")
        assert are_identical(None, None)
        assert not are_identical(5, 6)
        assert not are_identical("test", "different")

    def test_identical_lists(self) -> None:
        """Test identical lists."""
        assert are_identical([1, 2, 3], [1, 2, 3])
        assert are_identical([], [])
        assert are_identical([1, [2, 3]], [1, [2, 3]])
        assert not are_identical([1, 2, 3], [1, 2, 4])
        assert not are_identical([1, 2, 3], [1, 2])
        assert not are_identical([1, 2], [1, 2, 3])

    def test_identical_tuples(self) -> None:
        """Test identical tuples."""
        assert are_identical((1, 2, 3), (1, 2, 3))
        assert are_identical((), ())
        assert are_identical((1, (2, 3)), (1, (2, 3)))
        assert not are_identical((1, 2, 3), (1, 2, 4))

    def test_identical_dicts(self) -> None:
        """Test identical dictionaries."""
        assert are_identical({"a": 1, "b": 2}, {"a": 1, "b": 2})
        assert are_identical({}, {})
        assert are_identical({"a": 1, "b": {"c": 3}}, {"a": 1, "b": {"c": 3}})
        assert not are_identical({"a": 1, "b": 2}, {"a": 1, "b": 3})
        assert not are_identical({"a": 1, "b": 2}, {"a": 1, "c": 2})
        assert not are_identical({"a": 1}, {"a": 1, "b": 2})

    def test_identical_nested_structures(self) -> None:
        """Test identical nested structures."""
        nested1 = {"a": [1, 2, {"b": (3, 4)}]}
        nested2 = {"a": [1, 2, {"b": (3, 4)}]}
        different = {"a": [1, 2, {"b": (3, 5)}]}

        assert are_identical(nested1, nested2)
        assert not are_identical(nested1, different)

    def test_identical_numpy_arrays(self) -> None:
        """Test identical numpy arrays."""
        arr1 = np.array([1, 2, 3])
        arr2 = np.array([1, 2, 3])
        arr3 = np.array([1, 2, 4])

        assert are_identical(arr1, arr2)
        assert not are_identical(arr1, arr3)

    def test_identical_with_rtol(self) -> None:
        """Test identical with relative tolerance for floating point values."""
        assert are_identical(1.0, 1.001, rtol=1e-2)
        assert not are_identical(1.0, 1.001, rtol=1e-4)

        arr1 = np.array([1.0, 2.0, 3.0])
        arr2 = np.array([1.001, 2.002, 3.003])

        assert are_identical(arr1, arr2, rtol=1e-2)
        assert not are_identical(arr1, arr2, rtol=1e-4)

    def test_identical_mixed_types(self) -> None:
        """Test identical with mixed types - should use strict equality."""
        assert are_identical(1, 1.0)  # Different types
        assert are_identical(True, 1)  # Different types

    def test_close_simple_values(self) -> None:
        """Test are_close with simple values."""
        assert are_close(1.0, 1.0009)  # Default rtol=1e-3
        assert are_close(1.0, 1.002, rtol=1e-2)
        assert not are_close(1.0, 1.01)  # Default rtol too small

    def test_close_numpy_arrays(self) -> None:
        """Test are_close with numpy arrays."""
        arr1 = np.array([1.0, 2.0, 3.0])
        arr2 = np.array([1.0009, 2.001, 3.0008])
        arr3 = np.array([1.01, 2.01, 3.01])

        assert are_close(arr1, arr2)  # Default rtol=1e-3 is enough
        assert not are_close(arr1, arr3)  # Default rtol too small
        assert are_close(arr1, arr3, rtol=1e-1)  # Larger rtol works

    def test_close_nested_structures(self) -> None:
        """Test are_close with nested structures."""
        nested1 = {"a": [1.0, 2.0, {"b": np.array([3.0, 4.0])}]}
        nested2 = {"a": [1.0009, 2.001, {"b": np.array([3.0008, 4.0009])}]}
        nested3 = {"a": [1.01, 2.01, {"b": np.array([3.01, 4.01])}]}

        assert are_close(nested1, nested2)  # Default rtol=1e-3 is enough
        assert not are_close(nested1, nested3)  # Default rtol too small
        assert are_close(nested1, nested3, rtol=1e-1)  # Larger rtol works

    def test_close_different_structures(self) -> None:
        """Test are_close with different structures - should return False."""
        assert not are_close([1.0, 2.0], [1.0, 2.0, 3.0])
        assert not are_close({"a": 1.0}, {"a": 1.0, "b": 2.0})
        assert not are_close({"a": 1.0, "b": 2.0}, {"a": 1.0, "c": 2.0})

    def test_edge_cases(self) -> None:
        """Test edge cases."""
        # Empty structures are identical
        assert are_close([], [])
        assert are_close({}, {})

        # NaN values
        nan_array1 = np.array([1.0, np.nan, 3.0])
        nan_array2 = np.array([1.0, np.nan, 3.0])
        assert not are_close(nan_array1, nan_array2)  # np.allclose returns False for NaN

        # Infinity values
        inf_array1 = np.array([1.0, np.inf, 3.0])
        inf_array2 = np.array([1.0, np.inf, 3.0])
        assert are_close(inf_array1, inf_array2)

        # Mixed infinity and regular values
        mixed_inf1 = np.array([1.0, np.inf, 3.0])
        mixed_inf2 = np.array([1.0001, np.inf, 3.0001])
        assert are_close(mixed_inf1, mixed_inf2)


class TestRenderImage:
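    """Tests for render_image, which returns an HTML <img> tag (wrapped in <center>) with the image embedded as a base64 data URI."""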

    def test_valid_image_file(self) -> None:
        """Test rendering a valid PNG file with an explicit width."""
        image_data = b"\x89PNG\r\n\x1a\n" + b"fakeimagecontent"
        expected_mime = "image/png"

        # Create a temporary image file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp.write(image_data)
            tmp_path = tmp.name

        html_output = render_image(tmp_path, width=200)

        encoded = base64.b64encode(image_data).decode()
        expected_html = f'<center><img src="data:{expected_mime};base64,{encoded}" width="200px"/></center>'
        assert html_output == expected_html

    def test_unknown_mime_type_raises(self) -> None:
        """Test that a file with an unknown extension raises a ValueError."""
        with tempfile.NamedTemporaryFile(delete=False, suffix=".unknown") as tmp:
            tmp.write(b"Some binary content")
            tmp_path = tmp.name

        with pytest.raises(ValueError, match=r"Could not determine MIME type"):
            render_image(tmp_path)

    def test_default_width(self) -> None:
        """Test that the default width of 100px is used when none is given."""
        image_data = b"GIF87a" + b"fakegifdata"
        with tempfile.NamedTemporaryFile(delete=False, suffix=".gif") as tmp:
            tmp.write(image_data)
            tmp_path = tmp.name

        html_output = render_image(tmp_path)
        assert 'width="100px"' in html_output
        assert "data:image/gif;base64," in html_output


class TestReadTxtFile:
    """Test class for the read_txt_file function."""

    # File path for temporary test file
    TEMP_FILE = "_temp.txt"

    def teardown_method(self) -> None:
        """Teardown method that runs after each test."""

        # Clean up test file after each test
        if os.path.exists(self.TEMP_FILE):
            os.remove(self.TEMP_FILE)

        # Clear the cache
        st.cache_resource.clear()

    def test_read_existing_file(self) -> None:
        """Test reading from an existing file with valid content."""

        # Create file with some content
        with open(self.TEMP_FILE, "w") as f:
            f.write("Hello, World!")

        # Read the content using our function
        content = read_txt_file(self.TEMP_FILE)

        # Assert the content matches what we wrote
        assert content == "Hello, World!"

    def test_read_multiline_file(self) -> None:
        """Test reading from a file with multiple lines."""

        # Create a file with multiline content
        with open(self.TEMP_FILE, "w") as f:
            f.write("Line 1\nLine 2\nLine 3")

        # Read the content using our function
        content = read_txt_file(self.TEMP_FILE)

        # Assert the content matches what we wrote
        assert content == "Line 1\nLine 2\nLine 3"
        # Additional check for line count
        assert len(content.splitlines()) == 3

    def test_nonexistent_file(self) -> None:
        """Test that trying to read a nonexistent file raises an error."""

        # Check that the function raises FileNotFoundError
        with pytest.raises(FileNotFoundError):
            read_txt_file(self.TEMP_FILE)


class TestGenerateHtmlTable:
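    """Tests for generate_html_table, which renders a pandas DataFrame as an HTML table, merging cells in rows where all values are identical."""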

    @pytest.fixture
    def sample_dataframe(self) -> pd.DataFrame:
        """Create a sample dataframe for testing."""
        data = {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [9, 10, 11, 12]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3", "row4"])
        return df

    @pytest.fixture
    def dataframe_with_identical_rows(self) -> pd.DataFrame:
        """Create a dataframe with some identical rows."""
        data = {"A": [1, 2, 2, 3], "B": [5, 2, 2, 7], "C": [9, 2, 2, 11]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3", "row4"])
        return df

    @pytest.fixture
    def dataframe_with_column_name(self) -> pd.DataFrame:
        """Create a dataframe with a column name."""
        data = {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
        df = pd.DataFrame(data, index=["row1", "row2", "row3"])
        df.columns.name = "Categories"
        return df

    def test_basic_table_generation(self, sample_dataframe) -> None:
        """Test basic HTML table generation."""
        html = generate_html_table(sample_dataframe)

        # Verify structure using BeautifulSoup
        soup = BeautifulSoup(html, "html.parser")

        # Check table exists
        table = soup.find("table")
        assert table is not None

        # Check number of rows (header + data rows)
        rows = table.find_all("tr")
        assert len(rows) == 5  # 1 header + 4 data rows

        # Check header row
        header_cells = rows[0].find_all(["th"])
        assert len(header_cells) == 4  # corner cell + 3 columns

        # Check data cells
        data_rows = rows[1:]
        for i, row in enumerate(data_rows):
            cells = row.find_all("td")
            assert cells[0].text == f"row{i + 1}"  # Check row name

    def test_merged_cells_for_identical_values(self, dataframe_with_identical_rows) -> None:
        """Test that cells are merged when all values in a row are identical."""
        html = generate_html_table(dataframe_with_identical_rows)

        soup = BeautifulSoup(html, "html.parser")
        rows = soup.find_all("tr")

        # Check row2 and row3 have merged cells
        row2 = rows[2]  # index 2 corresponds to row2
        row3 = rows[3]  # index 3 corresponds to row3

        # Check for colspan in row2 and row3
        assert row2.find_all("td")[1].has_attr("colspan")
        assert row2.find_all("td")[1]["colspan"] == "3"
        assert row3.find_all("td")[1].has_attr("colspan")
        assert row3.find_all("td")[1]["colspan"] == "3"

        # Check normal rows don't have merged cells
        row1 = rows[1]  # index 1 corresponds to row1
        row4 = rows[4]  # index 4 corresponds to row4
        assert len(row1.find_all("td")) == 4  # 1 row name + 3 data cells
        assert len(row4.find_all("td")) == 4  # 1 row name + 3 data cells

    def test_column_name_in_corner(self, dataframe_with_column_name) -> None:
        """Test that the column name appears in the corner cell."""
        html = generate_html_table(dataframe_with_column_name)

        soup = BeautifulSoup(html, "html.parser")
        corner_cell = soup.find("tr").find("th")

        assert corner_cell.text == "Categories"

    def test_empty_corner_cell_with_no_column_name(self, sample_dataframe) -> None:
        """Test that the corner cell is empty when no column name is provided."""
        html = generate_html_table(sample_dataframe)

        soup = BeautifulSoup(html, "html.parser")
        corner_cell = soup.find("tr").find("th")

        assert corner_cell.text == ""

    def test_div_wrapper(self, sample_dataframe) -> None:
        """Test that the table is wrapped in a div with correct styling."""
        html = generate_html_table(sample_dataframe)

        soup = BeautifulSoup(html, "html.parser")
        div = soup.find("div")

        assert div is not None
        assert div.has_attr("style")
        assert "margin: auto" in div["style"]
        assert "display: table" in div["style"]