Coverage for app/utility/data.py: 100%

102 statements  

coverage.py v7.10.5, created at 2025-08-28 18:30 +0000

"""utils module"""

import base64
import mimetypes
from io import StringIO
from typing import Any

import numpy as np
import pandas as pd
import streamlit as st


def matrix_to_string(
    arrays: np.ndarray | list[np.ndarray],
    header: None | list[str] | np.ndarray = None,
) -> str:
    """Convert a matrix to a delimited string
    :param arrays: list of ndarrays, one per column
    :param header: optional list of column names"""

    max_rows = np.max([len(array) for array in arrays])
    rows = []
    delimiter = ","

    for i in range(max_rows):
        row_values = []
        for array in arrays:
            if i < len(array):
                row_values.append(f"{array[i]:.5E}")
            else:
                row_values.append("")
        rows.append(delimiter.join(row_values))

    string = "\n".join(rows)

    if header is not None:
        string = delimiter.join(header) + "\n" + string

    return string

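# Illustrative usage of matrix_to_string (editor's sketch, not part of the original module):
# arrays of unequal length are padded with empty cells, and the optional header becomes the first row.
# >>> matrix_to_string([np.array([1.0, 2.0]), np.array([3.0])], header=["a", "b"])
# 'a,b\n1.00000E+00,3.00000E+00\n2.00000E+00,'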
@st.cache_resource
def render_image(
    file_path: str,
    width: int = 100,
) -> str:
    """Render an image file as base64-embedded HTML
    :param str file_path: path to the image file
    :param int width: image width in pixels
    :return: HTML string for rendering the image"""

    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        raise ValueError(f"Could not determine MIME type for file: {file_path}")

    with open(file_path, "rb") as ofile:
        encoded = base64.b64encode(ofile.read()).decode()

    return f'<center><img src="data:{mime_type};base64,{encoded}" width="{width}px"/></center>'


def generate_download_link(
    data: tuple,
    header: None | list[str] | np.ndarray = None,
    text: str = "",
    name: str | None = None,
) -> str:
    """Generate a download link from a matrix and a header
    :param data: tuple (xs_data, ys_data) of x-axis and y-axis data
    :param header: list of strings corresponding to the header of each column
    :param text: text to be displayed instead of the link
    :param name: name of the file"""

    if name is None:
        name = text
    data = np.concatenate([[data[0][0]], data[1]])  # stack the first x-axis array with all y-axis arrays
    string = matrix_to_string(data, header)
    b64 = base64.b64encode(string.encode()).decode()
    return rf'<a href="data:text/csv;base64,{b64}" download="{name}.csv">{text}</a>'

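# Illustrative usage of generate_download_link (editor's sketch, not part of the original module):
# data is expected as (xs_data, ys_data); only the first x-axis array is kept, stacked with every
# y-axis array, and encoded into a base64 CSV download link.
# >>> xs, ys = [np.array([0.0, 1.0])], [np.array([2.0, 3.0])]
# >>> generate_download_link((xs, ys), header=["x", "y"], text="Download data")
# '<a href="data:text/csv;base64,..." download="Download data.csv">Download data</a>'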
def process_data(
    xs_data: list[np.ndarray],
    ys_data: list[np.ndarray],
    normalise: bool,
) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Process the data
    :param xs_data: x data
    :param ys_data: y data
    :param normalise: if True, normalise the y-axis data"""

    for i in range(len(xs_data)):
        index = ys_data[i].argmax()
        xs_data[i] = xs_data[i][index:]  # reduce range x
        xs_data[i] -= xs_data[i][0]  # shift x
        ys_data[i] = ys_data[i][index:]  # reduce range y
        if normalise:
            ys_data[i] /= ys_data[i][0]  # normalise y
    return xs_data, ys_data

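# Illustrative usage of process_data (editor's sketch, not part of the original module):
# each trace is truncated at its y maximum, its x axis is shifted to start at zero, and y is
# optionally normalised by its first (maximum) value; the input lists are modified in place and returned.
# >>> xs, ys = [np.array([0.0, 1.0, 2.0, 3.0])], [np.array([0.5, 2.0, 1.0, 0.25])]
# >>> process_data(xs, ys, normalise=True)
# ([array([0., 1., 2.])], [array([1.   , 0.5  , 0.125])])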
def are_identical(
    obj1: Any,
    obj2: Any,
    rtol: float | None = None,
) -> bool:
    """Check if two objects are identical.
    :param obj1: list or dictionary to compare.
    :param obj2: list or dictionary to compare.
    :param rtol: relative tolerance for floating-point comparisons using np.allclose."""

    if isinstance(obj1, dict) and isinstance(obj2, dict):
        if obj1.keys() != obj2.keys():
            return False
        else:
            return all(are_identical(obj1[k], obj2[k], rtol) for k in obj1)

    if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)):
        if len(obj1) != len(obj2):
            return False
        else:
            return all(are_identical(i1, i2, rtol) for i1, i2 in zip(obj1, obj2))

    else:
        if rtol is not None:
            return np.allclose(obj1, obj2, rtol=rtol)
        else:
            return np.array_equal(obj1, obj2)


def are_close(*args, rtol=1e-3) -> bool:
    """Check if two objects are approximately equal within the given relative tolerance"""

    return are_identical(*args, rtol=rtol)

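# Illustrative usage of are_identical/are_close (editor's sketch, not part of the original module):
# containers are compared recursively; leaves are compared exactly with np.array_equal, or
# approximately with np.allclose when a relative tolerance is given.
# >>> are_identical({"a": [1, 2]}, {"a": [1, 2]})
# True
# >>> are_close([1.0, 1.0005], [1.0, 1.0006])  # default rtol=1e-3
# True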
def get_data_index(
    content: list[str],
    delimiter: None | str = None,
) -> None | int:
    """Retrieve the index of the line where the data starts
    :param list of str content: list of strings
    :param str or None delimiter: delimiter of the float data
    :return: index of the first data line, or None if no data line is found

    Example
    -------
    >>> get_data_index(['first line', 'second line', '1 2 3'])
    2"""

    # noinspection PyInconsistentReturns
    for index, line in enumerate(content):
        if line != "":
            try:
                [float(f) for f in line.split(delimiter)]
                return index
            except ValueError:
                continue


def load_data(
    content: bytes,
    delimiter: str,
    data_format: str,
) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Process the data contained in a file
    :param bytes content: file content
    :param str delimiter: data delimiter
    :param str data_format: data column format; "X/Y1/Y2/Y3..." for a shared x column, otherwise alternating x/y columns"""

    # Find the data index and load the data
    content1 = content.decode("ascii").splitlines()
    content1 = [line.strip(delimiter) for line in content1]  # remove any extra delimiter on each line
    index = get_data_index(content1, delimiter)
    data = np.transpose(np.genfromtxt(StringIO(content.decode("ascii")), delimiter=delimiter, skip_header=index))

    # Sort the data
    if data_format == "X/Y1/Y2/Y3...":
        xs_data, ys_data = np.array([data[0]] * (len(data) - 1)), data[1:]
    else:
        xs_data, ys_data = data[::2], data[1::2]

    # Remove NaN values from the data
    xs_data = [x_data[np.invert(np.isnan(x_data))] for x_data in xs_data]
    ys_data = [y_data[np.invert(np.isnan(y_data))] for y_data in ys_data]

    return xs_data, ys_data

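# Illustrative usage of load_data (editor's sketch, not part of the original module):
# header lines are skipped automatically via get_data_index, and columns are split either as a
# shared x axis ("X/Y1/Y2/Y3...") or as alternating x/y pairs (any other data_format value).
# >>> xs, ys = load_data(b"wavelength,intensity\n400,0.1\n410,0.4\n", delimiter=",", data_format="X/Y1/Y2/Y3...")
# >>> xs[0], ys[0]
# (array([400., 410.]), array([0.1, 0.4]))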
@st.cache_resource
def read_txt_file(path: str) -> str:
    """Read the content of a text file and store it as a resource.
    :param path: file path"""

    with open(path, encoding="utf-8") as ofile:
        return ofile.read()


def generate_html_table(df: pd.DataFrame) -> str:
    """Generate an HTML table from a pandas DataFrame with merged cells for rows
    where all values are identical. Includes row names (index), column names,
    and displays the columns' name in the upper-left corner cell.
    :param df: pandas DataFrame to convert to HTML"""

    html = ['<table border="1" style="border-collapse: collapse; text-align: center;">']

    # Add header row with the columns' name in the corner cell
    corner_cell_content = df.columns.name if df.columns.name else ""
    header = f'<tr><th style="padding: 8px; text-align: center;">{corner_cell_content}</th>'
    for col in df.columns:
        header += f'<th style="padding: 8px; text-align: center;">{col}</th>'
    header += "</tr>"
    html.append(header)

    # Process each row
    for idx, row in df.iterrows():
        values = row.tolist()

        # Check if all values in the row are the same
        if len(set(values)) == 1:
            # All values are identical - merge the cells but keep the row name
            html.append(
                f'<tr><td style="padding: 8px; font-weight: bold; text-align: center;">{idx}</td>'
                + f'<td colspan="{len(df.columns)}" style="padding: 8px; text-align: center;">{values[0]}</td></tr>'
            )
        else:
            # Normal row handling with the row name
            row_html = f'<tr><td style="padding: 8px; font-weight: bold; text-align: center;">{idx}</td>'
            for val in values:
                row_html += f'<td style="padding: 8px; text-align: center;">{val}</td>'
            row_html += "</tr>"
            html.append(row_html)

    html.append("</table>")
    return '<div style="margin: auto; display: table;">' + "\n".join(html) + "</div>"
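# Illustrative usage of generate_html_table (editor's sketch, not part of the original module):
# rows whose values are all identical are rendered as a single merged cell spanning every column.
# >>> df = pd.DataFrame({"A": [1, 3], "B": [1, 4]}, index=["row1", "row2"])
# >>> generate_html_table(df)  # "row1" is merged into one cell, "row2" keeps one cell per column
# '<div style="margin: auto; display: table;">...</div>'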