Coverage for app/utility/data.py: 100%

102 statements  

coverage.py v7.10.5, created at 2025-08-28 18:30 +0000

"""utils module"""

import base64
import mimetypes
from io import StringIO
from typing import Any

import numpy as np
import pandas as pd
import streamlit as st


def matrix_to_string(
    arrays: np.ndarray | list[np.ndarray],
    header: None | list[str] | np.ndarray = None,
) -> str:
    """Convert a matrix to a delimited string
    :param arrays: list of ndarrays, one per column
    :param header: optional list of column names"""

    max_rows = np.max([len(array) for array in arrays])
    rows = []
    delimiter = ","

    for i in range(max_rows):
        row_values = []
        for array in arrays:
            if i < len(array):
                row_values.append(f"{array[i]:.5E}")
            else:
                row_values.append("")
        rows.append(delimiter.join(row_values))

    string = "\n".join(rows)

    if header is not None:
        string = delimiter.join(header) + "\n" + string

    return string

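# Illustrative usage of matrix_to_string (editor's sketch, not part of the original module):
# arrays of unequal length are padded with empty cells, and the optional header becomes the first row.
# >>> matrix_to_string([np.array([1.0, 2.0]), np.array([3.0])], header=["a", "b"])
# 'a,b\n1.00000E+00,3.00000E+00\n2.00000E+00,'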
@st.cache_resource
def render_image(
    file_path: str,
    width: int = 100,
) -> str:
    """Render an image file as base64-embedded HTML
    :param str file_path: path to the image file
    :param int width: image width in pixels
    :return: HTML string for rendering the image"""

    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        raise ValueError(f"Could not determine MIME type for file: {file_path}")

    with open(file_path, "rb") as ofile:
        encoded = base64.b64encode(ofile.read()).decode()

    return f'<center><img src="data:{mime_type};base64,{encoded}" width="{width}px"/></center>'


def generate_download_link(
    data: tuple,
    header: None | list[str] | np.ndarray = None,
    text: str = "",
    name: str | None = None,
) -> str:
    """Generate a download link from a matrix and a header
    :param data: tuple (xs_data, ys_data) of x-axis and y-axis data
    :param header: list of strings corresponding to the header of each column
    :param text: text to be displayed instead of the link
    :param name: name of the file"""

    if name is None:
        name = text
    data = np.concatenate([[data[0][0]], data[1]])  # stack the first x-axis array with all y-axis arrays
    string = matrix_to_string(data, header)
    b64 = base64.b64encode(string.encode()).decode()
    return rf'<a href="data:text/csv;base64,{b64}" download="{name}.csv">{text}</a>'

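# Illustrative usage of generate_download_link (editor's sketch, not part of the original module):
# data is expected as (xs_data, ys_data); only the first x-axis array is kept, stacked with every
# y-axis array, and encoded into a base64 CSV download link.
# >>> xs, ys = [np.array([0.0, 1.0])], [np.array([2.0, 3.0])]
# >>> generate_download_link((xs, ys), header=["x", "y"], text="Download data")
# '<a href="data:text/csv;base64,..." download="Download data.csv">Download data</a>'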
def process_data(
    xs_data: list[np.ndarray],
    ys_data: list[np.ndarray],
    normalise: bool,
) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Process the data
    :param xs_data: x data
    :param ys_data: y data
    :param normalise: if True, normalise the y-axis data"""

    for i in range(len(xs_data)):
        index = ys_data[i].argmax()
        xs_data[i] = xs_data[i][index:]  # reduce range x
        xs_data[i] -= xs_data[i][0]  # shift x
        ys_data[i] = ys_data[i][index:]  # reduce range y
        if normalise:
            ys_data[i] /= ys_data[i][0]  # normalise y
    return xs_data, ys_data

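# Illustrative usage of process_data (editor's sketch, not part of the original module):
# each trace is truncated at its y maximum, its x axis is shifted to start at zero, and y is
# optionally normalised by its first (maximum) value; the input lists are modified in place and returned.
# >>> xs, ys = [np.array([0.0, 1.0, 2.0, 3.0])], [np.array([0.5, 2.0, 1.0, 0.25])]
# >>> process_data(xs, ys, normalise=True)
# ([array([0., 1., 2.])], [array([1.   , 0.5  , 0.125])])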
def are_identical(
    obj1: Any,
    obj2: Any,
    rtol: float | None = None,
) -> bool:
    """Check if two objects are identical.
    :param obj1: list or dictionary to compare.
    :param obj2: list or dictionary to compare.
    :param rtol: relative tolerance for floating-point comparisons using np.allclose."""

    if isinstance(obj1, dict) and isinstance(obj2, dict):
        if obj1.keys() != obj2.keys():
            return False
        else:
            return all(are_identical(obj1[k], obj2[k], rtol) for k in obj1)

    if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)):
        if len(obj1) != len(obj2):
            return False
        else:
            return all(are_identical(i1, i2, rtol) for i1, i2 in zip(obj1, obj2))

    else:
        if rtol is not None:
            return np.allclose(obj1, obj2, rtol=rtol)
        else:
            return np.array_equal(obj1, obj2)


def are_close(*args, rtol=1e-3) -> bool:
    """Check if two objects are approximately equal within the given relative tolerance"""

    return are_identical(*args, rtol=rtol)

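# Illustrative usage of are_identical/are_close (editor's sketch, not part of the original module):
# containers are compared recursively; leaves are compared exactly with np.array_equal, or
# approximately with np.allclose when a relative tolerance is given.
# >>> are_identical({"a": [1, 2]}, {"a": [1, 2]})
# True
# >>> are_close([1.0, 1.0005], [1.0, 1.0006])  # default rtol=1e-3
# True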
def get_data_index(
    content: list[str],
    delimiter: None | str = None,
) -> None | int:
    """Retrieve the index of the line where the data starts
    :param list of str content: list of strings
    :param str or None delimiter: delimiter of the float data
    :return: index of the first data line, or None if no data line is found

    Example
    -------
    >>> get_data_index(['first line', 'second line', '1 2 3'])
    2"""

    # noinspection PyInconsistentReturns
    for index, line in enumerate(content):
        if line != "":
            try:
                [float(f) for f in line.split(delimiter)]
                return index
            except ValueError:
                continue


def load_data(
    content: bytes,
    delimiter: str,
    data_format: str,
) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Process the data contained in a file
    :param bytes content: file content
    :param str delimiter: data delimiter
    :param str data_format: data column format; "X/Y1/Y2/Y3..." for a shared x column, otherwise alternating x/y columns"""

    # Find the data index and load the data
    content1 = content.decode("ascii").splitlines()
    content1 = [line.strip(delimiter) for line in content1]  # remove any extra delimiter on each line
    index = get_data_index(content1, delimiter)
    data = np.transpose(np.genfromtxt(StringIO(content.decode("ascii")), delimiter=delimiter, skip_header=index))

    # Sort the data
    if data_format == "X/Y1/Y2/Y3...":
        xs_data, ys_data = np.array([data[0]] * (len(data) - 1)), data[1:]
    else:
        xs_data, ys_data = data[::2], data[1::2]

    # Remove NaN values from the data
    xs_data = [x_data[np.invert(np.isnan(x_data))] for x_data in xs_data]
    ys_data = [y_data[np.invert(np.isnan(y_data))] for y_data in ys_data]

    return xs_data, ys_data

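# Illustrative usage of load_data (editor's sketch, not part of the original module):
# header lines are skipped automatically via get_data_index, and columns are split either as a
# shared x axis ("X/Y1/Y2/Y3...") or as alternating x/y pairs (any other data_format value).
# >>> xs, ys = load_data(b"wavelength,intensity\n400,0.1\n410,0.4\n", delimiter=",", data_format="X/Y1/Y2/Y3...")
# >>> xs[0], ys[0]
# (array([400., 410.]), array([0.1, 0.4]))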
@st.cache_resource
def read_txt_file(path: str) -> str:
    """Read the content of a text file and store it as a resource.
    :param path: file path"""

    with open(path, encoding="utf-8") as ofile:
        return ofile.read()


def generate_html_table(df: pd.DataFrame) -> str:
    """Generate an HTML table from a pandas DataFrame with merged cells for rows
    where all values are identical. Includes row names (index), column names,
    and displays the columns' name in the upper-left corner cell.
    :param df: pandas DataFrame to convert to HTML"""

    html = ['<table border="1" style="border-collapse: collapse; text-align: center;">']

    # Add header row with the columns' name in the corner cell
    corner_cell_content = df.columns.name if df.columns.name else ""
    header = f'<tr><th style="padding: 8px; text-align: center;">{corner_cell_content}</th>'
    for col in df.columns:
        header += f'<th style="padding: 8px; text-align: center;">{col}</th>'
    header += "</tr>"
    html.append(header)

    # Process each row
    for idx, row in df.iterrows():
        values = row.tolist()

        # Check if all values in the row are the same
        if len(set(values)) == 1:
            # All values are identical - merge the cells but keep the row name
            html.append(
                f'<tr><td style="padding: 8px; font-weight: bold; text-align: center;">{idx}</td>'
                + f'<td colspan="{len(df.columns)}" style="padding: 8px; text-align: center;">{values[0]}</td></tr>'
            )
        else:
            # Normal row handling with the row name
            row_html = f'<tr><td style="padding: 8px; font-weight: bold; text-align: center;">{idx}</td>'
            for val in values:
                row_html += f'<td style="padding: 8px; text-align: center;">{val}</td>'
            row_html += "</tr>"
            html.append(row_html)

    html.append("</table>")
    return '<div style="margin: auto; display: table;">' + "\n".join(html) + "</div>"
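# Illustrative usage of generate_html_table (editor's sketch, not part of the original module):
# rows whose values are all identical are rendered as a single merged cell spanning every column.
# >>> df = pd.DataFrame({"A": [1, 3], "B": [1, 4]}, index=["row1", "row2"])
# >>> generate_html_table(df)  # "row1" is merged into one cell, "row2" keeps one cell per column
# '<div style="margin: auto; display: table;">...</div>'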