diff --git a/docs/conf.py b/docs/conf.py index 5801d065..15992621 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,7 @@ # The short X.Y version version = "" # The full version, including alpha/beta/rc tags -release = "0.4.198" +release = "0.6.20" # -- General configuration --------------------------------------------------- diff --git a/docs/usage/atmosphere.md b/docs/usage/atmosphere.md index 84eb1c22..669889fc 100644 --- a/docs/usage/atmosphere.md +++ b/docs/usage/atmosphere.md @@ -18,36 +18,34 @@ parameters, which is usually the same as that of the sme structure, but can be different, if for example the atmosphere is embedded, i.e. fixed, or has not been calculated yet. -The atmopshere object has the following fields +The atmopshere object has the following fields: -- **`teff`**: Effective Temperature in Kelvin -- **`logg`**: Surface Gravity in log(cgs) -- **`monh`**: Metallicity relative to the individual abundances -- **`abund`**: The individual abundances (see [abund](#abund)) -- **`vsini`**: Projected Rotational velocity in km/s -- **`vmic`**: Microturbulence velocity in km/s -- **`vmac`**: Macroturbulence velocity in km/s -- **`vturb`**: Turbulent velocity in km/s -- **`lonh`**: Mixing length -- **`source`**: Filename of the atmosphere grid -- **`depth`**: - The depth scale to use for calculations. - Either RHOX or TAU -- **`interp`**: - The depth scale to use for interpolation. - Either RHOX or TAU -- **`geom`**: - The geometry of the atmosphere. Either Plane - Parallel `'PP'` or Spherical `'SPH'` -- **`method`**: - The method to use for interpolation. Either `'grid'` - for a model grid or `'embedded'` if only a single - atmosphere is given. 
-- **`rhox`**: 'Column density' depth scale -- **`tau`**: 'Optical depth' depth scale -- **`temp`**: Temperature profile -- **`xna`**: Number density of atoms, ions, and molecules in each depth -- **`xne`**: Number density of electrons in each depth +|Field name|Description|Allowed values or [Unit]| +|:---:|:---:|:---:| +|`teff`|Effective Temperature|[K]| +|`logg`|Surface Gravity|[log(cgs)]| +|`monh`|Metallicity|| +|`abund`|The individual abundances (see [abund](#abund))|| +|`vsini`|Projected Rotational velocity|[km/s]| +|`vmic`|Microturbulence velocity|[km/s]| +|`vmac`|Macroturbulence velocity|[km/s]| +|`source`|Filename of the atmosphere grid|see [lfs](lfs.md)| +|`depth`|The depth scale to use for calculations.|`RHOX` or `TAU`| +|`interp`|The depth scale to use for interpolation.|`RHOX` or `TAU`| +|`geom`|The geometry of the atmosphere.|Plane Parallel `'PP'` or Spherical `'SPH'`| +|`method`|The method to use for interpolation|`'grid'` for a model grid or `'embedded'` if only a single atmosphere is given| +|`rhox`|Mass column at each tabulated depth in the atmosphere|[$\mathrm{g~cm^{-2}}$]| +|`tau`|Continuum optical depth at each tabulated depth in the atmosphere|| +|`temp`|Temperature at each tabulated depth in the atmosphere|[K]| +|`xna`|Atomic number density (including atomic components of molecules) at each tabulated depth in the atmosphere.|[$\mathrm{cm^{-3}}$]| +|`xne`|Electron number density at each tabulated depth in the atmosphere|[$\mathrm{cm^{-3}}$]| +|`rho`|Mass density at each tabulated depth in the atmosphere.|[$\mathrm{g~cm^{-3}}$]| +|`height`|Height above or below `radius` at each tabulated depth in a spherical atmosphere.|[cm]| +|`radius`|Stellar radius corresponding to `height` of zero in a spherical atmosphere|[cm]| +|`vturb`|Turbulent velocity used to generate the atmosphere|[km/s]| +|`lonh`|Ratio of mixing length to pressure scale height (l/H) used to generate the atmosphere.|| +|`wlstd`|Wavelength for continuum optical depth scale.|Å, Default 
value: 5000Å| +|`opflag`|Flags that indicate whether to enable various opacity packages during the radiative transfer calculation|| ## Grid atmospheres diff --git a/docs/usage/fordev.md b/docs/usage/fordev.md index 36792f50..a5698ecd 100644 --- a/docs/usage/fordev.md +++ b/docs/usage/fordev.md @@ -137,6 +137,15 @@ This variable determines how the cscale and vrad being assigned, and can be spec If you create your own NLTE departure coefficient grid following the current public grid to a `DirectAccess` file and you use the `nlte.DirectAccess.wrtie` function, note that the `wrtie` and `read` code changes the shape of b-grid, tau and rhox. Some extra test is needed to clear the situation. +## SME + +### `Trasf` function + +1. `AutoIonization` +2. Calculate Line center opacity using `LINEOPAC`. +3. Calculate Line contribution limits using `OPMTRX`. + - Steps 2 and 3 go through all the input lines + ## IDLSME ### Installation diff --git a/docs/usage/installation.md b/docs/usage/installation.md index 986ea35b..b6cb6275 100644 --- a/docs/usage/installation.md +++ b/docs/usage/installation.md @@ -13,6 +13,11 @@ Currently PySME is tested with Python verion 3.9-3.13. - For the "stable" version (recommended): - `pip install pysme-astro` +```{warning} +- PySME requires the pre-compiled C++/Fortran SME library to run. Currently we deliver the SME library with Linux and Mac versions; for Windows users, we recommend using WSL and hence the Linux version. +- The files (mainly atmosphere models and NLTE departure coefficient grids) required by PySME will be saved inside `~/.sme/`. These files can be large, so if your home directory is small, we recommend creating a softlink for `~/.sme`. +``` + ```{warning} PySME requires the pre-compled C++/Fortran SME library to run. Currently we deliver SME library with Linux and Mac version; for Windows users, we recommend to use WSL and hence the Linux version. 
``` diff --git a/src/pysme/atmosphere/marcsfile.py b/src/pysme/atmosphere/marcsfile.py index ca789d54..dd40ef4a 100644 --- a/src/pysme/atmosphere/marcsfile.py +++ b/src/pysme/atmosphere/marcsfile.py @@ -227,7 +227,7 @@ def __init__(self, filename, calcRHOX=False): if self.geom.upper() == 'SPH': self.radius = data["radius"] self.depth = 'RHOX' if self.geom.upper() == 'SPH' else 'TAU' # spherical models should depth calculate using RHOX - self.interp = 'TAU' # not sure this is needed for embedded models + self.interp = 'RHOX' # not sure this is needed for embedded models self.vturb = data["vturb"] self.abund = Abund(monh=data["monh"], pattern=data["abund_inclmonh"].to_numpy()[0]-data["monh"], type="H=12") diff --git a/src/pysme/gui/plot_plotly.py b/src/pysme/gui/plot_plotly.py index 7e3d5e67..4cb1627d 100644 --- a/src/pysme/gui/plot_plotly.py +++ b/src/pysme/gui/plot_plotly.py @@ -354,7 +354,7 @@ def create_plot(self, current_segment): if self.lines is not None and len(self.lines) != 0: seg_annotations = [] xlimits = self.wave[seg][[0, -1]] - xlimits *= 1 - self.vrad[seg] / clight + xlimits = xlimits * (1 - self.vrad[seg] / clight) lines = (self.lines.wlcent > xlimits[0]) & ( self.lines.wlcent < xlimits[1] ) diff --git a/src/pysme/solve.py b/src/pysme/solve.py index 79bd99b3..fa791a7a 100644 --- a/src/pysme/solve.py +++ b/src/pysme/solve.py @@ -600,8 +600,7 @@ def sanitize_parameter_names(self, sme, param_names): if sme.vrad_flag in ["fix", "none"]: sme.vrad_flag = "whole" logger.info( - "Removed fit parameter 'vrad', instead set radial velocity flag to %s", - sme.vrad_flag, + f"Removed fit parameter 'vrad', instead set radial velocity flag to {sme.vrad_flag}" ) if "cont" in param_names: @@ -734,9 +733,9 @@ def solve(self, sme, param_names=None, segments="all", bounds=None, step_sizes=N ) # Setup LineList only once (Mingjie: not needed?) 
- dll = self.synthesizer.get_dll() - dll.SetLibraryPath() - _ = dll.InputLineList(sme.linelist) + # dll = self.synthesizer.get_dll() + # dll.SetLibraryPath() + # _ = dll.InputLineList(sme.linelist) # Do the heavy lifting if self.nparam > 0: @@ -794,7 +793,7 @@ def solve(self, sme, param_names=None, segments="all", bounds=None, step_sizes=N # We could try to reuse the already calculated synthetic spectrum (if it already exists) # However it is much lower resolution then the newly synthethized one (usually) # Therefore the radial velocity wont be as good as when redoing the whole thing - sme = self.synthesizer.synthesize_spectrum(sme, segments) + sme = self.synthesizer.synthesize_spectrum(sme, segments, linelist_mode=linelist_mode) else: raise ValueError("No fit parameters given") diff --git a/src/pysme/synthesize.py b/src/pysme/synthesize.py index 7930f1cc..23a4c6bb 100644 --- a/src/pysme/synthesize.py +++ b/src/pysme/synthesize.py @@ -68,6 +68,80 @@ def _load_all_grid_points(cdr_folder): fname_map[key] = fname return np.array(param_list), fname_map +def _load_mask_and_ranges_from_compressed(full_path, wlcent, n_lines_total): + """ + Load the strong-line mask and wavelength ranges from a compressed CDR grid file. + + The compressed file is expected to contain: + - mask_bits : 1D uint8 array, bit-packed boolean mask of strong lines + - unique_widths : 1D float32 array, all distinct line_width values in this grid + - codes : 1D uint8/uint16/uint32 array, indices into unique_widths + corresponding to the strong lines. The order of `codes` + matches the order of strong-line indices given by: + strong_idx = np.nonzero(mask_full)[0] + + For each strong line i: + width[i] = unique_widths[codes[i]] + line_range_s[i] = wlcent[i] - 0.5 * width[i] + line_range_e[i] = wlcent[i] + 0.5 * width[i] + + All non-strong lines are assigned NaN in line_range_s/e. + + Parameters + ---------- + full_path : str + Path to the compressed npz file for a single grid point. 
+ wlcent : array-like, shape (n_lines_total,) + Central wavelengths of all lines in the global line list. + n_lines_total : int + Total number of lines in the line list. + + Returns + ------- + mask_full : np.ndarray, dtype=bool, shape (n_lines_total,) + Boolean array marking which lines are strong in this grid. + line_range_s : np.ndarray, dtype=float32, shape (n_lines_total,) + Start wavelength of the line range for each line (NaN for non-strong lines). + line_range_e : np.ndarray, dtype=float32, shape (n_lines_total,) + End wavelength of the line range for each line (NaN for non-strong lines). + """ + data = np.load(full_path) + mask_bits = data["mask_bits"] + unique_widths = data["unique_widths"] + codes = data["codes"] + n_lines_total = int(data["n_lines_total"]) + + # 1) Unpack bit-packed mask into a full boolean array + mask_full = np.unpackbits(mask_bits).astype(bool)[:n_lines_total] + if mask_full.size < n_lines_total: + raise ValueError( + f"{full_path}: unpacked mask length {mask_full.size} < n_lines_total={n_lines_total}. " + "Please ensure the compressed database matches the current VALD linelist." 
+ ) + mask_full = mask_full[:n_lines_total] + + # 2) Indices of strong lines and their corresponding widths + strong_idx = np.nonzero(mask_full)[0] + widths = unique_widths[codes] # one-to-one with strong_idx + + # 3) Reconstruct line_range_s/e from central wavelengths and widths + wlcent = np.asarray(wlcent) + if wlcent.size != n_lines_total: + raise ValueError( + f"{full_path}: wlcent length {wlcent.size} != n_lines_total={n_lines_total}" + ) + + wl_strong = wlcent[strong_idx] + s_vals = (wl_strong - 0.5 * widths).astype(np.float32) + e_vals = (wl_strong + 0.5 * widths).astype(np.float32) + + line_range_s = np.full(n_lines_total, np.nan, dtype=np.float32) + line_range_e = np.full(n_lines_total, np.nan, dtype=np.float32) + line_range_s[strong_idx] = s_vals + line_range_e[strong_idx] = e_vals + + return mask_full, line_range_s, line_range_e + class Synthesizer: def __init__(self, config=None, lfs_atmo=None, lfs_nlte=None, dll=None): self.config, self.lfs_atmo, self.lfs_nlte = setup_lfs( @@ -82,7 +156,7 @@ def __init__(self, config=None, lfs_atmo=None, lfs_nlte=None, dll=None): # This stores a reference to the currently used sme structure, so we only log it once self.known_sme = None self.update_cdr_switch = False - logger.info("Don't forget to cite your sources. Use sme.citation()") + # logger.info("Don't forget to cite your sources. 
Use sme.citation()") def get_atmosphere(self, sme): """ @@ -609,7 +683,8 @@ def synthesize_spectrum( opacity = [[] for _ in range(n_segments)] if contribution_function: sme.contribution_function = [[] for _ in range(n_segments)] - sme.linelist._lines['nlte_flag'] = -1 + if updateStructure: + sme.linelist._lines['nlte_flag'] = -1 # If wavelengths are already defined use those as output if "wave" in sme: @@ -632,7 +707,7 @@ def synthesize_spectrum( v_broad = np.sqrt(sme.vmac**2 + sme.vsini**2 + (clight/sme.ipres)**2) for i in range(sme.nseg): line_indices |= (sme.linelist['line_range_e'] > sme.wran[i][0] * (1 - vbroad_expend_ratio*v_broad/clight)) & (sme.linelist['line_range_s'] < sme.wran[i][1] * (1 + vbroad_expend_ratio*v_broad/clight)) - line_indices &= sme.linelist['central_depth'] > sme.cdr_depth_thres + line_indices &= self.flag_strong_lines_by_bins(sme.linelist['wlcent'], sme.linelist['central_depth']) sme.linelist._lines['use_indices'] = line_indices line_ion_mask = dll.InputLineList(sme.linelist[line_indices]) else: @@ -1267,65 +1342,161 @@ def flag_strong_lines_by_bins_old(self, df, bin_width=0.2, threshold=0.01, wl_co # ----- 4. Save into DataFrame & return ----- df[out_col] = keep_mask return df[out_col] - def flag_strong_lines_by_database( - self, sme, cdr_keep_database, dims=['teff', 'logg', 'monh'], mode='or' + self, sme, cdr_negligibe_database, dims=['teff', 'logg', 'monh'], mode='or' ): + """ + Flag strong lines for the current star using the compressed CDR strong-line database, + and write both the 'strong' flag and the corresponding line_range_s / line_range_e + into the SME linelist. + + The compressed CDR database is assumed to contain one npz file per grid point in + `cdr_negligibe_database`. 
Each file should include: + - mask_bits : 1D uint8 array, bit-packed boolean mask for strong lines + - unique_widths : 1D float32 array, distinct line_width values in this grid + - codes : 1D uint8/uint16/uint32 array indexing unique_widths, + ordered consistently with the strong-line indices. + + Interpolation modes + ------------------- + mode : {'or', 'nearest'} + 'or': + Within the parameter box around the current star, construct a Delaunay + triangulation in the selected parameter dimensions (dims). If the + current parameter lies inside a simplex, take all vertices of that simplex: + - Combine their strong masks using a logical OR across vertices. + - For line_range_s/e, take the union of ranges: s = nanmin(s_i), + e = nanmax(e_i) across vertices (ignoring NaNs). + 'nearest': + Within the parameter box, find the single nearest grid point and directly + use its strong mask and line_range_s/e. + + Parameters + ---------- + sme : SME_Structure + The current SME object (stellar parameters and linelist are taken from here). + cdr_negligibe_database : str + Directory containing the compressed CDR grid files. + dims : list of str, default ['teff', 'logg', 'monh'] + Parameter dimensions used for interpolation. If length is 3, vmic is ignored. + mode : {'or', 'nearest'}, default 'or' + Interpolation / combination strategy as described above. 
+ """ teff, logg, monh, vmic = sme.teff, sme.logg, sme.monh, sme.vmic - param = np.array([teff, logg, monh, vmic]) - param_grid, fname_map = _load_all_grid_points(cdr_keep_database) + param = np.array([teff, logg, monh, vmic], dtype=float) + + # Load all grid points and map (teff, logg, monh, vmic) -> filename + param_grid, fname_map = _load_all_grid_points(cdr_negligibe_database) + # In 3D mode, drop vmic dimension from grid and from the parameter vector if len(dims) == 3 and len(param_grid) > 0: - # Remove vmic in the grid - # vmic_mask = np.isclose(param_grid[:, -1], fixed_vmic) param_grid = param_grid[:, :-1] - - fname_map = { - k[:-1]: v for k, v in fname_map.items() - } + fname_map = {k[:-1]: v for k, v in fname_map.items()} param = param[:-1] - if len(param_grid) > 0: + if len(param_grid) == 0: + logger.warning("[cdr] Compressed strong-line database is empty; " + "flag_strong_lines_by_database will be skipped.") + return - thres = sme.linelist.cdr_paras_thres - - lower = np.array([teff - thres['teff'], logg - thres['logg'], monh - thres['monh'], vmic - thres['vmic']]) - upper = np.array([teff + thres['teff'], logg + thres['logg'], monh + thres['monh'], vmic + thres['vmic']]) + # ----- Select grid points within the parameter box around the current star ----- + thres = sme.linelist.cdr_paras_thres + lower = np.array( + [teff - thres['teff'], logg - thres['logg'], monh - thres['monh'], vmic - thres['vmic']], + dtype=float + ) + upper = np.array( + [teff + thres['teff'], logg + thres['logg'], monh + thres['monh'], vmic + thres['vmic']], + dtype=float + ) - if len(dims) == 3: - lower = lower[:-1] - upper = upper[:-1] + if len(dims) == 3: + lower = lower[:-1] + upper = upper[:-1] - in_box_mask = np.all((param_grid >= lower) & (param_grid <= upper), axis=1) - filtered_grid = param_grid[in_box_mask] + in_box_mask = np.all((param_grid >= lower) & (param_grid <= upper), axis=1) + filtered_grid = param_grid[in_box_mask] - if mode == 'or' and len(filtered_grid) >= 
len(dims)+1: - delaunay = Delaunay(filtered_grid) - simplex_index = delaunay.find_simplex(param) - if simplex_index >= 0: - logger.info('[cdr] Combining the bool array of strong lines.') - vertex_indices = delaunay.simplices[simplex_index] - vertices = filtered_grid[vertex_indices] + if filtered_grid.size == 0: + logger.warning("[cdr] No grid points found within the parameter box; " + "cannot flag strong lines from database.") + return - strong_array = [] - for pt in vertices: - logger.info(f'Using bool {fname_map[tuple(pt)]}') - strong_array.append(util.load_bool_sparse(os.path.join(cdr_keep_database, fname_map[tuple(pt)]))) - strong_final = np.logical_or.reduce(strong_array) - sme.linelist._lines['strong'] = strong_final - return + n_lines_total = len(sme.linelist) + wlcent = sme.linelist['wlcent'] - elif mode == 'nearest' and len(filtered_grid) > 0: - # box 内找到最近的 grid 点 - dists = cdist(filtered_grid, param[None, :]) - i_nearest = np.argmin(dists) - nearest_pt = filtered_grid[i_nearest] - logger.info(f'[cdr] Using nearest grid point {nearest_pt} in box for direct assignment.') + # ----- mode = 'or': use simplex vertices and OR-combine masks ----- + if mode == 'or' and filtered_grid.shape[0] >= len(dims) + 1: + from scipy.spatial import Delaunay + + delaunay = Delaunay(filtered_grid) + simplex_index = delaunay.find_simplex(param) + + if simplex_index >= 0: + logger.info("[cdr] Combining strong-line masks and line ranges from " + "compressed database (simplex OR).") + vertex_indices = delaunay.simplices[simplex_index] + vertices = filtered_grid[vertex_indices] + + masks = [] + lrs_list = [] + lre_list = [] + + for pt in vertices: + fname = fname_map[tuple(pt)] + full_path = os.path.join(cdr_negligibe_database, fname) + logger.info(f"[cdr] Using compressed strong-line file {fname} for OR-combination.") + mask_v, lrs_v, lre_v = _load_mask_and_ranges_from_compressed( + full_path, wlcent, n_lines_total + ) + masks.append(mask_v) + lrs_list.append(lrs_v) + 
lre_list.append(lre_v) - strong_final = util.load_bool_sparse(os.path.join(cdr_keep_database, fname_map[tuple(nearest_pt)])) - sme.linelist._lines['strong'] = strong_final + masks_arr = np.vstack(masks) # (n_vertex, N_lines) + strong_final = np.any(masks_arr, axis=0) # logical OR across vertices + + # For line_range_s/e, take the union across vertices: + # s = min over vertices (ignoring NaN), e = max over vertices. + lrs_arr = np.vstack(lrs_list) + lre_arr = np.vstack(lre_list) + + line_range_s_final = np.nanmin(lrs_arr, axis=0) + line_range_e_final = np.nanmax(lre_arr, axis=0) + + sme.linelist._lines['strong'] = strong_final + sme.linelist._lines['line_range_s'] = line_range_s_final + sme.linelist._lines['line_range_e'] = line_range_e_final return + # If the parameter is not inside any simplex, fall back to nearest strategy + logger.warning("[cdr] Target parameter is not inside any simplex; " + "falling back to 'nearest' strategy.") + mode = 'nearest' + + # ----- mode = 'nearest': use the nearest grid point within the box ----- + if mode == 'nearest': + dists = cdist(filtered_grid, param[None, :]) + i_nearest = np.argmin(dists) + nearest_pt = filtered_grid[i_nearest] + logger.info(f"[cdr] Using nearest grid point {nearest_pt} in box for " + "strong-line mask and line ranges.") + + fname = fname_map[tuple(nearest_pt)] + full_path = os.path.join(cdr_negligibe_database, fname) + strong_final, line_range_s_final, line_range_e_final = _load_mask_and_ranges_from_compressed( + full_path, wlcent, n_lines_total + ) + + sme.linelist._lines['strong'] = strong_final + sme.linelist._lines['line_range_s'] = line_range_s_final + sme.linelist._lines['line_range_e'] = line_range_e_final + return + + logger.warning("[cdr] flag_strong_lines_by_database: no valid interpolation/nearest " + "strategy found; 'strong' and line_range_s/e were not set.") + return + def get_H_3dnlte_correction_rbf(self, sme): """ Compute the 3D NLTE correction factor for hydrogen lines, using RBF 
interpolator and in intensities. diff --git a/src/pysme/util.py b/src/pysme/util.py index 1272d3bc..42aee284 100644 --- a/src/pysme/util.py +++ b/src/pysme/util.py @@ -832,3 +832,99 @@ def load_bool_sparse(path): out = np.zeros(size, dtype=bool) out[idx] = True return out.reshape(shape) + +def compress_one_grid(line_info, + strong_idx, + n_lines_total=None, + verbose: bool = False): + """ + 对一个格点: + - 使用 strong_idx 裁剪 line_info(只保留强线) + - 计算 line_width = e - s,并做字典编码: unique_widths + codes + - 从 strong_idx 构造完整 bool mask, 再 bit-pack 成 uint8 串 + + 自动根据数据推断 n_lines_total,避免 off-by-one。 + """ + + # ---- 0. 标准化 strong_idx:转 int64 + 排序(并去重)---- + strong_idx = np.asarray(strong_idx, dtype=np.int64).ravel() + strong_idx = np.unique(strong_idx) # 保证升序 & 无重复 + + # ---- 自动推断谱线总数 n_lines_total ---- + idx_col = line_info[:, 0].astype(np.int64) + max_idx = max(idx_col.max(), strong_idx.max()) + + if n_lines_total is None: + n_lines_total = int(max_idx) + 1 + else: + if max_idx >= n_lines_total: + if verbose: + print(f"[注意] 调整 n_lines_total: {n_lines_total} -> {int(max_idx)+1}") + n_lines_total = int(max_idx) + 1 + + if verbose: + print(f"推断 n_lines_total = {n_lines_total}") + print(f"strong_idx 范围: [{strong_idx.min()}, {strong_idx.max()}]") + print(f"index 列范围 : [{idx_col.min()}, {idx_col.max()}]") + + # ---- 1. 裁剪 line_info 到强线 ---- + if np.array_equal(idx_col, np.arange(line_info.shape[0], dtype=np.int64)): + # index 列 = 行号,直接 fancy index + line_info_strong = line_info[strong_idx] + else: + order = np.argsort(idx_col) + idx_sorted = idx_col[order] + pos = np.searchsorted(idx_sorted, strong_idx) + rows = order[pos] + line_info_strong = line_info[rows] + + M_active = line_info_strong.shape[0] + if verbose: + print(f"强线数 M_active = {M_active}") + + # ---- 2. 
计算 line_width 并做字典编码 ---- + s = line_info_strong[:, 2] + e = line_info_strong[:, 3] + line_width = e - s + + unique_widths, inv = np.unique(line_width, return_inverse=True) + K = unique_widths.size + + if K <= 256: + code_dtype = np.uint8 + elif K <= 65536: + code_dtype = np.uint16 + else: + code_dtype = np.uint32 + + codes = inv.astype(code_dtype) + unique_widths_f32 = unique_widths.astype(np.float32) + + if verbose: + print(f"不同 line_width 取值 K = {K} -> codes dtype = {code_dtype.__name__}") + + # ---- 3. 构造 bool mask 并 bitpack ---- + mask = np.zeros(n_lines_total, dtype=bool) + mask[strong_idx] = True + mask_bits = np.packbits(mask) + + if verbose: + print(f"mask_bits.shape = {mask_bits.shape}, " + f"约 {mask_bits.nbytes/1024**2:.3f} MiB/格点") + + return mask_bits, unique_widths_f32, codes + +def save_compressed_grid(mask_bits, unique_widths, codes, n_lines_total, out_path): + """ + 把一个格点压缩后的数据保存为 npz: + - mask_bits: uint8 bit-packed bool mask + - unique_widths: float32 + - codes: uint8/uint16/uint32 + """ + np.savez_compressed( + out_path, + mask_bits=mask_bits, + unique_widths=unique_widths, + codes=codes, + n_lines_total=np.array(n_lines_total, dtype=np.int32) + )