Source code for ndmanager.API.iaea.sublibrary

"""A class to manage a nuclear data sublibrary originating from the IAEA website"""

import multiprocessing as mp
import re
import tempfile
import zipfile
from contextlib import chdir
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

from ndmanager.API.nuclide import Nuclide


[docs] @dataclass class IAEASublibrary: """A class to manage a nuclear data sublibrary originating from the IAEA website. Raises: ValueError: If an unknown name style is passed to IAEASublibrary.download e: Raise errors raised by parallel download of nuclear data files Returns: IAEASublibrary: The sublibrary instance """ kind: str library_root: str index_node: str lib: str library: str nsub: int sublibrary: str urls: Dict[str, str]
[docs] @classmethod def from_website(cls, root: str, node: str, kind: str) -> "IAEASublibrary": """Constructor to build a sublibrary using IAEA's website. Args: root (str): Root url of the library node (str): Name of the index file in the root directory kind (str): The kind of sublibrary (from NSUB) Returns: IAEASublibrary: An IAEASublibrary object """ kwargs = {} kwargs["library_root"] = root kwargs["index_node"] = node kwargs["urls"] = {} kwargs["kind"] = kind url = root + node r = requests.get(url, timeout=600) html = BeautifulSoup(r.text, "html.parser") tags = html.find_all("a") index = html.find_all("pre")[0].text.split("\n") materials = cls.parse_index(index, kwargs) for matname, tag in zip(materials, tags): if kwargs["nsub"] == 12: # TSL file name = (tag.get("href")).split("/")[-1].rstrip(".zip") kwargs["urls"][name] = root + tag.get("href") else: try: nuclide = Nuclide.from_iaea_name(matname).name kwargs["urls"][nuclide] = root + tag.get("href") except (KeyError, ValueError): # If the element does not exist, mostly for evaluations with # a neutron target, e.g. 0-nn-1 in cendl32 kwargs["urls"][matname] = root + tag.get("href") return cls(**kwargs)
def __getitem__(self, key: str) -> str: """Define the [] get operator Args: key (str): Name of the desired material Returns: str: URL of the zip file """ return self.urls[key] def __setitem__(self, key: str, value: str) -> None: """Define the [] set operator Args: key (str): name of the material value (str): URL of the zip file """ self.urls[key] = value def __len__(self) -> int: """The number of materials in the sublibrary Returns: int: The number of materials in the sublibrary """ return len(self.urls)
[docs] def keys(self) -> List[str]: """The list of materials in the sublibrary Returns: List[str]: List of materials in the sublibrary """ return list(self.urls.keys())
[docs] @staticmethod def parse_index(index: List[str], kwargs: Dict[str, Any]) -> List[str]: """Parse an sublibrary index from the IAEA website, e.g. https://www-nds.iaea.org/public/download-endf/JEFF-3.3/n-index.htm Args: index (List[str]): The index file lines kwargs (Dict[str, Any]): The dictionnary of attributes Returns: List[str]: The list of material names """ materials = [] for line in index: splat = line.split() if len(splat) == 0: continue if re.match(r" Lib:", line): kwargs["lib"] = splat[1] if re.match(r" Library:", line): kwargs["library"] = " ".join(splat[1:]) if re.match(r" Sub-library:", line): kwargs["nsub"] = int(splat[1][5:]) kwargs["sublibrary"] = " ".join(splat[2:]) if splat[0] == "#)": span = re.search(r"Material[ ]+", line).span() continue if re.match(r"\d+\)", splat[0]) is not None: s = IAEASublibrary.insert_separator(line, span[0]) s = IAEASublibrary.insert_separator(s, span[1] + 1) materials.append(s.split("$")[1].strip()) return materials
[docs] @staticmethod def insert_separator(string: str, pos: int) -> str: """Insert a $ at the `pos` position in the string. Args: string (str): The base string pos (int): The position of the $ Returns: str: A new string with the $ inserted """ return string[:pos] + "$" + string[pos:]
[docs] def fetch_tape(self, material: str) -> str: """Fetch the content of an ENDF6 tape for the desired material Args: material (str): The name of the material Returns: str: The content of the tape """ url = self[material] with tempfile.TemporaryDirectory() as tmpdir: with chdir(tmpdir): content = requests.get(url, timeout=600).content zipname = url.split("/")[-1] with open(zipname, "wb") as f: f.write(content) with zipfile.ZipFile(zipname) as zf: zf.extractall() datafile = f"{zipname[:-4]}.dat" with open(datafile, "r", encoding="utf-8", newline="") as f: return f.read()
[docs] def download_single(self, material: str, targetfile: str | Path) -> None: """Download an ENDF6 tape for the desired material Args: material (str): The name of the material targetfile (str | Path): The path to write the tape to """ content = self.fetch_tape(material) target = Path(targetfile) target.parent.mkdir(parents=True, exist_ok=True) with open(target, "w", encoding="utf-8", newline="") as f: print(content, file=f, end="")
[docs] def download( self, targetdir: str | Path, style: str = "nuclide", processes: int = 1 ) -> None: """Download the all the tapes in the sublibrary to a directory specified by `targetdir`. Args: targetdir (str | Path): Path to the directory to write the tapes in style (str, optional): Style of the tape names. Defaults to "nuclide". In {'nuclide', 'tsl', 'atom'} processes (int, optional): Number of download jobs to launch. Defaults to 1. Raises: ValueError: If an unknown name style is passed to IAEASublibrary.download e: Raise errors raised by parallel download of nuclear data files """ bar_format = "{l_bar}{bar:40}| {n_fmt}/{total_fmt} [{elapsed}s]" pbar = tqdm(total=len(self), bar_format=bar_format) targets = [] nuclides = [] for nuclide in self.urls: if style in ("nuclide", "tsl"): name = nuclide elif style == "atom": name = Nuclide.from_name(nuclide).element else: raise ValueError("Unknown name style") targets.append(Path(targetdir) / f"{name}.endf6") nuclides.append(nuclide) if processes == 1: for nuclide, target in zip(nuclides, targets): description = f"{self.lib}/{self.kind}/{name}" pbar.set_description(f"{description:<40}") self.download_single(nuclide, target) pbar.update() pbar.close() else: def error_callback(e): raise e def update_pbar(_): pbar.update() description = f"{self.lib}/{self.kind}" pbar.set_description(f"{description:<25}") with mp.get_context("spawn").Pool(processes) as p: for nuclide, target in zip(nuclides, targets): p.apply_async( self.download_single, args=(nuclide, target), callback=update_pbar, error_callback=error_callback, ) p.close() p.join() pbar.close()