recent.bib

% Preprint on MLKAPS (HPC kernel auto-tuning); available on arXiv as 2501.05811.
@unpublished{jam2025MLKAPS,
  title = {{MLKAPS: Machine Learning and Adaptive Sampling for HPC Kernel Auto-tuning}},
  author = {Jam, Mathys and Petit, Eric and de Oliveira Castro, Pablo and Defour, David and Henry, Greg and Jalby, William},
  note = {working paper or preprint},
  year = {2025},
  month = jan,
  eprint = {2501.05811},
  archiveprefix = {arXiv},
  pdf = {https://arxiv.org/pdf/2501.05811},
  abstract = {Many High-Performance Computing (HPC) libraries rely on decision trees to select the best kernel hyperparameters at runtime, depending on the input and environment. However, finding optimized configurations for each input and environment is challenging and requires significant manual effort and computational resources. This paper presents MLKAPS, a tool that automates this task using machine learning and adaptive sampling techniques. MLKAPS generates decision trees that tune HPC kernels' design parameters to achieve efficient performance for any user input. MLKAPS scales to large input and design spaces, outperforming similar state-of-the-art auto-tuning tools in tuning time and mean speedup. We demonstrate the benefits of MLKAPS on the highly optimized Intel MKL dgetrf LU kernel and show that MLKAPS finds blindspots in the manual tuning of HPC experts. It improves over 85\% of the inputs with a geomean speedup of x1.30. On the Intel MKL dgeqrf QR kernel, MLKAPS improves performance on 85\% of the inputs with a geomean speedup of x1.18.}
}
% Preprint hosted on HAL (hal-04787542): error analysis of sum-product algorithms under stochastic rounding.
@unpublished{deoliveiracastro2024error,
  author   = {de Oliveira Castro, Pablo and El Arar, El-Mehdi and Petit, Eric and Sohier, Devan},
  title    = {{Error Analysis of sum-product algorithms under stochastic rounding}},
  year     = {2024},
  month    = nov,
  note     = {working paper or preprint},
  keywords = {Stochastic rounding ; Martingales ; Rounding error analysis ; Floating-point arithmetic ; Computation DAG ; Karatsuba multiplication},
  pdf      = {https://hal.science/hal-04787542v1/file/main.pdf},
  abstract = {The quality of numerical computations can be measured through their forward error, for which finding good error bounds is challenging in general. For several algorithms and using stochastic rounding (SR), probabilistic analysis has been shown to be an effective alternative for obtaining tight error bounds. This analysis considers the distribution of errors and evaluates the algorithm's performance on average. Using martingales and the Azuma-Hoeffding inequality, it provides error bounds that are valid with a certain probability and in O(n√u) instead of deterministic worst-case bounds in O(nu), where n is the number of operations and u is the unit roundoff. In this paper, we present a general method that automatically constructs a martingale for any computation scheme with multi-linear errors based on additions, subtractions, and multiplications. We apply this generalization to algorithms previously studied with SR, such as pairwise summation and the Horner algorithm, and prove equivalent results. We also analyze a previously unstudied algorithm, Karatsuba polynomial multiplication, which illustrates that the method can handle reused intermediate computations.}
}
% Conference paper at ParCFD 2024 (Bonn, Germany) introducing Verificarlo CI; record hosted on HAL (hal-04643176).
@inproceedings{delval2024verificarloCI,
  author    = {Delval, Aur{\'e}lien and Coppens, Fran{\c c}ois and Petit, Eric and Iakymchuk, Roman and de Oliveira Castro, Pablo},
  title     = {{Verificarlo CI: continuous integration for numerical optimization and debugging}},
  booktitle = {{Parallel Computational Fluid Dynamics (ParCFD) 2024}},
  address   = {Bonn, Germany},
  year      = {2024},
  month     = sep,
  url       = {https://hal.science/hal-04643176},
  pdf       = {https://hal.science/hal-04643176v1/file/main.pdf},
  abstract  = {Floating-point accuracy is an important concern when developing numerical simulations or other compute-intensive codes. Tracking the introduction of numerical regression is often delayed until it provokes unexpected bug for the end-user. In this paper, we introduce Verificarlo CI, a continuous integration workflow for the numerical optimization and debugging of a code over the course of its development. We demonstrate applicability of Verificarlo CI on two test-case applications.}
}
% Journal article: SIAM Journal on Scientific Computing 46(5), DOI 10.1137/23M1563001.
@article{elarar2024bounds,
  title = {{Bounds on non-linear errors for variance computation with stochastic rounding}},
  author = {El Arar, El-Mehdi and Sohier, Devan and de Oliveira Castro, Pablo and Petit, Eric},
  journal = {SIAM Journal on Scientific Computing},
  volume = {46},
  number = {5},
  pages = {B579--B599},
  year = {2024},
  pdf = {https://hal.science/hal-04056057/file/main.pdf},
  doi = {10.1137/23M1563001},
  abstract = {The main objective of this work is to investigate nonlinear errors and pairwise summation using stochastic rounding (SR) in variance computation algorithms. We estimate the forward error of computations under SR through two methods: the first is based on a bound of the variance and the Bienaymé–Chebyshev inequality, while the second is based on martingales and the Azuma–Hoeffding inequality. The study shows that for pairwise summation, using SR results in a probabilistic bound of the forward error proportional to \(\sqrt{\log (n)}u\) rather than the deterministic bound in \(O(\log (n)u)\) when using the default rounding mode. We examine two algorithms that compute the variance, one called “textbook” and the other “two-pass,” which both exhibit nonlinear errors. Using the two methods mentioned above, we show that the forward errors of these algorithms have probabilistic bounds under SR in \(O(\sqrt{n}u)\) instead of \(nu\) for the deterministic bounds. We show that this advantage holds using pairwise summation for both textbook and two-pass, with probabilistic bounds of the forward error proportional to \(\sqrt{\log (n)}u\).}
}