arithmetic.bib

@inproceedings{chen2024enabling,
  author    = {Chen, Yanxiang and de Oliveira Castro, Pablo and Bientinesi, Paolo and Iakymchuk, Roman},
  title     = {{Enabling Mixed-Precision with the Help of Tools: A Nekbone Case Study}},
  booktitle = {Parallel Processing and Applied Mathematics},
  publisher = {Springer Nature},
  year      = {2025},
  pages     = {34--50},
  isbn      = {978-3-031-85697-6},
  doi       = {10.1007/978-3-031-85697-6_3},
  abstract  = {Mixed-precision computing has the potential to significantly reduce the cost of exascale computations, but determining when and how to implement it in programs can be challenging. In this article, we consider Nekbone, a mini-application for the Computational Fluid Dynamics (CFD) solver Nek5000, as a case study, and propose a methodology for enabling mixed-precision with the help of computer arithmetic tools and roofline model. We evaluate the derived mixed-precision program by combining metrics in three dimensions: accuracy, time-to-solution, and energy-to-solution. Notably, the introduction of mixed-precision in Nekbone, reducing time-to-solution by 40.7\% and energy-to-solution by 47\% on 128 MPI ranks without sacrificing the accuracy.}
}

@article{posenitskiy2023TREXIO,
  author = {Posenitskiy, Evgeny and Chilkuri, Vijay Gopal and Ammar, Abdallah and Hapka, Michał and Pernal, Katarzyna and Shinde, Ravindra and Landinez Borda, Edgar Josué and Filippi, Claudia and Nakano, Kosuke and Kohulák, Otto and Sorella, Sandro and de Oliveira Castro, Pablo and Jalby, William and López Ríos, Pablo and Alavi, Ali and Scemama, Anthony},
  title = {{TREXIO: A file format and library for quantum chemistry}},
  journal = {The Journal of Chemical Physics},
  volume = {158},
  number = {17},
  pages = {174801},
  year = {2023},
  month = may,
  abstract = {TREXIO is an open-source file format and library developed for the storage and manipulation of data produced by quantum chemistry calculations. It is designed with the goal of providing a reliable and efficient method of storing and exchanging wave function parameters and matrix elements, making it an important tool for researchers in the field of quantum chemistry. In this work, we present an overview of the TREXIO file format and library. The library consists of a front-end implemented in the C programming language and two different back-ends: a text back-end and a binary back-end utilizing the hierarchical data format version 5 library, which enables fast read and write operations. It is compatible with a variety of platforms and has interfaces for Fortran, Python, and OCaml programming languages. In addition, a suite of tools have been developed to facilitate the use of the TREXIO format and library, including converters for popular quantum chemistry codes and utilities for validating and manipulating data stored in TREXIO files. The simplicity, versatility, and ease of use of TREXIO make it a valuable resource for researchers working with quantum chemistry data.},
  issn = {0021-9606},
  doi = {10.1063/5.0148161},
  url = {https://doi.org/10.1063/5.0148161},
  pdf = {https://pubs.aip.org/aip/jcp/article-pdf/doi/10.1063/5.0148161/17355866/174801_1_5.0148161.pdf}
}

@article{elarar2022Stochastic,
  title = {Stochastic Rounding Variance and Probabilistic Bounds: A New Approach},
  author = {El Arar, El-Mehdi and Sohier, Devan and de Oliveira Castro, Pablo and Petit, Eric},
  journal = {{SIAM} Journal on Scientific Computing},
  volume = {45},
  number = {5},
  pages = {C255--C275},
  year = {2023},
  doi = {10.1137/22M1510819},
  url = {https://doi.org/10.1137/22M1510819},
  pdf = {https://hal.archives-ouvertes.fr/hal-03722888/file/main.pdf},
  abstract = {Stochastic rounding ({SR}) offers an alternative to the deterministic {IEEE}-754 floating-point rounding modes. In some applications such as {PDEs}, {ODEs}, and neural networks, {SR} empirically improves the numerical behavior and convergence to accurate solutions while the theoretical background remains partial. Recent works by Ipsen, Zhou, Higham, and Mary have computed {SR} probabilistic error bounds for basic linear algebra kernels. For example, the inner product {SR} probabilistic bound of the forward error is proportional to $\sqrt{n}u$ instead of $nu$ for the default rounding mode. To compute the bounds, these works show that the errors accumulated in computation form a martingale. This paper proposes an alternative framework to characterize {SR} errors based on the computation of the variance. We pinpoint common error patterns in numerical algorithms and propose a lemma that bounds their variance. For each probability and through the Bienaymé–Chebyshev inequality, this bound leads to a better probabilistic error bound in several situations. Our method has the advantage of providing a tight probabilistic bound for all algorithms fitting our model. We show how the method can be applied to give {SR} error bounds for the inner product and Horner polynomial evaluation.}
}

@inproceedings{elarar2022positiv,
  title = {{The Positive Effects of Stochastic Rounding in Numerical Algorithms}},
  author = {El Arar, El-Mehdi and Sohier, Devan and de Oliveira Castro, Pablo and Petit, Eric},
  url = {https://hal.archives-ouvertes.fr/hal-03716058},
  booktitle = {{29th IEEE Symposium on Computer Arithmetic ARITH 2022}},
  address = {Virtual conference, France},
  year = {2022},
  pdf = {https://hal.archives-ouvertes.fr/hal-03716058/file/main.pdf},
  hal_version = {v1},
  abstract = {Recently, stochastic rounding (SR) has been implemented in specialized hardware but most current computing nodes do not yet support this rounding mode. Several works empirically illustrate the benefit of stochastic rounding in various fields such as neural networks and ordinary differential equations. For some algorithms, such as summation, inner product or matrix-vector multiplication, it has been proved that SR provides probabilistic error bounds better than the traditional deterministic bounds. In this paper, we extend this theoretical ground for a wider adoption of SR in computer architecture. First, we analyze the biases of the two SR modes: SR-nearness and SR-up-or-down. We demonstrate on a case-study of Euler's forward method that IEEE-754 default rounding modes and SR-up-or-down accumulate rounding errors across iterations and that SR-nearness, being unbiased, does not. Second, we prove a O(√n) probabilistic bound on the forward error of Horner's polynomial evaluation method with SR, improving on the known deterministic O(n) bound.}
}

@article{Kiar2021Numerical,
  author = {Kiar, Gregory and Chatelain, Yohan and de Oliveira Castro, Pablo and Petit, Eric and Rokem, Ariel and Varoquaux, Gaël and Misic, Bratislav and Evans, Alan C. and Glatard, Tristan},
  title = {Numerical uncertainty in analytical pipelines lead to impactful variability in brain networks},
  journal = {PLOS ONE},
  publisher = {Public Library of Science},
  year = {2021},
  month = nov,
  volume = {16},
  number = {11},
  pages = {1--16},
  doi = {10.1371/journal.pone.0250755},
  url = {https://doi.org/10.1371/journal.pone.0250755},
  abstract = {The analysis of brain-imaging data requires complex processing pipelines to support findings on brain function or pathologies. Recent work has shown that variability in analytical decisions, small amounts of noise, or computational environments can lead to substantial differences in the results, endangering the trust in conclusions. We explored the instability of results by instrumenting a structural connectome estimation pipeline with Monte Carlo Arithmetic to introduce random noise throughout. We evaluated the reliability of the connectomes, the robustness of their features, and the eventual impact on analysis. The stability of results was found to range from perfectly stable (i.e. all digits of data significant) to highly unstable (i.e. 0 − 1 significant digits). This paper highlights the potential of leveraging induced variance in estimates of brain connectivity to reduce the bias in networks without compromising reliability, alongside increasing the robustness and potential upper-bound of their applications in the classification of individual differences. We demonstrate that stability evaluations are necessary for understanding error inherent to brain imaging experiments, and how numerical analysis can be applied to typical analytical workflows both in brain imaging and other domains of computational sciences, as the techniques used were data and context agnostic and globally relevant. Overall, while the extreme variability in results due to analytical instabilities could severely hamper our understanding of brain organization, it also affords us the opportunity to increase the robustness of findings.}
}

@article{Sohier2021Confidence,
  author = {Sohier, Devan and de Oliveira Castro, Pablo and F{\'e}votte, Fran{\c{c}}ois and Lathuili{\`e}re, Bruno and Petit, Eric and Jamond, Olivier},
  title = {Confidence Intervals for Stochastic Arithmetic},
  year = {2021},
  issue_date = {April 2021},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {47},
  number = {2},
  issn = {0098-3500},
  url = {https://doi.org/10.1145/3432184},
  doi = {10.1145/3432184},
  abstract = {Quantifying errors and losses due to the use of Floating-point (FP) calculations in industrial scientific computing codes is an important part of the Verification, Validation, and Uncertainty Quantification process. Stochastic Arithmetic is one way to model and estimate FP losses of accuracy, which scales well to large, industrial codes. It exists in different flavors, such as CESTAC or MCA, implemented in various tools such as CADNA, Verificarlo, or Verrou. These methodologies and tools are based on the idea that FP losses of accuracy can be modeled via randomness. Therefore, they share the same need to perform a statistical analysis of programs results to estimate the significance of the results. In this article, we propose a framework to perform a solid statistical analysis of Stochastic Arithmetic. This framework unifies all existing definitions of the number of significant digits (CESTAC and MCA), and also proposes a new quantity of interest: the number of digits contributing to the accuracy of the results. Sound confidence intervals are provided for all estimators, both in the case of normally distributed results, and in the general case. The use of this framework is demonstrated by two case studies of industrial codes: Europlexus and code\_aster.},
  journal = {ACM Transactions on Mathematical Software},
  month = apr,
  articleno = {10},
  numpages = {33},
  keywords = {Monte Carlo Arithmetic, confidence intervals, numerical analysis, Stochastic arithmetic},
  pdf = {https://hal.archives-ouvertes.fr/hal-01827319/file/confidence.pdf},
  documenturl = {confidence_interval_slides.pdf}
}

@inproceedings{Defour2021Shadow,
  title = {{Shadow computation with BFloat16 to compute numerical accuracy}},
  author = {Defour, David and de Oliveira Castro, Pablo and Istoan, Matei and Petit, Eric},
  booktitle = {{IEEE 28th Symposium on Computer Arithmetic (ARITH)}},
  year = {2021},
  month = jun,
  pdf = {https://hal.archives-ouvertes.fr/hal-03159965v2/}
}

@article{Brun2021Study,
  author = {Brun, E. and Defour, D. and de Oliveira Castro, P. and Istoan, M. and Mancusi, D. and Petit, E. and Vaquet, A.},
  journal = {IEEE Transactions on Emerging Topics in Computing},
  title = {A Study of the Effects and Benefits of Custom-Precision Mathematical Libraries for HPC Codes},
  year = {2021},
  volume = {9},
  number = {3},
  pages = {1467--1478},
  doi = {10.1109/TETC.2021.3070422},
  abstract = {Mathematical libraries are being specifically developed to use fixed-width data-paths on processors and target common floating-point formats like binary32 and binary64. In this article we propose a framework to evaluate the effects of mathematical library calls accuracy in scientific computations. First, our tool collects for each call-site of a mathematical function the input-data profile. Then, using a heuristic exploration algorithm, we estimate the minimal required accuracy by rounding the result to lower precisions. The data profile and accuracy measurement per call-site is used to speculatively select the mathematical function implementation with the most appropriate accuracy for a given scenario. We have tested the methodology with the Intel MKL VML library with predefined accuracy levels. We demonstrate the benefits of our approach on two real-world applications: SGP4, a satellite tracking application, and PATMOS, a Monte Carlo neutron transport code. We experiment and discuss its generalization across data-sets, and finally propose a speculative runtime implementation for PATMOS. The experiment provides an insight into the performance improvements that can be achieved by leveraging the control of per-function call-site accuracy-mode execution of the Intel MKL VML library.}
}

@article{Kiar2020Comparing,
  author = {Kiar, Gregory and de Oliveira Castro, Pablo and Rioux, Pierre and Petit, Eric and Brown, Shawn T. and Evans, Alan C. and Glatard, Tristan},
  title = {Comparing perturbation models for evaluating stability of neuroimaging pipelines},
  journal = {The International Journal of High Performance Computing Applications},
  volume = {34},
  number = {5},
  pages = {491--501},
  year = {2020},
  doi = {10.1177/1094342020926237},
  url = {https://doi.org/10.1177/1094342020926237},
  documenturl = {https://youtu.be/TymQQJ6iYRQ},
  abstract = {With an increase in awareness regarding a troubling lack of reproducibility in analytical software tools, the degree of validity in scientific derivatives and their downstream results has become unclear. The nature of reproducibility issues may vary across domains, tools, data sets, and computational infrastructures, but numerical instabilities are thought to be a core contributor. In neuroimaging, unexpected deviations have been observed when varying operating systems, software implementations, or adding negligible quantities of noise. In the field of numerical analysis, these issues have recently been explored through Monte Carlo Arithmetic, a method involving the instrumentation of floating-point operations with probabilistic noise injections at a target precision. Exploring multiple simulations in this context allows the characterization of the result space for a given tool or operation. In this article, we compare various perturbation models to introduce instabilities within a typical neuroimaging pipeline, including (i) targeted noise, (ii) Monte Carlo Arithmetic, and (iii) operating system variation, to identify the significance and quality of their impact on the resulting derivatives. We demonstrate that even low-order models in neuroimaging such as the structural connectome estimation pipeline evaluated here are sensitive to numerical instabilities, suggesting that stability is a relevant axis upon which tools are compared, alongside more traditional criteria such as biological feasibility, computational efficiency, or, when possible, accuracy. Heterogeneity was observed across participants which clearly illustrates a strong interaction between the tool and data set being processed, requiring that the stability of a given tool be evaluated with respect to a given cohort. We identify use cases for each perturbation method tested, including quality assurance, pipeline error detection, and local sensitivity analysis, and make recommendations for the evaluation of stability in a practical and analytically focused setting. Identifying how these relationships and recommendations scale to higher order computational tools, distinct data sets, and their implication on biological feasibility remain exciting avenues for future work.}
}

@inproceedings{Defour2020CustomPrecision,
  title     = {Custom-Precision Mathematical Library Explorations for Code Profiling and Optimization},
  author    = {Defour, David and de Oliveira Castro, Pablo and Istoan, Matei and Petit, Eric},
  booktitle = {27th {IEEE} Symposium on Computer Arithmetic, {ARITH} 2020},
  pages     = {121--124},
  year      = {2020},
  pdf       = {https://hal.archives-ouvertes.fr/hal-02563852/document},
  abstract  = {The typical processors used for scientific computing have fixed-width data-paths. This implies that mathematical libraries were specifically developed to target each of these fixed precisions (binary16, binary32, binary64). However, to address the increasing energy consumption and throughput requirements of scientific applications, library and hardware designers are moving beyond this one-size-fits-all approach. In this article we propose to study the effects and benefits of using user-defined floating-point formats and target accuracies in calculations involving mathematical functions. Our tool collects input-data profiles and iteratively explores lower precisions for each call-site of a mathematical function in user applications. This profiling data will be a valuable asset for specializing and fine-tuning mathematical function implementations for a given application. We demonstrate the tool's capabilities on SGP4, a satellite tracking application. The profile data shows the potential for specialization and provides insight into answering where it is useful to provide variable-precision designs for elementary function evaluation.}
}

@inproceedings{Chatelain2019automatic,
  author      = {Chatelain, Yohan and Petit, Eric and de Oliveira Castro, Pablo and Lartigue, Ghislain and Defour, David},
  title       = {Automatic exploration of reduced floating-point representations in iterative methods},
  booktitle   = {Euro-Par 2019 Parallel Processing - 25th International Conference},
  series      = {Lecture Notes in Computer Science},
  publisher   = {Springer},
  year        = {2019},
  pdf         = {dyn_adapt_precision19.pdf},
  documenturl = {dyn_adapt_slides19.pdf},
  abstract    = {With the ever-increasing need for computation of scientific applications, new application domains, and major energy constraints, the landscape of floating-point computation is changing.  New floating-point representation formats are emerging and there is a need for tools to simulate their impact in legacy codes.  In this paper, we propose an automatic tool to evaluate the effect of adapting the floating point precision for each operation over time, which is particularly useful in iterative schemes.  We present a backend to emulate any IEEE-754 floating-point operation in lower precision.  We tested the numerical errors resilience of our solutions thanks to Monte Carlo Arithmetic and  demonstrated the effectiveness of this methodology on YALES2, a large Combustion-CFD HPC code, by achieving 28\% to 67\% reduction in communication volume by lowering precision.}
}

@inproceedings{Chatelain2018veritracer,
  author = {Chatelain, Yohan and de Oliveira Castro, Pablo and Petit, Eric and Defour, David and Bieder, Jordan and Torrent, Marc},
  title = {{VeriTracer: Context-enriched tracer for floating-point arithmetic analysis}},
  booktitle = {25th {IEEE} Symposium on Computer Arithmetic, {ARITH} 2018, Amherst, MA, USA. June 25th-27th, 2018},
  pages = {65--72},
  publisher = {IEEE},
  year = {2018},
  abstract = {VeriTracer automatically instruments a code and traces the accuracy of floating-point variables over time. VeriTracer enriches the visual traces with contextual information such as the call site path in which a value was modified. Contextual information is important to understand how the floating-point errors propagate in complex codes. VeriTracer is implemented as an LLVM compiler tool on top of Verificarlo. We demonstrate how VeriTracer can detect accuracy loss and quantify the impact of using a compensated algorithm on ABINIT, an industrial HPC application for Ab Initio quantum computation.},
  pdf = {arith2018veritracer.pdf},
  documenturl = {arith2018slides.pdf}
}

@inproceedings{Denis2016verificarlo,
  author = {Denis, Christophe and de Oliveira Castro, Pablo and Petit, Eric},
  title = {Verificarlo: Checking Floating Point Accuracy through Monte Carlo Arithmetic},
  booktitle = {23rd {IEEE} Symposium on Computer Arithmetic, {ARITH} 2016, Silicon Valley, CA, USA, July 10-13, 2016},
  pages = {55--62},
  year = {2016},
  url = {https://doi.org/10.1109/ARITH.2016.31},
  doi = {10.1109/ARITH.2016.31},
  abstract = {Numerical accuracy of floating point computation is a well studied topic which has not made its way to the end-user in scientific computing. Yet, it has become a critical issue with the recent requirements for code modernization to harness new highly parallel hardware and perform higher resolution computation. To democratize numerical accuracy analysis, it is important to propose tools and methodologies to study large use cases in a reliable and automatic way. In this paper, we propose verificarlo, an extension to the LLVM compiler to automatically use Monte Carlo Arithmetic in a transparent way for the end-user. It supports all the major languages including C, C++, and Fortran. Unlike source-to-source approaches, our implementation captures the influence of compiler optimizations on the numerical accuracy. We illustrate how Monte Carlo Arithmetic using the verificarlo tool outperforms the existing approaches on various use cases and is a step toward automatic numerical analysis.},
  pdf = {https://hal.archives-ouvertes.fr/hal-01192668/file/verificarlo-preprint.pdf}
}