<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" article-type="discussion">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">exposome</journal-id>
<journal-title-group>
<journal-title>Exposome</journal-title></journal-title-group>
<issn pub-type="epub">2635-2265</issn>
<publisher>
<publisher-name>Oxford University Press</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1093/exposome/osaf010</article-id>
<article-id pub-id-type="publisher-id">osaf010</article-id>
<article-categories>
<subj-group subj-group-type="category-toc-heading">
<subject>Commentary</subject>
</subj-group>
<subj-group subj-group-type="category-taxonomy-collection">
<subject>AcademicSubjects/MED00305</subject>
<subject>AcademicSubjects/MED00860</subject>
<subject>AcademicSubjects/SCI01040</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A statistical workflow for analyzing the untargeted chemical exposome and metabolome in epidemiologic studies using high-dimensional mixture methods</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-6047-4406</contrib-id>
<name><surname>Young</surname><given-names>Anna S</given-names></name><degrees>PhD</degrees>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization" degree-contribution="lead">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis" degree-contribution="lead">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology" degree-contribution="equal">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization" degree-contribution="lead">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Writing - original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft" degree-contribution="lead">Writing - original draft</role>
<aff><institution>Gangarosa Department of Environmental Health, Emory Rollins School of Public Health</institution>, Atlanta, GA, <country country="US">United States</country></aff>
<xref ref-type="corresp" rid="osaf010-cor1"/>
<email xlink:type="simple">ayoung@mail.harvard.edu</email>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-6058-5907</contrib-id>
<name><surname>Gennings</surname><given-names>Chris</given-names></name><degrees>PhD</degrees>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization" degree-contribution="supporting">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology" degree-contribution="equal">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Writing - review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing" degree-contribution="equal">Writing - review &amp; editing</role>
<aff><institution>Department of Environmental Medicine and Public Health, Icahn School of Medicine at Mount Sinai</institution>, NY, NY, <country country="US">United States</country></aff>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6695-3291</contrib-id>
<name><surname>Eick</surname><given-names>Stephanie M</given-names></name><degrees>PhD</degrees>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology" degree-contribution="supporting">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Writing - review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing" degree-contribution="equal">Writing - review &amp; editing</role>
<aff><institution>Gangarosa Department of Environmental Health, Emory Rollins School of Public Health</institution>, Atlanta, GA, <country country="US">United States</country></aff>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-7311-2298</contrib-id>
<name><surname>Liang</surname><given-names>Donghai</given-names></name><degrees>PhD</degrees>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology" degree-contribution="supporting">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Writing - review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing" degree-contribution="equal">Writing - review &amp; editing</role>
<aff><institution>Gangarosa Department of Environmental Health, Emory Rollins School of Public Health</institution>, Atlanta, GA, <country country="US">United States</country></aff>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-2912-398X</contrib-id>
<name><surname>Walker</surname><given-names>Douglas I</given-names></name><degrees>PhD</degrees>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization" degree-contribution="lead">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition" degree-contribution="lead">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology" degree-contribution="equal">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org" vocab-term="Writing - review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing" degree-contribution="equal">Writing - review &amp; editing</role>
<aff><institution>Gangarosa Department of Environmental Health, Emory Rollins School of Public Health</institution>, Atlanta, GA, <country country="US">United States</country></aff>
</contrib>
</contrib-group>
<author-notes>
<corresp id="osaf010-cor1">Corresponding author: Anna S. Young, PhD, Gangarosa Department of Environmental Health, Emory Rollins School of Public Health, 1518 Clifton Rd NE, Atlanta, GA 30322, United States (<email>ayoung@mail.harvard.edu</email>; <email>anna.s.young@emory.edu</email>).</corresp>
</author-notes>
<pub-date pub-type="cover"><year>2025</year></pub-date>
<pub-date pub-type="collection" iso-8601-date="2025-01-22"><day>22</day><month>01</month><year>2025</year></pub-date>
<pub-date pub-type="epub" iso-8601-date="2025-10-01"><day>01</day><month>10</month><year>2025</year></pub-date>
<volume>5</volume><issue>1</issue>
<elocation-id>osaf010</elocation-id>
<history>
<date date-type="received"><day>14</day><month>01</month><year>2025</year></date>
<date date-type="rev-recd"><day>23</day><month>07</month><year>2025</year></date>
<date date-type="accepted"><day>24</day><month>09</month><year>2025</year></date>
<date date-type="corrected-typeset"><day>13</day><month>11</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>© The Author(s) 2025. Published by Oxford University Press.</copyright-statement>
<copyright-year>2025</copyright-year>
<license license-type="cc-by-nc" xlink:href="https://creativecommons.org/licenses/by-nc/4.0/">
<license-p>This is an Open Access article distributed under the terms of the Creative Commons Attribution-NonCommercial License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by-nc/4.0/">https://creativecommons.org/licenses/by-nc/4.0/</ext-link>), which permits non-commercial re-use, distribution, and reproduction in any medium, provided the original work is properly cited. For commercial re-use, please contact reprints@oup.com for reprints and translation rights for reprints. All other permissions can be obtained through our RightsLink service via the Permissions link on the article page on our site—for further information please contact journals.permissions@oup.com.</license-p>
</license>
</permissions>
<self-uri xlink:href="osaf010.pdf"/>
<abstract abstract-type="abstract"><title>Abstract</title>
<p>Humans are exposed to upwards of thousands of chemicals simultaneously, but research has traditionally focused on the health effects of only one chemical at a time. Single-chemical analyses not only underestimate total health risk, but also ignore bias from multicollinearity and co-exposure confounding between chemicals. Advanced statistical mixture methods address these challenges and allow us to both estimate the cumulative health effect of chemical mixtures and identify the strongest chemical contributors. At the same time, untargeted chemical exposome profiling through high-resolution mass spectrometry (HRMS) now supports measurement of over 100,000 chemical signals in biospecimens. However, most mixture methods cannot evaluate untargeted exposome data containing more chemical variables than samples. Weighted quantile sum regression with its recent random subsets implementation (WQS<sub>RS</sub>) is a unique, statistically powerful mixture method for high-dimensional exposome data. It estimates weights of chemicals towards the mixture index over many different repetitions in which only a small random subset of chemicals is used at a time, thus de-correlating data and avoiding overfitting. In this paper, we discuss our statistical workflow and important considerations for the application of WQS<sub>RS</sub> to exposome epidemiology, including manual quantization for non-detects, custom repeated holdouts for matched data, pre-selection of exogenous chemicals, parameter decisions, interpretation options, and visualizations. We then describe its application to functional pathway enrichment analysis with integrated exposome-metabolome data to explore underlying biological mechanisms. These data science approaches will enable exposome epidemiology to discover previously unknown risk factors, estimate cumulative health risk from total chemical mixtures, and gain mechanistic insight.</p>
</abstract>
<kwd-group><kwd>exposomics</kwd><kwd>mixture models</kwd><kwd>multi-omics</kwd><kwd>high-dimensional data</kwd><kwd>data science</kwd><kwd>cumulative chemical mixtures</kwd>
</kwd-group>
<funding-group>
<award-group award-type="grant">
<funding-source><institution-wrap><institution>National Institutes of Health</institution><institution-id institution-id-type="DOI">10.13039/100000002</institution-id></institution-wrap>
</funding-source>
<award-id>R01ES032831</award-id>
<award-id>K99ES036289</award-id>
<award-id>K01ES035082</award-id>
<award-id>R01ES035738</award-id>
<award-id>U2CES026555</award-id>
<award-id>P30ES023515</award-id>
</award-group>
</funding-group>
<counts>
<page-count count="14"/>
</counts>
</article-meta>
</front>
<body><sec sec-type="intro"><title>Introduction</title>
<sec><title>Our chemical ‘soup’ of exposures</title>
<p>Research has traditionally focused on the health impacts of one chemical at a time. In reality, individuals are simultaneously exposed to potentially thousands of environmental chemicals, each of which can add to the cumulative health burden. Based on chemical inventories around the world, over 355,000 chemicals or chemical mixtures have been registered for production and use, including 69,000 in the previous decade alone (2010-2019).<xref ref-type="bibr" rid="osaf010-B1"><sup>1</sup></xref> For roughly 15% of those registered substances, descriptive chemical names are not provided publicly due to confidential business information, and another ∼15% are only ambiguously described.<xref ref-type="bibr" rid="osaf010-B1"><sup>1</sup></xref> Considering plastic production alone, more than 16,000 chemicals are used or present in products, of which over 4,200 are potentially hazardous and over 10,000 lack hazard information.<xref ref-type="bibr" rid="osaf010-B2"><sup>2</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B3"><sup>3</sup></xref> As a result, humans are exposed to complex mixtures of both known and unidentified chemicals, a large fraction of which have not been comprehensively studied for toxicological safety or health.</p>
<p>A key challenge in identifying new chemical exposures has been the phenomenon of regrettable substitution. Even when one toxic chemical is eventually phased out of production, another chemical—often with a similar structure and same chemical class—may replace it and only later be revealed to have toxicity concerns as well. Such chemical ‘whack-a-mole’ has been observed countless times for phenols in plastic,<xref ref-type="bibr" rid="osaf010-B4"><sup>4</sup></xref> phthalates in plastic,<xref ref-type="bibr" rid="osaf010-B5"><sup>5</sup></xref> flame retardants in furniture and electronics,<xref ref-type="bibr" rid="osaf010-B6"><sup>6</sup></xref> and per- and polyfluoroalkyl substances (PFAS) in consumer products<xref ref-type="bibr" rid="osaf010-B7"><sup>7</sup></xref>—of which there exist at least 14,000 types of PFAS.<xref ref-type="bibr" rid="osaf010-B8"><sup>8</sup></xref> Consequently, traditional targeted research methods that measure only a limited number of pre-selected known chemicals cannot match the pace at which new chemicals enter commerce. For example, large human biomonitoring programs have usually analyzed up to about 300 targeted chemicals, due to limitations from available analytical standards, cost, and time.<xref ref-type="bibr" rid="osaf010-B9"><sup>9</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B10"><sup>10</sup></xref></p>
</sec>
<sec><title>The untargeted chemical exposome</title>
<p>The exposome was first conceptualized in 2005 in response to the evident gulf in the scale at which genomic versus environmental risk factors can be characterized in biospecimens and the need to advance methodologies for measuring biomarkers of environmental exposure at a similar omics level.<xref ref-type="bibr" rid="osaf010-B11"><sup>11</sup></xref> The most recent definition of exposomics is the study of “the comprehensive and cumulative effects of physical, chemical, biological, and psychosocial influences that impact biological systems by integrating data from a variety of interdisciplinary methodologies and streams to enable discovery-based analysis of environmental influences on health.”<xref ref-type="bibr" rid="osaf010-B12"><sup>12</sup></xref> As a key aspect of the exposome, untargeted chemical profiling using high-resolution mass spectrometry (HRMS) now supports the simultaneous measurement and characterization of over 100,000 chemical signals in human biospecimens, including both the internal exposome (ie, exogenous environmental chemicals and their biotransformation products) and the metabolome (ie, endogenous metabolites).<xref ref-type="bibr" rid="osaf010-B13 osaf010-B14 osaf010-B15"><sup>13-15</sup></xref></p>
<p>The untargeted approach encompasses even unknown chemicals that we cannot yet identify but can still monitor in samples and elucidate structural information for.<xref ref-type="bibr" rid="osaf010-B16"><sup>16</sup></xref> Each untargeted chemical feature is characterized in the mass spectrometry output by its accurate mass-to-charge ratio (<italic>m/z</italic>) and retention time.<xref ref-type="bibr" rid="osaf010-B17"><sup>17</sup></xref> Although the data generated are relative abundances (ie, ion intensities) rather than absolute targeted concentrations,<xref ref-type="bibr" rid="osaf010-B18"><sup>18</sup></xref> HRMS offers a powerful hypothesis-free <italic>discovery</italic> approach for detecting previously unknown environmental risk factors of disease as well as the metabolic responses that underlie this risk, without requiring analytical standards for the chemicals.<xref ref-type="bibr" rid="osaf010-B19"><sup>19</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B20"><sup>20</sup></xref> With the high dimensionality of untargeted chemical data produced, statistical analysis remains a major challenge in the field of exposomics. The objective of our commentary is to share a statistical workflow that we have developed for evaluating the cumulative mixture effects of untargeted chemicals on disease, prioritizing novel chemicals of concern, and exploring mechanisms of action. This workflow has been applied in our recent studies of exposome drivers of fertility outcomes<xref ref-type="bibr" rid="osaf010-B21"><sup>21</sup></xref> and lymphoma risk.</p>
</sec>
</sec>
<sec><title>The importance of statistical mixture methods</title>
<p>In response to the need to evaluate exposures to chemical and non-chemical stressors as complex mixtures, there has been a recent explosion in statistical mixture methods.<xref ref-type="bibr" rid="osaf010-B22"><sup>22</sup></xref> For purposes of this paper, we focus on the chemical exposome and consider a “mixture” to be the cumulative (combined) exposures to multiple chemicals through the same and/or different routes of exposure.<xref ref-type="bibr" rid="osaf010-B23"><sup>23</sup></xref> Mixture methods address two key statistical limitations of the traditional single-chemical regression approach that models the effects of one chemical exposure at a time.</p>
<p>First, single-chemical regression may underestimate risk, because many chemicals can simultaneously affect the same health endpoint or receptor (through similar or different mechanisms) and thus accumulate the health burden. A chemical may even have a small effect size that is not statistically observable on its own, so its hazard would go overlooked and unaddressed in the absence of assessments of cumulative mixture effects.<xref ref-type="bibr" rid="osaf010-B24"><sup>24</sup></xref> Some chemicals actually interact together in non-additive ways to amplify, trigger, or attenuate each other’s effect;<xref ref-type="bibr" rid="osaf010-B23"><sup>23</sup></xref> however, not all mixture methods can account for complex interactions.</p>
<p>Second, and most importantly, traditional regression ignores bias arising from collinearity or confounding due to co-exposure to other chemicals. The issue arises from the high degree of correlation between individual chemicals, between or across chemical classes, and between chemical metabolites. For example, an analysis of typical chemical exposure data from the U.S. National Health and Nutrition Examination Survey (NHANES) found 2,656 significant pairwise correlations among 289 exposure variables, demonstrating a potentially dense correlation structure.<xref ref-type="bibr" rid="osaf010-B25"><sup>25</sup></xref> A pair of chemicals has the potential to confound each other<xref ref-type="bibr" rid="osaf010-B26"><sup>26</sup></xref> if they are highly correlated due to a common source (such as flame retardants in furniture),<xref ref-type="bibr" rid="osaf010-B27"><sup>27</sup></xref> common route of exposure (such as PFAS in drinking water),<xref ref-type="bibr" rid="osaf010-B28"><sup>28</sup></xref> and/or common biotransformation pattern (such as multiple urinary metabolites of the same parent phthalate chemical).<xref ref-type="bibr" rid="osaf010-B29"><sup>29</sup></xref> For example, in a single-chemical regression model, this confounding may result in Chemical B incorrectly appearing to be associated with the outcome (through an open backdoor path on a Directed Acyclic Graph [DAG]), when in reality Chemical A is the true risk factor. In that specific case, mutually adjusting for both chemicals within the same regression model would yield the correct result. However, in other situations, such as in the presence of an unmeasured confounder of Chemical A, mutual adjustment for both chemicals can worsen the bias for Chemical A’s association, or even reverse the direction of the association for Chemical B due to conditioning on a collider.<xref ref-type="bibr" rid="osaf010-B26"><sup>26</sup></xref> The paper by Weisskopf et al. 
provides helpful DAGs to demonstrate these case scenarios of bias amplification from exposure correlation patterns.<xref ref-type="bibr" rid="osaf010-B26"><sup>26</sup></xref> Relatedly, the “reversal paradox” refers to the case in multiple-chemical regression models when the coefficients for two highly correlated exposures associated with the outcome reverse direction in opposite extremes from each other, although this does not always happen.<xref ref-type="bibr" rid="osaf010-B30"><sup>30</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> While the potential bias arises even with only two correlated chemicals in the same regression model, it can grow in complexity when adding more than two or when the correlations strengthen, including 1) the potential for worse bias amplification,<xref ref-type="bibr" rid="osaf010-B26"><sup>26</sup></xref> 2) more inflated standard errors and less stable estimates due to multicollinearity (since the chemicals are linear predictors of each other),<xref ref-type="bibr" rid="osaf010-B32 osaf010-B33 osaf010-B34"><sup>32-34</sup></xref> and 3) overfitting of the model with more variables than the sample size can handle (such that the model is simply fitting noise).<xref ref-type="bibr" rid="osaf010-B35"><sup>35</sup></xref> As such, model estimates for highly correlated chemical exposures in traditional multivariate regression are not reliable. Specialized mixture methods seek to address the high dimensional, multicollinear structure of complex chemical exposure data.</p>
<sec><title>Special considerations for untargeted mixtures</title>
<p>Although statistical mixture methods embrace the complexity of chemical exposures, most approaches cannot handle untargeted data at the omics scale. The major barrier with untargeted omics data lies in the fact that the number of parameters is far greater than the sample size (<italic>p ≫ n</italic>), so the model would be overfit to an impossible degree. In addition, untargeted data can be even more highly correlated because of the presence of some redundant signals, such as adducts, isotopes, or fragments of the same chemical, and multiple biotransformation products arising from the same exogenous chemical.<xref ref-type="bibr" rid="osaf010-B36"><sup>36</sup></xref> For these reasons, not all mixture methods can be readily scaled up to the high dimensions of the exposome.</p>
</sec>
<sec><title>Limitations of other statistical approaches</title>
<p>Choice of mixture method largely depends on the research question.<xref ref-type="bibr" rid="osaf010-B22"><sup>22</sup></xref> In our case, we aim to use mixture methods to evaluate cumulative mixture effects from <italic>untargeted</italic> chemical exposures and identify the ‘bad actor’ chemicals driving the mixture effect the most. There are several common statistical approaches that would not work best for our research questions.</p>
<p>For example, some studies calculate the sum, molar sum, or potency-weighted sum of specific chemical classes to model the total effect on health using fewer variables.<xref ref-type="bibr" rid="osaf010-B37"><sup>37</sup></xref> This approach has been commonly applied to phthalates or phthalate subgroups, such as the summed urinary metabolites of di(2-ethylhexyl) phthalate (DEHP) or high versus low molecular weight phthalates.<xref ref-type="bibr" rid="osaf010-B29"><sup>29</sup></xref> However, chemical sums lose data resolution and can hinder interpretations about individual chemicals for decision-making. In addition, the summation could mask hazards if chemicals in the same class have different molecular weights, abundances, toxicological relevance, interactions, concentrations at which adverse effects occur, or even opposing directions of effects. For instance, the health effect of a low-concentration chemical may not be observed when summed with a high-concentration chemical that does not affect the outcome.<xref ref-type="bibr" rid="osaf010-B32"><sup>32</sup></xref></p>
<p>Bayesian kernel machine regression (BKMR) is a popular mixture method<xref ref-type="bibr" rid="osaf010-B38"><sup>38</sup></xref> that can model the joint effects of exposures using a flexible kernel function that allows for non-linear, smooth exposure-outcome relationships and interactive effects between chemicals (with some sacrifices to statistical power).<xref ref-type="bibr" rid="osaf010-B39"><sup>39</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B40"><sup>40</sup></xref> Although useful for targeted chemical mixtures, especially research questions related to non-linearity and interactions between chemicals, BKMR is currently not practical for higher dimensional chemical data (<italic>p ≫ n</italic>) because of the large sample size and computational intensity needed for the non-parametric kernel function, which is highly flexible but less statistically powerful.<xref ref-type="bibr" rid="osaf010-B41"><sup>41</sup></xref> In addition, the visual interpretation of curvilinearity and interaction within high-dimensional mixtures in BKMR would be intractable. For applications of mixture methods to lower-dimensional mixtures, a recent publication offers a helpful workflow for statistical decisions related to the distribution and type of data, variable transformations, missing data, statistical assumptions, specific research questions, and other study design considerations.<xref ref-type="bibr" rid="osaf010-B42"><sup>42</sup></xref></p>
<p>Quantile g-computation (QGcomp) is another common mixture method that, similar to weighted quantile sum (WQS) regression, builds a weighted summary index based on quantiles of exposures, with a few slight advantages or disadvantages depending on the desired research question and sample size. First, QGcomp relaxes the directional homogeneity assumption by incorporating opposing effects within the same index,<xref ref-type="bibr" rid="osaf010-B43"><sup>43</sup></xref> whereas WQS now sequentially defines separate positive and negative indices to evaluate double associations of the mixture in both directions.<xref ref-type="bibr" rid="osaf010-B44"><sup>44</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B45"><sup>45</sup></xref> Second, QGcomp allows for more non-additivity and non-linearity of effects if specified as model terms in advance. Third, QGcomp does not split samples into repeated training/validation sets, which has tradeoffs between statistical power and computational speed versus generalizability and weight stability.<xref ref-type="bibr" rid="osaf010-B43"><sup>43</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B46"><sup>46</sup></xref> However, for the purpose of untargeted mixture methods in this paper, QGcomp has not yet been designed for applications to high-dimensional data.<xref ref-type="bibr" rid="osaf010-B47"><sup>47</sup></xref></p>
<p>Other statistical approaches seek to systematically reduce the dimensions of data before modeling. For example, principal component analysis (PCA) is an unsupervised approach that converts the chemical data into a smaller set of uncorrelated linear predictors (not taking into account the outcome). PCA may be helpful for assessing patterns in exposures (eg, shared sources of chemicals) without bias from multicollinearity; however, there is a loss of information, generalizability (due to dimensionless units), and interpretability of the individual principal components that makes it difficult to identify specific ‘bad actor’ chemicals or their doses of harm when modeling the effects on health outcomes.<xref ref-type="bibr" rid="osaf010-B48"><sup>48</sup></xref> Other dimension reduction approaches that do consider the outcome (ie, are supervised) include shrinkage methods. Ridge regression shrinks the regression coefficients in a manner that decreases variance (with a trade-off of increased bias),<xref ref-type="bibr" rid="osaf010-B49"><sup>49</sup></xref> but it keeps all predictors in the model and thus does not reduce the dimensions of the data.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B50"><sup>50</sup></xref> Lasso regression builds on ridge to allow coefficients to shrink to exactly zero, thus producing a model with fewer predictors.<xref ref-type="bibr" rid="osaf010-B51"><sup>51</sup></xref> However, lasso saturates at <italic>n</italic> predictors (no more than the sample size), and it tends to select one exposure arbitrarily from a group of highly correlated exposures,<xref ref-type="bibr" rid="osaf010-B50"><sup>50</sup></xref> which may erroneously lead the researcher to conclude that the other unselected exposures are not associated with the health outcome.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> Elastic net combines both lasso and ridge regression and can work in a 
high-dimensional setting; however, it encourages a “grouping effect” that either keeps all or eliminates all the chemicals in a highly correlated set, even when some of the chemicals in the set may only be correlated due to shared exposure routes not health outcomes.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> This demonstrated poor specificity in the selection of correlated variables by elastic net compared to WQS is not ideal for studying chemical exposures.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B52"><sup>52</sup></xref> Adaptive elastic net has been proposed to be more successful than its predecessor in the case of <italic>p ≫ n</italic>, but it does not provide a cumulative estimate across individual exposures to address research questions focused on combined mixture effects.<xref ref-type="bibr" rid="osaf010-B53"><sup>53</sup></xref></p>
</sec>
</sec>
<sec><title>Weighted quantile sum regression with random subsets of untargeted chemicals</title>
<p>Recent extensions of weighted quantile sum (WQS) regression now support estimation of cumulative mixture effects and individual contributions of untargeted chemicals under <italic>p ≫ n</italic> scenarios.<xref ref-type="bibr" rid="osaf010-B54"><sup>54</sup></xref> To our knowledge, this is currently the only mixture method that can do so under typical epidemiologic cohort sizes with high-dimensional exposome data without loss of information for identifying individual ‘bad actor’ chemicals.</p>
<sec><title>WQS method overview</title>
<p>At its base, WQS calculates a mixture effect by combining quantiles of all the exposures (here, chemicals) into one summary weighted index that represents the cumulative mixture effect on the health outcome in a single direction.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B52"><sup>52</sup></xref> To do so, in a training set of the data (eg, 40% of participants), each chemical is assigned a weight that reflects its individual contribution to the overall mixture effect on the outcome, where all weights sum to one. Every participant then has a score (mixture index) based on this formula for the weighted sum of their personal chemical exposures. In the final regression model using the testing set of the data (eg, the other 60% of participants), WQS simply determines the association of the mixture index with the outcome in a single degree-of-freedom, highly statistically powerful test. Quantiles are used in WQS instead of the fully continuous data because they convert the exposures to all be on the same scale in the mixture regardless of units and avoid extreme values that would cause weights to grow in extremes.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref></p>
<p>It is best practice to always apply the repeated holdouts (RH) variation of WQS, which randomly partitions the data observations into the training set (used to estimate weights) and the testing/validation set (used to test the mixture effect) many times over.<xref ref-type="bibr" rid="osaf010-B46"><sup>46</sup></xref> The entire WQS model is repeated within each of, say, 100 repeated holdouts. This produces a distribution of validated results, which improves data representativeness and generalizability, stabilizes the chemical weights, and characterizes uncertainty in the estimates through confidence intervals.</p>
<p>For the critical extension to untargeted exposome data, the random subsets (RS) implementation of WQS addresses the challenges from having more exposure parameters than sample size (<italic>p ≫ n</italic>).<xref ref-type="bibr" rid="osaf010-B54"><sup>54</sup></xref> In the basic version of WQS, chemical weights are estimated by bootstrapping over <italic>observations</italic> in the training data (eg, 1,000 randomly selected bootstrap samples with replacement) and then determining the average weights based on the relative signal of the test statistic for the mixture index slope (in association with the outcome) in each bootstrap sample, using the full set of chemicals each time.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> By contrast, the RS version of WQS uses “feature bagging” to estimate weights over many different randomly selected subsets of <italic>chemicals</italic> in the full training data (<xref ref-type="fig" rid="osaf010-F1">Figure 1</xref>). This allows WQS<sub>RS</sub> to aggregate across de-correlated sets of chemicals by repeatedly perturbing the exposure data under different correlation scenarios, thus avoiding multicollinearity and co-confounding, improving generalizability, and preventing overfitting.<xref ref-type="bibr" rid="osaf010-B54"><sup>54</sup></xref> The RH and RS variations should be used together, such that the random subsetting procedure is performed within every repeated holdout. Chemical weights are thus estimated a total of RH × RS times, so the computational intensity of WQS<sub>RS</sub> can grow quickly. In summary, WQS<sub>RS</sub> represents a major advancement in mixture methods for high-dimensional exposure data with complex correlation patterns.</p>
<fig id="osaf010-F1"><label>Figure 1.</label><caption><p>Diagram showing how the Random Subsets extension of WQS repeatedly estimates chemical weights over <italic>b</italic> different scenarios of smaller chemical mixtures, thus de-correlating the exposure data. For purposes of illustration, only nine chemicals are included in this example.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f1.png"/></fig>
</sec>
<sec><title>Selecting the chemical mixture input</title>
<p>Although WQS<sub>RS</sub> supports high-dimensional untargeted data, inputting a larger and larger number of chemical features as exposures into the model does not necessarily improve the usefulness of the model. A summary of our roadmap is provided in <xref ref-type="fig" rid="osaf010-F2">Figure 2</xref>. First, depending on the research question, it may be beneficial to focus the mixture index to only environmental risk factors by excluding possible endogenous or pharmaceutical metabolites and early biomarkers of disease that could overinflate the mixture effect. This can be achieved through a variety of approaches, each of which faces a tradeoff between either excluding more possible endogenous metabolites or including more unidentified chemical features of unknown origin. For example, the chemical mixture could focus on only the detected features with annotations as possible environmental chemicals based on database matches, thus minimizing the presence of endogenous features (whether identified or unidentified) but being less inclusive to novel environmental chemicals not yet existing in databases. 
The coverage offered by a particular annotation database may need to be pruned depending on the research hypothesis; for example, the Norman Substance Database combines multiple sources of environmental chemical lists, some of which are pharmaceutical-focused or cover drinking water contaminants that include pharmaceuticals and hormone-related compounds.<xref ref-type="bibr" rid="osaf010-B55"><sup>55</sup></xref> Further, any additional annotated adducts or isotopes of the detected environmental chemicals could be removed to reduce feature redundancy (retaining the primary M + H or M-H adduct), although this should only be done among the annotations meeting the highest confidence level of three by xMSannotator (ie, adducts predictably clustered into the same correlation modules, retention time sub-modules, and mass-defect sub-groups).<xref ref-type="bibr" rid="osaf010-B56"><sup>56</sup></xref> An alternative approach for environmental chemical selection could focus on including all the chemical features that do <italic>not</italic> have identities or annotations as possible endogenous features, which would be inclusive to more chemicals but vary in effectiveness depending on the metabolomic database’s degree of coverage and endogenous/exogenous distinction. The strictest approach would be to focus the mixture on specific known classes or source groups of environmental chemicals. The potential interference by endogenous biomarkers is more pronounced in data from liquid chromatography (LC) HRMS, which widely integrates both the metabolome and the chemical exposome in its measurement of polar molecules with specific functional groups. 
Depending on sample preparation and extraction methods, gas chromatography (GC) HRMS can allow more focused detection of environmental chemicals, which is why the use of both instruments has been recommended for optimal chemical coverage.<xref ref-type="bibr" rid="osaf010-B10"><sup>10</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B15"><sup>15</sup></xref> In summary, the purpose of the WQS index and what mixture it intends to represent should be carefully considered in advance of modeling.</p>
<fig id="osaf010-F2"><label>Figure 2.</label><caption><p>An untargeted exposome-metabolome statistical workflow to leverage weighted quantile sum regression (WQS) with random subsets for identifying cumulative mixture effects on a health outcome and determining bad actor chemicals driving the mixture effects, then to use pathway enrichment analysis to investigate metabolic pathways that may underlie the mixture effects.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f2.png"/></fig>
<p>Second, the more chemicals included, the more repetitions needed, so the computational intensity can grow rapidly. Depending on the number of detected environmental chemicals in the research study, a layered pre-filtering strategy may help. The first layer is to restrict chemicals by some detection rate threshold to ensure there is sufficient spread of exposure across the quantiles for each chemical. Stricter thresholds (such as 50% or 75%) may miss out on important chemicals that are mostly found only in those that develop the disease, so this should be tailored to the study; in our 1:1 matched case-control study, for example, we are using 25% to allow for chemicals that are hypothetically only present in half of the cases and none of the controls. However, quantizing data with such low detection thresholds requires a specialized approach (see next section).</p>
<p>As the next pre-filtering criterion, the mixture inputted into WQS<sub>RS</sub> could be narrowed to only exposures with potential health relevance, while being cautiously inclusive. This is because including higher numbers of irrelevant chemicals may unexpectedly attenuate the mixture effect if the random subset parameter is not equivalently scaled, despite that the model trends weights of unimportant chemicals close to zero. To illustrate this, in the hypothetical case of a model with 18,000 chemicals compared to a model with 1,800 relevant chemicals, if the number of random subsets of chemicals remains the same (eg, 2,000 repetitions, even with slightly larger subset sizes), there would likely be more random subsets with mixtures full of the irrelevant chemicals that exhibit null effects, which then get averaged into the overall weighted index. Then, weight estimates may be based more on noise than signal. Instead, the mixture could first be further filtered to only the chemicals univariately associated with the outcome under a loose significance threshold without correcting for multiple testing (such as unadjusted <italic>p </italic>&lt; 0.10 or even <italic>p </italic>&lt; 0.20 in the adverse direction) in an exposome-wide association study (EWAS). We err on the side of inclusivity since univariate analyses are prone to issues from multicollinearity or chemical co-confounding and, under stricter significance thresholds, might miss chemicals that would show a stronger effect in the de-correlated mixture model. Although not without limitations, this pre-filtration approach has advantages over high-dimension shrinkage methods. As we described in the introduction, elastic net regression tends to either keep all or eliminate all chemicals among a highly correlated group,<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> and thus could over-exclude potentially relevant chemicals from further consideration. 
However, future work should use simulated data to compare different approaches to selecting high-dimensional mixtures.</p>
<p>Finally, a third possible filtering layer is to conduct the WQS<sub>RS</sub> models separately for the data from different instruments (GC-HRMS versus LC-HRMS) or different instrument columns (LC hydrophilic interaction [HILIC] chromatography versus C18 hydrophobic interaction chromatography), if applicable. This not only reduces potential overlap in chemicals detected through each methodology but can also demonstrate robustness of results across platforms. However, the tradeoff is not identifying the fullest cumulative mixture effect from incorporating all relevant chemicals. Overall, the decisions about the chemical mixture to input into WQS<sub>RS</sub> should depend on the research question and the dimensions of the exposome data in the specific study, and conducting sensitivity analyses under different mixture sizes and model parameters is helpful to understand the consistency of results.</p>
</sec>
<sec><title>Important decisions on WQS parameters</title>
<p>For the choice of the number of repeated holdouts of observations to implement, 100 RHs are typical and sufficient to improve generalizability over traditional epidemiologic analyses and to characterize uncertainty of weights in mixture indices.<xref ref-type="bibr" rid="osaf010-B46"><sup>46</sup></xref> Although more RHs (such as &gt;1,000) would approximate the normal distribution better, the computational requirements could be prohibitive: 1,000 RHs would take 10 times as long as 100 RHs would. By contrast, the choice of the number and size of random subsets of chemicals is more important to customize to each research question. We recommend first trying various choices of RSs under RH = 1 (for lower computational intensity) to see how the distribution of chemical weights changes. If the number of chemicals with weights extremely near zero decreases with more RSs, that might mean that some chemicals did not have sufficient chance of being included under the lower number of RSs and thus that a higher number of RSs is still offering benefits. The size of each RS by default is the square root of the number of chemicals included in the mixture, and its input has a trade-off between giving a particular chemical more chances to be included in the model (under a fixed number of RSs) versus better de-correlating the data. In sensitivity analyses in our fertility study, we found the results to be sensitive to the choice of RS size. 
When we increased the number of chemicals by loosening filter criteria to include potentially less relevant exposures, and the RS size increased automatically, there were fewer important mixture contributors meeting Busgang criteria than when fixing the RSs at the original smaller size.<xref ref-type="bibr" rid="osaf010-B21"><sup>21</sup></xref> We recommend choosing a relatively small size of each RS that allows for sufficient perturbation of chemical correlation patterns to discover important exposures, assuming that the number of RS repetitions is high enough to still give each chemical enough chance of being included in RSs. The previous WQS simulation study of the random subset implementation used a mixture of 472 untargeted metabolites with 1,000 RSs and a default RS size of 22 (ie, √472),<xref ref-type="bibr" rid="osaf010-B54"><sup>54</sup></xref> but simulation analyses have not yet been conducted on implications of higher RS sizes due to larger high-dimensional mixtures.</p>
<p>The average weights of chemicals are estimated based on the relative signal of the test statistic for the index slope in each random subset, and this signal function parameter has multiple possible values. For example, an “expt” signal function would apply the exponential of the t-statistic, which allows the most important chemicals to be much higher weighted than others, if that is desired. In addition, the ß slopes of the indices can be constrained to a single direction, which excludes random subsets with slopes in the opposite direction and ensures that only those in the relevant direction will contribute to the estimation of weights; thus, the mixture effect represents the adverse direction of harm.</p>
<p>There are two key deviations from default parameters that are important to consider. First, we recommend that the exposure data are manually quantized into deciles or quartiles before input into the WQS model (with the <italic>q</italic> parameter then set to null) in such a way that the non-detect values are put into their own zero-quantile and the detected values are quantized separately (eg, into 9 quantiles for a total of 10 deciles). The quantization is done individually for each chemical and modified from the source code for function gwqs_rank (see example code at github.com/anna-s-young/exposome-statistics). This approach avoids chemicals having, for example, five deciles that all refer to zero values and then large jumps between the latter five deciles. Second, matched case-control studies may need to consider how data are partitioned. The default is to randomly split the observations (eg, participants) into separate training and validation sets for each repeated holdout. However, this may lose some of the benefits of individually matched case-control pairs if a particular set no longer has a similar distribution of confounders between cases and controls. An alternative is to manually partition the <italic>pairs</italic>, instead of individual observations, randomly within each repeated holdout. This is supported by the “validation_rows” parameter in the model that takes a list of vectors (one for each RH) indicating the rows of observations to include in the validation set (see code available at github.com/anna-s-young/exposome-statistics).</p>
</sec>
<sec><title>Computational intensity</title>
<p>The WQS<sub>RS</sub> models with high-dimensional exposome data should be performed on a high-performance computing (HPC) cluster rather than a standard computer due to the computational intensity. The speed can be accelerated through parallel processing by using the “multisession” or “multicore” option for the plan strategy in the WQS function call. Although computational time can vary greatly for a cluster job depending on factors such as network performance, processing speed, cluster utilization, and available memory, in our unpublished lymphoma nested case-control study we completed most WQS sensitivity models in approximately 7-60h each under multisession (using mixtures of up to 2,700 chemicals, up to 4,000 RSs, 100 RHs, 444 samples, and our slower customizations of manual quantization and manual partitions) on the HPC at the Emory Rollins School of Public Health. Without the manual partitioning of case-control pairs into repeated holdouts, the models took an order of magnitude less time. In our fertility study, we also completed most of the sensitivity models in 4-50h each (using mixtures of up to 2,700 chemicals, up to 4,000 RSs, 100 RHs, 82 samples, and manual quantization).<xref ref-type="bibr" rid="osaf010-B21"><sup>21</sup></xref> Increasing the number of chemicals and/or the number of RSs to much higher degrees was our primary limiting factor for cluster load. However, we observed in the sensitivity analyses that the margins of return diminished with increasing numbers of RSs after a certain point, which justified us keeping 2,000 RSs for our main models (although this number will change depending on the study and mixture size). As described earlier, the choice of RSs can first be decided upon while using RH = 1 in faster preliminary models. Importantly, the independence of the repeated holdouts and random subsets makes them easily parallelizable and able to be scaled up in line with available high-performance computing resources.</p>
</sec>
<sec><title>Interpreting the WQS results</title>
<p>With repeated holdout validation, the mixture effect (ß estimate for the index’s association with the outcome) is interpreted as an aggregation across repeated holdouts. The function will provide a mean and median of the mixture effect, along with confidence intervals. The median and percentile-based interval are preferred because they do not make assumptions about symmetry. The units of the mixture effect are per quantile. However, a histogram of the distribution of the cumulative mixture index across observations may show that it bunches in the middle quantiles, as it may be rare for a participant to have consistently low (or consistently high) quantiles for most exposures. If so, the per-quantile mixture effect may represent a difference covering a wide range of the exposures. An alternative is to transform the units to a per standard deviation (SD) change in the index. To do so, the ß estimate within each repeated holdout must be manually extracted from the output and multiplied by the standard deviation of that holdout’s index, then the median or mean of the estimates is used as the mixture effect per SD increase. This approach may also aid in the comparison of the cumulative mixture effect to the magnitudes of individual chemical effects, which can be estimated (and transformed to per-SD) in basic univariate regression models among the chemicals deemed important. Such a comparison can show the usefulness of WQS<sub>RS</sub> for evaluating chemicals as mixtures instead of as single exposures.</p>
<p>The weights estimated for each chemical component can reveal potential ‘bad actor’ chemicals driving the overall mixture effect. However, because the weights always sum to one, they should only be interpreted in the case when the mixture effect itself is significant or borderline significant, based on its overall <italic>p</italic> value or percent of repeated holdouts in which it reaches significance. In the simplest interpretation of weights, chemicals are considered important if they have an average weight higher than the equi-weight threshold (1/<italic>p</italic>), which represents the hypothetical scenario of equal contributions by all chemicals. However, with the repeated holdout validation, we can better characterize the uncertainty in the chemical weights. Following the “Busgang criteria”, chemicals with an average weight above the threshold within at least 90% of repeated holdouts can be defined as “probable contributors”, else within at least 50% of holdouts as “possible contributors”, else within at least 10% of holdouts as “possibly not contributors”, else within less than 10% of holdouts as “probably not contributors.”<xref ref-type="bibr" rid="osaf010-B57"><sup>57</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B58"><sup>58</sup></xref> These criteria characterize how replicable the results are across data samples from the same underlying population, especially because certain chemicals may be misclassified as concerning—or not concerning—when looking at only one holdout of data.<xref ref-type="bibr" rid="osaf010-B46"><sup>46</sup></xref> We can graph these distributions of weights (eg, among the “possible contributors”) using the individual repeated holdout data from the WQS output. An example for chemicals across different levels of contribution is shown in <xref ref-type="fig" rid="osaf010-F3">Figure 3</xref> (code available at github.com/anna-s-young/exposome-statistics).</p>
<fig id="osaf010-F3"><label>Figure 3.</label><caption><p>Example visualization of distributions of weights of chemicals across repeated holdouts of data in weighted quantile sum regression (WQS) models. The equi-weight threshold for weights is defined as one divided by the number of included chemicals. Here, a random selection of chemicals across different contribution levels is shown.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f3.png"/></fig>
</sec>
<sec><title>Strengths and limitations of WQS</title>
<p>WQS<sub>RS</sub> is a powerful approach to identify cumulative mixture effects and bad actor chemicals while uniquely embracing the complexity and high dimensionality of untargeted exposome data. To our knowledge, it is currently the only mixture method that can be tailored to epidemiologic studies where the number of exposures exceeds the sample size. Compared to traditional univariate regression analyses, this mixture method avoids bias from multi-collinearity and co-confounding of chemicals and thus more accurately prioritizes chemicals of concern. At the same time, WQS<sub>RS</sub> is highly statistically powerful, as it only conducts a single degree-of-freedom test, <italic>without</italic> losing information critical for interpretation of individual chemical risk factors. The summary mixture index not only provides a measure of cumulative mixture effects from the many chemicals that can simultaneously interfere with health, but it can also be used in other analyses as a single variable representing harmful exposure (see next section).</p>
<p>As limitations, WQS assumes that there are no interactions between exposures and that there is a constant change in risk between quantiles. These assumptions can be tested in sensitivity analyses considering other quantiles (eg, quartiles instead of deciles) and in other mixture methods considering small mixtures (such as Bayesian kernel machine regression).<xref ref-type="bibr" rid="osaf010-B39"><sup>39</sup></xref> With the random subsets implementation, WQS also assumes that all component effects are in the same direction, when in reality, the chosen chemical mixture may include substances that operate in the non-adverse direction, such as endogenous metabolites or some chemicals seeming to be protective due to unknown confounding (eg, pesticides related to diet and nutrition). Or, the health endpoint may be adverse in either extreme direction, as opposed to a simple dichotomy. In some cases, especially under hypothesis discovery, determining the direction of interest is not straightforward. Despite the limitation, this unidirectionality assumption actually helps prevent the reversal paradox arising from complex multicollinearity of exposures,<xref ref-type="bibr" rid="osaf010-B45"><sup>45</sup></xref> while also focusing the index for better interpretability. There is a recent extension of WQS for double (positive and negative) indices with a penalization term, but it does not yet support random subsets of chemicals for high-dimensional data at the same time.<xref ref-type="bibr" rid="osaf010-B45"><sup>45</sup></xref> As another limitation that should be acknowledged in matched case-control studies, WQS does not yet allow for <italic>conditional</italic> logistic regression in its models, only adjustment for the matching variables. WQS currently supports linear, logistic, Poisson, quasi-Poisson, and negative binomial regression in its current version 3.0.5 of the R package <italic>gWQS</italic>. 
Furthermore, WQS only supports continuous or ordinal exposure variables; however, categorical variables can be used if transformable to an ordinal structure. For example, previous work has evaluated quartiles of scales for post-traumatic stress disorder symptoms, depressive symptoms, and life stressors.<xref ref-type="bibr" rid="osaf010-B59"><sup>59</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B60"><sup>60</sup></xref> Although quantization of continuous exposures loses the full range of levels, it prevents extreme weights from outliers.<xref ref-type="bibr" rid="osaf010-B31"><sup>31</sup></xref> Finally, the mixture method can become quite computationally intensive depending on the number of exposures, random subsets, repeated holdouts, and samples, and it is best used on a high-performance computing cluster.</p>
</sec>
</sec>
<sec><title>Metabolic pathway enrichment with WQS</title>
<sec><title>Pathway enrichment method overview</title>
<p>Because of the simultaneous measurement of both the chemical exposome and metabolome using untargeted HRMS,<xref ref-type="bibr" rid="osaf010-B13 osaf010-B14 osaf010-B15"><sup>13-15</sup></xref> multi-omics analysis offers the opportunity to identify biological mechanisms that may underlie the associations between chemical exposure and adverse health outcomes. For example, a meet-in-the-middle (MITM) strategy could: (1) identify the metabolic pathways associated with disease, (2) identify the metabolic pathways associated with exposure, and then (3) determine which significant pathways overlap “in the middle” between exposure and disease (<xref ref-type="fig" rid="osaf010-F4">Figure 4</xref>); however, researchers have approached MITM in different ways.<xref ref-type="bibr" rid="osaf010-B61"><sup>61</sup></xref> Because the untargeted metabolomic data are also high dimensional, the supervised WQS<sub>RS</sub> index is useful as a single exposure variable representing the cumulative mixture effect of untargeted chemicals on disease, thus reducing the complexity of one of the multi-omics layers and focusing the exposure index to the chemicals that are most relevant to the outcome and its mechanisms. Ideally, prospective, longitudinal samples would be used such that the metabolome data lie chronologically in the middle between the exposome and the health outcome and thus avoid problems with reverse causality. However, achieving this temporality is not always possible due to limited sample availability or budget restrictions, in which case care should be taken to acknowledge the potential for reverse causality or health treatment effects.</p>
<fig id="osaf010-F4"><label>Figure 4.</label><caption><p>Diagram of our approach to assess the mixture effect of the untargeted exposome on health and then analyze underlying metabolic pathways that are significantly enriched for both the WQS mixture index and the health outcome (ie, that overlap in the middle). Note that prospective longitudinal samples are best for interpreting causality. Note: WQS = weighted quantile sum.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f4.png"/></fig>
<p>To accomplish steps 1 and 2 of MITM (separately), <italic>p</italic> values are first calculated for the univariate associations of each detected untargeted feature with the dependent variable (either the health outcome for step 1 or the exposure for step 2). Then, functional pathway enrichment analysis can predict functional activity by mapping all possible metabolite annotations for each feature (based on <italic>m/z</italic> and/or retention time) to a metabolic network and then finding the significant features that are locally enriched on a structure (ie, represent biological activity), whereas the false matches would only be randomly distributed in the network.<xref ref-type="bibr" rid="osaf010-B62"><sup>62</sup></xref> This method leverages metabolic interconnections to improve prediction of pathway activity without having to identify each metabolite <italic>a priori</italic>.<xref ref-type="bibr" rid="osaf010-B62"><sup>62</sup></xref> <italic>Mummichog</italic> (which is also implemented in MetaboAnalyst) has been the most common algorithm for pathway enrichment analysis,<xref ref-type="bibr" rid="osaf010-B62"><sup>62</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B63"><sup>63</sup></xref> however, we currently use Metapone’s permutation-based weighted hypergeometric test for several reasons.<xref ref-type="bibr" rid="osaf010-B64"><sup>64</sup></xref> Metapone jointly analyzes both positive- and negative-ion mode HRMS data when applicable (while avoiding double counting), accounts for matching uncertainty by using fractional counts of features (thus down-weighting those with higher numbers of matches), has an R package for suitability in our workflow alongside environmental mixture methods, and combines pathway information from multiple databases for higher relevance to xenobiotic pathways.<xref ref-type="bibr" rid="osaf010-B64"><sup>64</sup></xref> It leverages three databases: <italic>mummichog</italic>,<xref ref-type="bibr" 
rid="osaf010-B62"><sup>62</sup></xref> the Kyoto Encyclopedia of Genes and Genomes (KEGG),<xref ref-type="bibr" rid="osaf010-B65"><sup>65</sup></xref> and the Small Molecule Pathway Database (SMPDB).<xref ref-type="bibr" rid="osaf010-B66"><sup>66</sup></xref> The overall <italic>p</italic> value of enrichment significance for each pathway is defined as the proportion of permutations in which the total fractional count of significant features in that pathway based on the real feature <italic>p</italic> values is lower than the total fractional count of significant features in that pathway based on randomly permuted <italic>p</italic> values for each feature.<xref ref-type="bibr" rid="osaf010-B64"><sup>64</sup></xref> As the final step in MITM, the results from pathway enrichment for exposure versus outcome can be compared for potential overlap (example in <xref ref-type="fig" rid="osaf010-F5">Figure 5</xref>).</p>
<fig id="osaf010-F5"><label>Figure 5.</label><caption><p>Illustrative example of how to display results of meet-in-the-middle pathway enrichment, where pathways indicated by the dotted red line are significantly enriched for both an exposure mixture and the health outcome. These results represent four different pathway enrichment analyses conducted separately (one for each of the three WQS chemical mixture indices and the outcome). GC = gas chromatography; LC = liquid chromatography; C18 = C<sub>18</sub> reverse phase chromatography; HILIC = hydrophilic interaction chromatography; WQS = weighted quantile sum regression.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f5.png"/></fig>
</sec>
<sec><title>Method decisions for Metapone</title>
<p>When using Metapone, a high number of permutations, such as 1,000, will help stabilize results. The list of adduct ions to consider in annotations can be determined in consultation with the laboratory producing the data. The R package comes with a default data frame of pathway information; however, a data frame with a flag to filter to only the human pathways (flag = 1) is available at github.com/EMERGE-EXPOSOME/Metapone-pathway. The default threshold for significance of features within a pathway is raw <italic>p </italic>&lt; 0.05, and the overall pathway <italic>p</italic> value has an optional adjusted value (by the Benjamini-Hochberg procedure) to account for multiple testing and reduce false positives. We recommend that significant pathways are also filtered to those with at least three significant metabolites (fractional counts) and that thus more comprehensively represent biological activity. The interpretations about specific pathways should be appropriately caveated due to the exploratory nature of this type of analysis. Finally, when conducting pathway enrichment using the WQS<sub>RS</sub> mixture index, it is possible to determine significantly enriched pathways <italic>within</italic> each repeated holdout of WQS<sub>RS</sub> (instead of using the average index) and investigate how frequently each pathway is significant; however, this would be computationally intensive.</p>
</sec>
<sec><title>Additional approaches</title>
<p>While the MITM pathway enrichment analysis with WQS mixtures can reveal relevant mechanisms overall, the summary mixture indices may mask some significant biological pathways due to the diverse modes of action through which different chemicals operate. For this reason, it may also be useful to investigate the potential metabolic role of specific individual chemicals. For example, a few of the top contributors to the WQS mixture effect could be selected for additional pathway enrichment on their own. As another example, network analyses can explore correlations between the important chemicals in the WQS mixture effect (such as the “possible” and “probable” contributors) and each significant disease-associated metabolic pathway, where a pathway is represented by the first component (PC1) in principal component analysis (PCA) of the metabolites in that pathway (<xref ref-type="fig" rid="osaf010-F6">Figure 6</xref>). The PCA could be performed on only the significant metabolites within the pathway or, to understand how chemicals impact the pathway as a whole, on all the mapped metabolites regardless of significance. It is also possible to include in the network multiple PCs per pathway (such as the subset of PCs that explain most of the variance), or to use another method for creating a summary index of the pathway. It is important to note that correlation networks are exploratory and not suitable for causal interpretation, especially given the multicollinearity issues between chemicals, so further research would be required to confirm mechanisms of action of chemicals.<xref ref-type="bibr" rid="osaf010-B67"><sup>67</sup></xref></p>
<fig id="osaf010-F6"><label>Figure 6.</label><caption><p>Illustrative example of a network analysis of significant, strong correlations between the disease-associated metabolic pathways (represented by their first principal component) and the exposures to chemicals that were deemed important in the weighted quantile sum (WQS) mixture effect on disease. Numbered circles refer to the chemicals and black squares to the pathways. Network was created with the R package <italic>igraph</italic> and clusters were determined based on multilevel community detection. Non-significant or weak correlations were not retained in the network.</p></caption><graphic xmlns:xlink="http://www.w3.org/1999/xlink" mimetype="image" xlink:href="osaf010f6.png"/></fig>
<p>High-dimensional mediation analyses offer an alternative approach to investigate effects of exposure on disease through multiple metabolites (or preferably, pathway groups) and may improve causal inference depending on assumptions met.<xref ref-type="bibr" rid="osaf010-B67 osaf010-B68 osaf010-B69 osaf010-B70"><sup>67-70</sup></xref> Again, the WQS index could be helpful here as a single exposure variable representing the mixture effect, while the metabolome is retained as high-dimensional. The fact that WQS is a supervised approach that takes into account the outcome should be acknowledged when using the index to represent exposure. Other high-dimensional mediation research has employed separate exposure risk scores for different classes of chemicals;<xref ref-type="bibr" rid="osaf010-B68"><sup>68</sup></xref> however, this would be challenging with untargeted data where not all chemicals are identifiable or groupable. Pairwise mediation analyses between each chemical and each metabolite or pathway group are also possible to understand specific toxicant-mediator relationships, but they are again prone to bias from chemical multicollinearity unlike mixture effect indices.<xref ref-type="bibr" rid="osaf010-B68"><sup>68</sup></xref></p>
</sec>
<sec><title>Strengths and limitations of pathway enrichment</title>
<p>Pathway enrichment is a practical exploratory approach to predict functional biological activity by leveraging pathway knowledge and bypassing the bottleneck of metabolite identification.<xref ref-type="bibr" rid="osaf010-B62"><sup>62</sup></xref><sup>,</sup><xref ref-type="bibr" rid="osaf010-B64"><sup>64</sup></xref> It produces interpretable and parsimonious results by mapping individual metabolites into pathway groups, which can generate hypotheses for future experimental research with the ultimate goal of identifying chemical mechanisms, therapeutic targets, or early biomarkers of disease. There are several limitations to note. Pathways are not mutually exclusive, and many metabolites are involved in multiple pathways. Thus, effects seen with a given metabolite do not always mean effects with all its pathways. The reliance only on <italic>m/z</italic> (and retention time in some cases) also limits the accuracy of metabolite annotations and may lead to false discovery, although the network mapping does help filter out the randomly structured and thus potentially irrelevant matches. In addition, the significance of enriched pathways does not reveal whether the involvement was harmful or beneficial to the health outcome, which is a challenge because of the different directions in which metabolites of the same pathway may act. 
Results are sensitive to the choice of databases, some of which overlook xenobiotic pathways,<xref ref-type="bibr" rid="osaf010-B71"><sup>71</sup></xref> and to the selected thresholds for the significance and size of pathways.<xref ref-type="bibr" rid="osaf010-B72"><sup>72</sup></xref> Furthermore, any pathway enrichment relies on known pathway definitions, which are subjective in their method of imposing order onto a biochemical network.<xref ref-type="bibr" rid="osaf010-B72"><sup>72</sup></xref> Finally, if MITM is conducted on cross-sectional exposome-metabolome data, there is the possibility for reverse causation, which limits interpretation. In general, it is best to treat pathway enrichment results as exploratory.</p>
</sec>
</sec>
<sec sec-type="conclusion"><title>Conclusion</title>
<p>Statistically analyzing chemicals as mixtures is important not only to capture the real-world accumulation of health burden from simultaneous exposures, but also to minimize bias from chemical multicollinearity and co-exposure confounding. Many mixture methods address this challenge, but few currently scale to high-dimensional untargeted chemical exposome data wherein the number of features is much higher than the number of samples. WQS regression with the random subsets implementation is a statistically powerful mixture method that evaluates cumulative mixture effects on a health outcome and reveals important individual chemical drivers of the mixture effects, without loss of data resolution or interpretability. Its repetitions to estimate the mixture index across many random smaller subsets of chemicals serve to de-correlate even high-dimensional exposure data while avoiding bias and overfitting. This represents a critical advancement in the exposomics field’s ability to investigate cumulative health risk from very large mixtures of exposures and to uncover emerging environmental risk factors of concern, including untargeted chemicals that are not commonly measured or not yet identifiable.</p>
<p>Furthermore, the cumulative mixture index can be used as a single variable representing the weighted sum of outcome-relevant exposures in integrations with other high-dimensional omics, such as meet-in-the-middle metabolomic pathway enrichment analysis or mediation analysis. These exploratory multi-omics approaches can reveal insights into potential underlying modes of action of the chemical exposures in association with the health outcome and thus generate new hypotheses for future mechanistic research. Many decisions are required for WQS<sub>RS</sub> and pathway enrichment, so we suggest careful consideration of the discussed method parameters and customizations and recommend implementing sensitivity analyses to ensure that any conclusions are not overly sensitive to the decision points. In addition, multiple different methods may be used to test whether certain assumptions were met (such as the presence of interactions between exposures). Interpretations should also be appropriately caveated depending on the level of temporal causality in the study design. Finally, the field has continuous advancements in statistical methods, so we recommend staying attuned to new updates and functionalities for high-dimensional exposome data. In conclusion, with the ability of untargeted HRMS to now detect over 100,000 chemical signals in human samples, novel data science approaches such as WQS<sub>RS</sub> that embrace the full dimensions of the data are critical to support discovery-based exposome epidemiology and multi-omics integration.</p>
</sec>
</body>
<back>
<sec><title>Author contributions</title>
<p>Anna S. Young (Conceptualization [equal], Methodology [equal], Funding acquisition [equal], Software [equal], Visualization [lead], Writing—original draft [lead], Writing—review &amp; editing [lead]), Chris Gennings (Conceptualization [equal], Methodology [equal], Resources [equal], Software [equal], Writing—review &amp; editing [equal]), Donghai Liang (Methodology [equal], Writing—review &amp; editing [equal]), Stephanie M. Eick (Methodology [equal], Writing—review &amp; editing [equal]), Douglas I. Walker (Conceptualization [equal], Funding acquisition [lead], Methodology [equal], Resources [equal], Supervision [lead], Writing—review &amp; editing [equal]).</p>
</sec>
<sec><title>Funding</title>
<p>This work was supported by the National Institute of Environmental Health Sciences at the National Institutes of Health (R01ES032831 to D.I.W. and A.S.Y., K99ES036289 to A.S.Y., K01ES035082 to S.M.E., R01ES035738 to D.L. and S.M.E., U2CES026555 to C.G., P30ES023515 to C.G.) and the National Institute of General Medical Sciences at the National Institutes of Health (R25GM143298 to A.S.Y.). The funders did not play a role in the design of the study; the collection, analysis, and interpretation of the data; the writing of the manuscript; or the decision to submit the manuscript for publication. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. Diagrams in this paper were created using Biorender.com.</p>
</sec>
<sec><title>Conflicts of interest</title>
<p>The authors declare no conflicts of interest.</p>
</sec>
<sec sec-type="data-availability"><title>Data availability</title>
<p>No new data were generated or analyzed in support of this research.</p>
</sec>
<ref-list id="ref1"><title>References</title>
<ref id="osaf010-B1"><label>1</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Wang</surname><given-names>ZY</given-names></string-name>, <string-name name-style="western"><surname>Walker</surname><given-names>GW</given-names></string-name>, <string-name name-style="western"><surname>Muir</surname><given-names>DCG</given-names></string-name>, <string-name name-style="western"><surname>Nagatani-Yoshida</surname><given-names>K.</given-names></string-name></person-group> <article-title>Toward a global understanding of chemical pollution: a first comprehensive analysis of national and regional chemical inventories</article-title>. <source>Environ Sci Technol.</source> <year>2020</year>;<volume>54</volume>:<fpage>2575</fpage>-<lpage>2584</lpage>. <pub-id pub-id-type="doi">10.1021/acs.est.9b06379</pub-id></mixed-citation></ref>
<ref id="osaf010-B2"><label>2</label><mixed-citation publication-type="data"><person-group person-group-type="curator"><string-name name-style="western"><surname>Wagner</surname><given-names>M</given-names></string-name>, <string-name name-style="western"><surname>Monclús</surname><given-names>L</given-names></string-name>, <string-name name-style="western"><surname>Arp</surname><given-names>HPH</given-names></string-name></person-group>, <etal>et al</etal> <data-title>State of the Science on Plastic Chemicals—Identifying and Addressing Chemicals and Polymers of Concern.</data-title> <publisher-name>Zenodo</publisher-name>; <year>2024</year>. <pub-id pub-id-type="doi">10.5281/zenodo.10701706</pub-id></mixed-citation></ref>
<ref id="osaf010-B3"><label>3</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Wiesinger</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Wang</surname><given-names>Z</given-names></string-name>, <string-name name-style="western"><surname>Hellweg</surname><given-names>S.</given-names></string-name></person-group> <article-title>Deep dive into plastic monomers, additives, and processing aids</article-title>. <source>Environ Sci Technol.</source> <year>2021</year>;<volume>55</volume>:<fpage>9339</fpage>-<lpage>9351</lpage>. <pub-id pub-id-type="doi">10.1021/acs.est.1c00976</pub-id></mixed-citation></ref>
<ref id="osaf010-B4"><label>4</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zimmerman</surname><given-names>JB</given-names></string-name>, <string-name name-style="western"><surname>Anastas</surname><given-names>PT.</given-names></string-name></person-group> <article-title>Toward substitution with no regrets</article-title>. <source>Science.</source> <year>2015</year>;<volume>347</volume>:<fpage>1198</fpage>-<lpage>1199</lpage>. <pub-id pub-id-type="doi">10.1126/science.aaa0812</pub-id></mixed-citation></ref>
<ref id="osaf010-B5"><label>5</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zota</surname><given-names>AR</given-names></string-name>, <string-name name-style="western"><surname>Calafat</surname><given-names>AM</given-names></string-name>, <string-name name-style="western"><surname>Woodruff</surname><given-names>TJ.</given-names></string-name></person-group> <article-title>Temporal trends in phthalate exposures: findings from the National Health and Nutrition Examination Survey, 2001–2010</article-title>. <source>Environ Health Perspect.</source> <year>2014</year>;<volume>122</volume>:<fpage>235</fpage>-<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1306681</pub-id></mixed-citation></ref>
<ref id="osaf010-B6"><label>6</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Birnbaum</surname><given-names>LS</given-names></string-name>, <string-name name-style="western"><surname>Bergman</surname><given-names>Å.</given-names></string-name></person-group> <article-title>Brominated and chlorinated flame retardants: the San Antonio statement</article-title>. <source>Environ Health Perspect.</source> <year>2010</year>;<volume>118</volume>:<fpage>A514</fpage>-<lpage>A515</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1003088</pub-id></mixed-citation></ref>
<ref id="osaf010-B7"><label>7</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Brase</surname><given-names>RA</given-names></string-name>, <string-name name-style="western"><surname>Mullin</surname><given-names>EJ</given-names></string-name>, <string-name name-style="western"><surname>Spink</surname><given-names>DC.</given-names></string-name></person-group> <article-title>Legacy and emerging per- and polyfluoroalkyl substances: analytical techniques, environmental fate, and health effects</article-title>. <source>Int J Mol Sci.</source> <year>2021</year>;<volume>22</volume>:<fpage>995</fpage>. <pub-id pub-id-type="doi">10.3390/ijms22030995</pub-id></mixed-citation></ref>
<ref id="osaf010-B8"><label>8</label><mixed-citation publication-type="other"><collab>US EPA</collab>. PFAS: V2 PFAS Master List of PFAS Substances. <year>2020</year>. <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:href="https://comptox.epa.gov/dashboard/chemical-lists/PFASMASTERLISTV2">https://comptox.epa.gov/dashboard/chemical-lists/PFASMASTERLISTV2</ext-link></mixed-citation></ref>
<ref id="osaf010-B9"><label>9</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Krahl</surname><given-names>PL</given-names></string-name>, <string-name name-style="western"><surname>Benchoff</surname><given-names>E</given-names></string-name>, <string-name name-style="western"><surname>Go</surname><given-names>YM</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Advances in comprehensive exposure assessment: opportunities for the US military</article-title>. <source>J Occup Environ Med.</source> <year>2019</year>;<volume>61</volume>:<fpage>S5</fpage>-<lpage>S14</lpage>. <pub-id pub-id-type="doi">10.1097/JOM.0000000000001677</pub-id></mixed-citation></ref>
<ref id="osaf010-B10"><label>10</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhang</surname><given-names>P</given-names></string-name>, <string-name name-style="western"><surname>Carlsten</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Chaleckis</surname><given-names>R</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Defining the scope of exposome studies and research needs from a multidisciplinary perspective</article-title>. <source>Environ Sci Technol Lett.</source> <year>2021</year>;<volume>8</volume>:<fpage>839</fpage>-<lpage>852</lpage>. <pub-id pub-id-type="doi">10.1021/acs.estlett.1c00648</pub-id></mixed-citation></ref>
<ref id="osaf010-B11"><label>11</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Wild</surname><given-names>CP.</given-names></string-name></person-group> <article-title>Complementing the genome with an “exposome”: the outstanding challenge of environmental exposure measurement in molecular epidemiology</article-title>. <source>Cancer Epidemiology, Biomarkers &amp; Prevention</source> <year>2005</year>;<volume>14</volume>:<fpage>1847</fpage>-<lpage>1850</lpage>. <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-05-0456</pub-id></mixed-citation></ref>
<ref id="osaf010-B12"><label>12</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Miller</surname><given-names>GW.</given-names></string-name></person-group> <article-title>Exposomics: perfection not required</article-title>. <source>Exposome</source> <year>2024</year>;<volume>4</volume>:<fpage>osae006</fpage>. <pub-id pub-id-type="doi">10.1093/exposome/osae006</pub-id></mixed-citation></ref>
<ref id="osaf010-B13"><label>13</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Balcells</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Xu</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Gil-Solsona</surname><given-names>R</given-names></string-name>, <string-name name-style="western"><surname>Maitre</surname><given-names>L</given-names></string-name>, <string-name name-style="western"><surname>Gago-Ferrero</surname><given-names>P</given-names></string-name>, <string-name name-style="western"><surname>Keun</surname><given-names>HC.</given-names></string-name></person-group> <article-title>Blurred lines: crossing the boundaries between the chemical exposome and the metabolome</article-title>. <source>Curr Opin Chem Biol.</source> <year>2024</year>;<volume>78</volume>:<fpage>102407</fpage>. <pub-id pub-id-type="doi">10.1016/j.cbpa.2023.102407</pub-id></mixed-citation></ref>
<ref id="osaf010-B14"><label>14</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>David</surname><given-names>A</given-names></string-name>, <string-name name-style="western"><surname>Chaker</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Price</surname><given-names>EJ</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Towards a comprehensive characterisation of the human internal chemical exposome: Challenges and perspectives</article-title>. <source>Environ Int.</source> <year>2021</year>;<volume>156</volume>:<fpage>106630</fpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2021.106630</pub-id></mixed-citation></ref>
<ref id="osaf010-B15"><label>15</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Walker</surname><given-names>DI</given-names></string-name>, <string-name name-style="western"><surname>Valvi</surname><given-names>D</given-names></string-name>, <string-name name-style="western"><surname>Rothman</surname><given-names>N</given-names></string-name>, <string-name name-style="western"><surname>Lan</surname><given-names>Q</given-names></string-name>, <string-name name-style="western"><surname>Miller</surname><given-names>GW</given-names></string-name>, <string-name name-style="western"><surname>Jones</surname><given-names>DP.</given-names></string-name></person-group> <article-title>The metabolome: a key measure for exposome research in epidemiology</article-title>. <source>Curr Epidemiol Rep.</source> <year>2019</year>;<volume>6</volume>:<fpage>93</fpage>-<lpage>103</lpage>.</mixed-citation></ref>
<ref id="osaf010-B16"><label>16</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Jones</surname><given-names>DP</given-names></string-name>, <string-name name-style="western"><surname>Cohn</surname><given-names>BA.</given-names></string-name></person-group> <article-title>A vision for exposome epidemiology: The pregnancy exposome in relation to breast cancer in the Child Health and Development Studies</article-title>. <source>Reprod Toxicol.</source> <year>2020</year>;<volume>92</volume>:<fpage>4</fpage>-<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1016/j.reprotox.2020.03.006</pub-id></mixed-citation></ref>
<ref id="osaf010-B17"><label>17</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Uppal</surname><given-names>K</given-names></string-name>, <string-name name-style="western"><surname>Walker</surname><given-names>DI</given-names></string-name>, <string-name name-style="western"><surname>Liu</surname><given-names>K</given-names></string-name>, <string-name name-style="western"><surname>Li</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Go</surname><given-names>YM</given-names></string-name>, <string-name name-style="western"><surname>Jones</surname><given-names>DP.</given-names></string-name></person-group> <article-title>Computational metabolomics: a framework for the million metabolome</article-title>. <source>Chem Res Toxicol.</source> <year>2016</year>;<volume>29</volume>:<fpage>1956</fpage>-<lpage>1975</lpage>. <pub-id pub-id-type="doi">10.1021/acs.chemrestox.6b00179</pub-id></mixed-citation></ref>
<ref id="osaf010-B18"><label>18</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Liu</surname><given-names>KH</given-names></string-name>, <string-name name-style="western"><surname>Nellis</surname><given-names>M</given-names></string-name>, <string-name name-style="western"><surname>Uppal</surname><given-names>K</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Reference standardization for quantification and harmonization of large-scale metabolomics</article-title>. <source>Anal Chem.</source> <year>2020</year>;<volume>92</volume>:<fpage>8836</fpage>-<lpage>8844</lpage>. <pub-id pub-id-type="doi">10.1021/acs.analchem.0c00338</pub-id></mixed-citation></ref>
<ref id="osaf010-B19"><label>19</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Chen</surname><given-names>YC</given-names></string-name>, <string-name name-style="western"><surname>Hsu</surname><given-names>JF</given-names></string-name>, <string-name name-style="western"><surname>Chang</surname><given-names>CW</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Connecting chemical exposome to human health using high-resolution mass spectrometry-based biomonitoring: Recent advances and future perspectives</article-title>. <source>Mass Spectrom Rev.</source> <year>2023</year>;<volume>42</volume>:<fpage>2466</fpage>-<lpage>2486</lpage>. <pub-id pub-id-type="doi">10.1002/mas.21805</pub-id></mixed-citation></ref>
<ref id="osaf010-B20"><label>20</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Vermeulen</surname><given-names>R</given-names></string-name>, <string-name name-style="western"><surname>Schymanski</surname><given-names>EL</given-names></string-name>, <string-name name-style="western"><surname>Barabási</surname><given-names>AL</given-names></string-name>, <string-name name-style="western"><surname>Miller</surname><given-names>GW.</given-names></string-name></person-group> <article-title>The exposome and health: where chemistry meets biology</article-title>. <source>Science.</source> <year>2020</year>;<volume>367</volume>:<fpage>392</fpage>-<lpage>396</lpage>. <pub-id pub-id-type="doi">10.1126/science.aay3164</pub-id></mixed-citation></ref>
<ref id="osaf010-B21"><label>21</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Young</surname><given-names>AS</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Braselton</surname><given-names>ME</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Integrated chemical exposome–metabolome profiling of follicular fluid and associations with fertility outcomes during assisted reproduction</article-title>. <source>Environ Int.</source> <year>2025</year>;<volume>203</volume>:<fpage>109787</fpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2025.109787</pub-id></mixed-citation></ref>
<ref id="osaf010-B22"><label>22</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Joubert</surname><given-names>BR</given-names></string-name>, <string-name name-style="western"><surname>Kioumourtzoglou</surname><given-names>MA</given-names></string-name>, <string-name name-style="western"><surname>Chamberlain</surname><given-names>T</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Powering research through innovative methods for mixtures in epidemiology (PRIME) program: novel and expanded statistical methods</article-title>. <source>Int J Environ Res Public Health.</source> <year>2022</year>;<volume>19</volume>:<fpage>1378</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph19031378</pub-id></mixed-citation></ref>
<ref id="osaf010-B23"><label>23</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kienzler</surname><given-names>A</given-names></string-name>, <string-name name-style="western"><surname>Bopp</surname><given-names>SK</given-names></string-name>, <string-name name-style="western"><surname>van der Linden</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Berggren</surname><given-names>E</given-names></string-name>, <string-name name-style="western"><surname>Worth</surname><given-names>A.</given-names></string-name></person-group> <article-title>Regulatory assessment of chemical mixtures: Requirements, current approaches and future perspectives</article-title>. <source>Regul Toxicol Pharmacol.</source> <year>2016</year>;<volume>80</volume>:<fpage>321</fpage>-<lpage>334</lpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2016.05.020</pub-id></mixed-citation></ref>
<ref id="osaf010-B24"><label>24</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kortenkamp</surname><given-names>A.</given-names></string-name></person-group> <article-title>Low dose mixture effects of endocrine disrupters and their implications for regulatory thresholds in chemical risk assessment</article-title>. <source>Curr Opin Pharmacol.</source> <year>2014</year>;<volume>19</volume>:<fpage>105</fpage>-<lpage>111</lpage>. <pub-id pub-id-type="doi">10.1016/j.coph.2014.08.006</pub-id></mixed-citation></ref>
<ref id="osaf010-B25"><label>25</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Patel</surname><given-names>CJ.</given-names></string-name></person-group> <article-title>Analytic complexity and challenges in identifying mixtures of exposures associated with phenotypes in the exposome era</article-title>. <source>Curr Epidemiol Rep.</source> <year>2017</year>;<volume>4</volume>:<fpage>22</fpage>-<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1007/s40471-017-0100-5</pub-id></mixed-citation></ref>
<ref id="osaf010-B26"><label>26</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Weisskopf</surname><given-names>MG</given-names></string-name>, <string-name name-style="western"><surname>Seals</surname><given-names>RM</given-names></string-name>, <string-name name-style="western"><surname>Webster</surname><given-names>TF.</given-names></string-name></person-group> <article-title>Bias amplification in epidemiologic analysis of exposure to mixtures</article-title>. <source>Environ Health Perspect.</source> <year>2018</year>;<volume>126</volume>:<fpage>047003</fpage>. <pub-id pub-id-type="doi">10.1289/EHP2450</pub-id></mixed-citation></ref>
<ref id="osaf010-B27"><label>27</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Stapleton</surname><given-names>HM</given-names></string-name>, <string-name name-style="western"><surname>Klosterhaus</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Eagle</surname><given-names>S</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Detection of organophosphate flame retardants in furniture foam and U.S. house dust</article-title>. <source>Environ Sci Technol.</source> <year>2009</year>;<volume>43</volume>:<fpage>7490</fpage>-<lpage>7495</lpage>. <pub-id pub-id-type="doi">10.1021/es9014019</pub-id></mixed-citation></ref>
<ref id="osaf010-B28"><label>28</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Levin</surname><given-names>R</given-names></string-name>, <string-name name-style="western"><surname>Villanueva</surname><given-names>CM</given-names></string-name>, <string-name name-style="western"><surname>Beene</surname><given-names>D</given-names></string-name></person-group>, <etal>et al</etal> <article-title>US drinking water quality: exposure risk profiles for seven legacy and emerging contaminants</article-title>. <source>J Expo Sci Environ Epidemiol.</source> <year>2024</year>;<volume>34</volume>:<fpage>3</fpage>-<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1038/s41370-023-00597-z</pub-id></mixed-citation></ref>
<ref id="osaf010-B29"><label>29</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Johns</surname><given-names>LE</given-names></string-name>, <string-name name-style="western"><surname>Cooper</surname><given-names>GS</given-names></string-name>, <string-name name-style="western"><surname>Galizia</surname><given-names>A</given-names></string-name>, <string-name name-style="western"><surname>Meeker</surname><given-names>JD.</given-names></string-name></person-group> <article-title>Exposure assessment issues in epidemiology studies of phthalates</article-title>. <source>Environ Int.</source> <year>2015</year>;<volume>85</volume>:<fpage>27</fpage>-<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2015.08.005</pub-id></mixed-citation></ref>
<ref id="osaf010-B30"><label>30</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Tu</surname><given-names>YK</given-names></string-name>, <string-name name-style="western"><surname>Gunnell</surname><given-names>D</given-names></string-name>, <string-name name-style="western"><surname>Gilthorpe</surname><given-names>MS.</given-names></string-name></person-group> <article-title>Simpson’s paradox, Lord’s paradox, and suppression effects are the same phenomenon—the reversal paradox</article-title>. <source>Emerg Themes Epidemiol</source> <year>2008</year>;<volume>5</volume>:<fpage>2</fpage>. <pub-id pub-id-type="doi">10.1186/1742-7622-5-2</pub-id></mixed-citation></ref>
<ref id="osaf010-B31"><label>31</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Carrico</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Wheeler</surname><given-names>DC</given-names></string-name>, <string-name name-style="western"><surname>Factor-Litvak</surname><given-names>P.</given-names></string-name></person-group> <article-title>Characterization of weighted quantile sum regression for highly correlated data in a risk analysis setting</article-title>. <source>J Agric Biol Environ Stat.</source> <year>2015</year>;<volume>20</volume>:<fpage>100</fpage>-<lpage>120</lpage>. <pub-id pub-id-type="doi">10.1007/s13253-014-0180-3</pub-id></mixed-citation></ref>
<ref id="osaf010-B32"><label>32</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Braun</surname><given-names>JM</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Hauser</surname><given-names>R</given-names></string-name>, <string-name name-style="western"><surname>Webster</surname><given-names>TF.</given-names></string-name></person-group> <article-title>What can epidemiological studies tell us about the impact of chemical mixtures on human health?</article-title> <source>Environ Health Perspect.</source> <year>2016</year>;<volume>124</volume>:<fpage>A6</fpage>-<lpage>A9</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1510569</pub-id></mixed-citation></ref>
<ref id="osaf010-B33"><label>33</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Dormann</surname><given-names>CF</given-names></string-name>, <string-name name-style="western"><surname>Elith</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Bacher</surname><given-names>S</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Collinearity: a review of methods to deal with it and a simulation study evaluating their performance</article-title>. <source>Ecography</source> <year>2013</year>;<volume>36</volume>:<fpage>27</fpage>-<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1111/j.1600-0587.2012.07348.x</pub-id></mixed-citation></ref>
<ref id="osaf010-B34"><label>34</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Vatcheva</surname><given-names>KP</given-names></string-name>, <string-name name-style="western"><surname>Lee</surname><given-names>M</given-names></string-name>, <string-name name-style="western"><surname>McCormick</surname><given-names>JB</given-names></string-name>, <string-name name-style="western"><surname>Rahbar</surname><given-names>MH.</given-names></string-name></person-group> <article-title>Multicollinearity in regression analyses conducted in epidemiologic studies</article-title>. <source>Epidemiology (Sunnyvale).</source> <year>2016</year>;<volume>6</volume>:<fpage>227</fpage>. <pub-id pub-id-type="doi">10.4172/2161-1165.1000227</pub-id></mixed-citation></ref>
<ref id="osaf010-B35"><label>35</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Chung</surname><given-names>MK</given-names></string-name>, <string-name name-style="western"><surname>House</surname><given-names>JS</given-names></string-name>, <string-name name-style="western"><surname>Akhtari</surname><given-names>FS</given-names></string-name></person-group>, <collab>Members of the Exposomics Consortium</collab>, <etal>et al</etal> <article-title>Decoding the exposome: data science methodologies and implications in exposome-wide association studies (ExWASs)</article-title>. <source>Exposome</source> <year>2024</year>;<volume>4</volume>:<fpage>osae001</fpage>. <pub-id pub-id-type="doi">10.1093/exposome/osae001</pub-id></mixed-citation></ref>
<ref id="osaf010-B36"><label>36</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Li</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Cirillo</surname><given-names>P</given-names></string-name>, <string-name name-style="western"><surname>Hu</surname><given-names>X</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Understanding mixed environmental exposures using metabolomics via a hierarchical community network model in a cohort of California women in 1960’s</article-title>. <source>Reprod Toxicol.</source> <year>2020</year>;<volume>92</volume>:<fpage>57</fpage>-<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/j.reprotox.2019.06.013</pub-id></mixed-citation></ref>
<ref id="osaf010-B37"><label>37</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhu</surname><given-names>G</given-names></string-name>, <string-name name-style="western"><surname>Wen</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Cao</surname><given-names>K</given-names></string-name>, <string-name name-style="western"><surname>He</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Wang</surname><given-names>T.</given-names></string-name></person-group> <article-title>A review of common statistical methods for dealing with multiple pollutant mixtures and multiple exposures</article-title>. <source>Front Public Health.</source> <year>2024</year>;<volume>12</volume>:<fpage>1377685</fpage>. <pub-id pub-id-type="doi">10.3389/fpubh.2024.1377685</pub-id></mixed-citation></ref>
<ref id="osaf010-B38"><label>38</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Pan</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Li</surname><given-names>Z</given-names></string-name>, <string-name name-style="western"><surname>Rubbo</surname><given-names>B</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Applications of mixture methods in epidemiological studies investigating the health impact of persistent organic pollutants exposures: a scoping review</article-title>. <source>J Expo Sci Environ Epidemiol.</source> <year>2025</year>;<volume>35</volume>:<fpage>522</fpage>-<lpage>534</lpage>. Published online September 10. <pub-id pub-id-type="doi">10.1038/s41370-024-00717-3</pub-id></mixed-citation></ref>
<ref id="osaf010-B39"><label>39</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bobb</surname><given-names>JF</given-names></string-name>, <string-name name-style="western"><surname>Valeri</surname><given-names>L</given-names></string-name>, <string-name name-style="western"><surname>Claus Henn</surname><given-names>B</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Bayesian kernel machine regression for estimating the health effects of multi-pollutant mixtures</article-title>. <source>Biostatistics.</source> <year>2014</year>;<volume>16</volume>:<fpage>493</fpage>-<lpage>508</lpage>. <pub-id pub-id-type="doi">10.1093/biostatistics/kxu058</pub-id></mixed-citation></ref>
<ref id="osaf010-B40"><label>40</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bobb</surname><given-names>JF</given-names></string-name>, <string-name name-style="western"><surname>Claus Henn</surname><given-names>B</given-names></string-name>, <string-name name-style="western"><surname>Valeri</surname><given-names>L</given-names></string-name>, <string-name name-style="western"><surname>Coull</surname><given-names>BA.</given-names></string-name></person-group> <article-title>Statistical software for analyzing the health effects of multiple concurrent exposures via Bayesian kernel machine regression</article-title>. <source>Environ Health.</source> <year>2018</year>;<volume>17</volume>:<fpage>67</fpage>. <pub-id pub-id-type="doi">10.1186/s12940-018-0413-y</pub-id></mixed-citation></ref>
<ref id="osaf010-B41"><label>41</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gibson</surname><given-names>EA</given-names></string-name>, <string-name name-style="western"><surname>Nunez</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Abuawad</surname><given-names>A</given-names></string-name></person-group>, <etal>et al</etal> <article-title>An overview of methods to address distinct research questions on environmental mixtures: an application to persistent organic pollutants and leukocyte telomere length</article-title>. <source>Environ Health.</source> <year>2019</year>;<volume>18</volume>:<fpage>76</fpage>. <pub-id pub-id-type="doi">10.1186/s12940-019-0515-1</pub-id></mixed-citation></ref>
<ref id="osaf010-B42"><label>42</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Joubert</surname><given-names>BR</given-names></string-name>, <string-name name-style="western"><surname>Palmer</surname><given-names>G</given-names></string-name>, <string-name name-style="western"><surname>Dunson</surname><given-names>D</given-names></string-name>, <string-name name-style="western"><surname>Kioumourtzoglou</surname><given-names>MA</given-names></string-name>, <string-name name-style="western"><surname>Coull</surname><given-names>BA.</given-names></string-name></person-group> <article-title>Workflow for statistical analysis of environmental mixtures</article-title>. <source>Environ Health Perspect</source>. <year>2025</year>. <pub-id pub-id-type="doi">10.1289/EHP16791</pub-id></mixed-citation></ref>
<ref id="osaf010-B43"><label>43</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Keil</surname><given-names>AP</given-names></string-name>, <string-name name-style="western"><surname>Buckley</surname><given-names>JP</given-names></string-name>, <string-name name-style="western"><surname>O'Brien</surname><given-names>KM</given-names></string-name>, <string-name name-style="western"><surname>Ferguson</surname><given-names>KK</given-names></string-name>, <string-name name-style="western"><surname>Zhao</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>White</surname><given-names>AJ.</given-names></string-name></person-group> <article-title>A quantile-based g-computation approach to addressing the effects of exposure mixtures</article-title>. <source>Environ Health Perspect.</source> <year>2020</year>;<volume>128</volume>:<fpage>47004</fpage>. <pub-id pub-id-type="doi">10.1289/EHP5838</pub-id></mixed-citation></ref>
<ref id="osaf010-B44"><label>44</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gennings</surname><given-names>C.</given-names></string-name></person-group> <article-title>Comment on “a quantile-based g-computation approach to addressing the effects of exposure mixtures”</article-title>. <source>Environ Health Perspect.</source> <year>2021</year>;<volume>129</volume>:<fpage>38001</fpage>. <pub-id pub-id-type="doi">10.1289/EHP8739</pub-id></mixed-citation></ref>
<ref id="osaf010-B45"><label>45</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Renzetti</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Calza</surname><given-names>S.</given-names></string-name></person-group> <article-title>A weighted quantile sum regression with penalized weights and two indices</article-title>. <source>Front Public Health.</source> <year>2023</year>;<volume>11</volume>:<fpage>1151821</fpage>. Accessed August 21, 2023. <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpubh.2023.1151821">https://www.frontiersin.org/articles/10.3389/fpubh.2023.1151821</ext-link></mixed-citation></ref>
<ref id="osaf010-B46"><label>46</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Tanner</surname><given-names>EM</given-names></string-name>, <string-name name-style="western"><surname>Bornehag</surname><given-names>CG</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C.</given-names></string-name></person-group> <article-title>Repeated holdout validation for weighted quantile sum regression</article-title>. <source>MethodsX</source> <year>2019</year>;<volume>6</volume>:<fpage>2855</fpage>-<lpage>2860</lpage>. <pub-id pub-id-type="doi">10.1016/j.mex.2019.11.008</pub-id></mixed-citation></ref>
<ref id="osaf010-B47"><label>47</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hao</surname><given-names>W</given-names></string-name>, <string-name name-style="western"><surname>Cathey</surname><given-names>AL</given-names></string-name>, <string-name name-style="western"><surname>Aung</surname><given-names>MM</given-names></string-name>, <string-name name-style="western"><surname>Boss</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Meeker</surname><given-names>JD</given-names></string-name>, <string-name name-style="western"><surname>Mukherjee</surname><given-names>B.</given-names></string-name></person-group> <article-title>Statistical methods for chemical mixtures: a roadmap for practitioners using simulation studies and a sample data analysis in the PROTECT cohort</article-title>. <source>Environ Health Perspect.</source> <year>2025</year>;<volume>133</volume>:<fpage>67019</fpage>. Published online May 20. <pub-id pub-id-type="doi">10.1289/EHP15305</pub-id></mixed-citation></ref>
<ref id="osaf010-B48"><label>48</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kalia</surname><given-names>V</given-names></string-name>, <string-name name-style="western"><surname>Walker</surname><given-names>DI</given-names></string-name>, <string-name name-style="western"><surname>Krasnodemski</surname><given-names>KM</given-names></string-name>, <string-name name-style="western"><surname>Jones</surname><given-names>DP</given-names></string-name>, <string-name name-style="western"><surname>Miller</surname><given-names>GW</given-names></string-name>, <string-name name-style="western"><surname>Kioumourtzoglou</surname><given-names>MA.</given-names></string-name></person-group> <article-title>Unsupervised dimensionality reduction for exposome research</article-title>. <source>Curr Opin Environ Sci Health.</source> <year>2020</year>;<volume>15</volume>:<fpage>32</fpage>-<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1016/j.coesh.2020.05.001</pub-id></mixed-citation></ref>
<ref id="osaf010-B49"><label>49</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hoerl</surname><given-names>AE</given-names></string-name>, <string-name name-style="western"><surname>Kennard</surname><given-names>RW.</given-names></string-name></person-group> <article-title>Ridge regression: biased estimation for nonorthogonal problems</article-title>. <source>Technometrics</source> <year>1970</year>;<volume>12</volume>:<fpage>55</fpage>-<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1080/00401706.1970.10488634</pub-id></mixed-citation></ref>
<ref id="osaf010-B50"><label>50</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zou</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Hastie</surname><given-names>T.</given-names></string-name></person-group> <article-title>Regularization and variable selection via the elastic net</article-title>. <source>Journal of the Royal Statistical Society Series B: Statistical Methodology</source>. <year>2005</year>;<volume>67</volume>:<fpage>301</fpage>-<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></mixed-citation></ref>
<ref id="osaf010-B51"><label>51</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Tibshirani</surname><given-names>R.</given-names></string-name></person-group> <article-title>Regression shrinkage and selection via the lasso</article-title>. <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>. <year>1996</year>;<volume>58</volume>:<fpage>267</fpage>-<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id></mixed-citation></ref>
<ref id="osaf010-B52"><label>52</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Czarnota</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Colt</surname><given-names>JS</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Analysis of environmental chemical mixtures and non-Hodgkin lymphoma risk in the NCI-SEER NHL study</article-title>. <source>Environ Health Perspect.</source> <year>2015</year>;<volume>123</volume>:<fpage>965</fpage>-<lpage>970</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1408630</pub-id></mixed-citation></ref>
<ref id="osaf010-B53"><label>53</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zou</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Zhang</surname><given-names>HH.</given-names></string-name></person-group> <article-title>On the adaptive elastic-net with a diverging number of parameters</article-title>. <source>Ann Stat.</source> <year>2009</year>;<volume>37</volume>:<fpage>1733</fpage>-<lpage>1751</lpage>. <pub-id pub-id-type="doi">10.1214/08-AOS625</pub-id></mixed-citation></ref>
<ref id="osaf010-B54"><label>54</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Curtin</surname><given-names>P</given-names></string-name>, <string-name name-style="western"><surname>Kellogg</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Cech</surname><given-names>N</given-names></string-name>, <string-name name-style="western"><surname>Gennings</surname><given-names>C.</given-names></string-name></person-group> <article-title>A random subset implementation of weighted quantile sum (WQSRS) regression for analysis of high-dimensional mixtures</article-title>. <source>Communications in Statistics—Simulation and Computation</source>. <year>2021</year>;<volume>50</volume>:<fpage>1119</fpage>-<lpage>1134</lpage>. <pub-id pub-id-type="doi">10.1080/03610918.2019.1577971</pub-id></mixed-citation></ref>
<ref id="osaf010-B55"><label>55</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Mohammed Taha</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Aalizadeh</surname><given-names>R</given-names></string-name>, <string-name name-style="western"><surname>Alygizakis</surname><given-names>N</given-names></string-name></person-group>, <etal>et al</etal> <article-title>The NORMAN Suspect List Exchange (NORMAN-SLE): facilitating European and worldwide collaboration on suspect screening in high resolution mass spectrometry</article-title>. <source>Environ Sci Eur.</source> <year>2022</year>;<volume>34</volume>:<fpage>104</fpage>. <pub-id pub-id-type="doi">10.1186/s12302-022-00680-6</pub-id></mixed-citation></ref>
<ref id="osaf010-B56"><label>56</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Uppal</surname><given-names>K</given-names></string-name>, <string-name name-style="western"><surname>Walker</surname><given-names>DI</given-names></string-name>, <string-name name-style="western"><surname>Jones</surname><given-names>DP.</given-names></string-name></person-group> <article-title>xMSannotator: an R package for network-based annotation of high-resolution metabolomics data</article-title>. <source>Anal Chem.</source> <year>2017</year>;<volume>89</volume>:<fpage>1063</fpage>-<lpage>1067</lpage>. <pub-id pub-id-type="doi">10.1021/acs.analchem.6b01214</pub-id></mixed-citation></ref>
<ref id="osaf010-B57"><label>57</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bennett</surname><given-names>DH</given-names></string-name>, <string-name name-style="western"><surname>Busgang</surname><given-names>SA</given-names></string-name>, <string-name name-style="western"><surname>Kannan</surname><given-names>K</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Environmental exposures to pesticides, phthalates, phenols and trace elements are associated with neurodevelopment in the CHARGE study</article-title>. <source>Environ Int.</source> <year>2022</year>;<volume>161</volume>:<fpage>107075</fpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2021.107075</pub-id></mixed-citation></ref>
<ref id="osaf010-B58"><label>58</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Busgang</surname><given-names>SA</given-names></string-name>, <string-name name-style="western"><surname>Spear</surname><given-names>EA</given-names></string-name>, <string-name name-style="western"><surname>Andra</surname><given-names>SS</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Application of growth modeling to assess the impact of hospital-based phthalate exposure on preterm infant growth parameters during the neonatal intensive care unit hospitalization</article-title>. <source>Sci Total Environ.</source> <year>2022</year>;<volume>850</volume>:<fpage>157830</fpage>. <pub-id pub-id-type="doi">10.1016/j.scitotenv.2022.157830</pub-id></mixed-citation></ref>
<ref id="osaf010-B59"><label>59</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Campbell</surname><given-names>RK</given-names></string-name>, <string-name name-style="western"><surname>Curtin</surname><given-names>P</given-names></string-name>, <string-name name-style="western"><surname>Enlow</surname><given-names>MB</given-names></string-name>, <string-name name-style="western"><surname>Brunst</surname><given-names>KJ</given-names></string-name>, <string-name name-style="western"><surname>Wright</surname><given-names>RO</given-names></string-name>, <string-name name-style="western"><surname>Wright</surname><given-names>RJ.</given-names></string-name></person-group> <article-title>Disentangling associations among maternal lifetime and prenatal stress, psychological functioning during pregnancy, maternal race/ethnicity, and infant negative affectivity at age 6 months: a mixtures approach</article-title>. <source>Health Equity.</source> <year>2020</year>;<volume>4</volume>:<fpage>489</fpage>-<lpage>499</lpage>. <pub-id pub-id-type="doi">10.1089/heq.2020.0032</pub-id></mixed-citation></ref>
<ref id="osaf010-B60"><label>60</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Invernizzi</surname><given-names>A</given-names></string-name>, <string-name name-style="western"><surname>Rechtman</surname><given-names>E</given-names></string-name>, <string-name name-style="western"><surname>Curtin</surname><given-names>P</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Functional changes in neural mechanisms underlying post-traumatic stress disorder in World Trade Center responders</article-title>. <source>Transl Psychiatry.</source> <year>2023</year>;<volume>13</volume>:<fpage>239</fpage>-<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1038/s41398-023-02526-y</pub-id></mixed-citation></ref>
<ref id="osaf010-B61"><label>61</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Babin</surname><given-names>É</given-names></string-name>, <string-name name-style="western"><surname>Cano-Sancho</surname><given-names>G</given-names></string-name>, <string-name name-style="western"><surname>Vigneau</surname><given-names>E</given-names></string-name>, <string-name name-style="western"><surname>Antignac</surname><given-names>JP.</given-names></string-name></person-group> <article-title>A review of statistical strategies to integrate biomarkers of chemical exposure with biomarkers of effect applied in omic-scale environmental epidemiology</article-title>. <source>Environ Pollut.</source> <year>2023</year>;<volume>330</volume>:<fpage>121741</fpage>. <pub-id pub-id-type="doi">10.1016/j.envpol.2023.121741</pub-id></mixed-citation></ref>
<ref id="osaf010-B62"><label>62</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Li</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Park</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Duraisingham</surname><given-names>S</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Predicting network activity from high throughput metabolomics</article-title>. <source>PLOS Comput Biol.</source> <year>2013</year>;<volume>9</volume>:<fpage>e1003123</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1003123</pub-id></mixed-citation></ref>
<ref id="osaf010-B63"><label>63</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Pang</surname><given-names>Z</given-names></string-name>, <string-name name-style="western"><surname>Chong</surname><given-names>J</given-names></string-name>, <string-name name-style="western"><surname>Zhou</surname><given-names>G</given-names></string-name></person-group>, <etal>et al</etal> <article-title>MetaboAnalyst 5.0: narrowing the gap between raw spectra and functional insights</article-title>. <source>Nucleic Acids Res.</source> <year>2021</year>;<volume>49</volume>:<fpage>W388</fpage>–<lpage>W396</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab382</pub-id></mixed-citation></ref>
<ref id="osaf010-B64"><label>64</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Tian</surname><given-names>L</given-names></string-name>, <string-name name-style="western"><surname>Li</surname><given-names>Z</given-names></string-name>, <string-name name-style="western"><surname>Ma</surname><given-names>G</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Metapone: a Bioconductor package for joint pathway testing for untargeted metabolomics data</article-title>. <source>Bioinformatics.</source> <year>2022</year>;<volume>38</volume>:<fpage>3662</fpage>-<lpage>3664</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btac364</pub-id></mixed-citation></ref>
<ref id="osaf010-B65"><label>65</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ogata</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Goto</surname><given-names>S</given-names></string-name>, <string-name name-style="western"><surname>Fujibuchi</surname><given-names>W</given-names></string-name>, <string-name name-style="western"><surname>Kanehisa</surname><given-names>M.</given-names></string-name></person-group> <article-title>Computation with the KEGG pathway database</article-title>. <source>Biosystems.</source> <year>1998</year>;<volume>47</volume>:<fpage>119</fpage>-<lpage>128</lpage>. <pub-id pub-id-type="doi">10.1016/S0303-2647(98)00017-3</pub-id></mixed-citation></ref>
<ref id="osaf010-B66"><label>66</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Frolkis</surname><given-names>A</given-names></string-name>, <string-name name-style="western"><surname>Knox</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Lim</surname><given-names>E</given-names></string-name></person-group>, <etal>et al</etal> <article-title>SMPDB: the small molecule pathway database</article-title>. <source>Nucleic Acids Res.</source> <year>2010</year>;<volume>38</volume>:<fpage>D480</fpage>–<lpage>D487</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkp1002</pub-id></mixed-citation></ref>
<ref id="osaf010-B67"><label>67</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Fuller</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Zhu</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Nicholas</surname><given-names>J</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Metabolomic epidemiology offers insights into disease aetiology</article-title>. <source>Nat Metab.</source> <year>2023</year>;<volume>5</volume>:<fpage>1656</fpage>-<lpage>1672</lpage>. <pub-id pub-id-type="doi">10.1038/s42255-023-00903-x</pub-id></mixed-citation></ref>
<ref id="osaf010-B68"><label>68</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Aung</surname><given-names>MT</given-names></string-name>, <string-name name-style="western"><surname>Song</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Ferguson</surname><given-names>KK</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Application of an analytical framework for multivariate mediation analysis of environmental data</article-title>. <source>Nat Commun.</source> <year>2020</year>;<volume>11</volume>:<fpage>5624</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-19335-2</pub-id></mixed-citation></ref>
<ref id="osaf010-B69"><label>69</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Goodrich</surname><given-names>JA</given-names></string-name>, <string-name name-style="western"><surname>Wang</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Jia</surname><given-names>Q</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Integrating Multi-Omics with environmental data for precision health: A novel analytic framework and case study on prenatal mercury induced childhood fatty liver disease</article-title>. <source>Environ Int.</source> <year>2024</year>;<volume>190</volume>:<fpage>108930</fpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2024.108930</pub-id></mixed-citation></ref>
<ref id="osaf010-B70"><label>70</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhang</surname><given-names>H</given-names></string-name>, <string-name name-style="western"><surname>Zheng</surname><given-names>Y</given-names></string-name>, <string-name name-style="western"><surname>Zhang</surname><given-names>Z</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Estimating and testing high-dimensional mediation effects in epigenetic studies</article-title>. <source>Bioinformatics.</source> <year>2016</year>;<volume>32</volume>:<fpage>3150</fpage>-<lpage>3154</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw351</pub-id></mixed-citation></ref>
<ref id="osaf010-B71"><label>71</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Liang</surname><given-names>D</given-names></string-name>, <string-name name-style="western"><surname>Li</surname><given-names>Z</given-names></string-name>, <string-name name-style="western"><surname>Vlaanderen</surname><given-names>J</given-names></string-name></person-group>, <etal>et al</etal> <article-title>A state-of-the-science review on high-resolution metabolomics application in air pollution health research: current progress, analytical challenges, and recommendations for future direction</article-title>. <source>Environ Health Perspect.</source> <year>2023</year>;<volume>131</volume>:<fpage>56002</fpage>. <pub-id pub-id-type="doi">10.1289/EHP11851</pub-id></mixed-citation></ref>
<ref id="osaf010-B72"><label>72</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Wieder</surname><given-names>C</given-names></string-name>, <string-name name-style="western"><surname>Bundy</surname><given-names>JG</given-names></string-name>, <string-name name-style="western"><surname>Frainay</surname><given-names>C</given-names></string-name></person-group>, <etal>et al</etal> <article-title>Avoiding the misuse of pathway analysis tools in environmental metabolomics</article-title>. <source>Environ Sci Technol.</source> <year>2022</year>;<volume>56</volume>:<fpage>14219</fpage>-<lpage>14222</lpage>. <pub-id pub-id-type="doi">10.1021/acs.est.2c05588</pub-id></mixed-citation></ref>
</ref-list>
</back>
</article>