GCC Code Coverage Report


./
File: src/XpertMass/IsotopicClusterGenerator.cpp
Date: 2024-08-24 11:26:06
Lines:
0/201
0.0%
Functions:
0/26
0.0%
Branches:
0/180
0.0%

Line Branch Exec Source
1 /* BEGIN software license
2 *
3 * MsXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright (C) 2009--2020 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the MsXpertSuite project.
10 *
11 * The MsXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34 /////////////////////// Std lib includes
35 #include <memory>
36
37 /////////////////////// Qt includes
38 #include <QDebug>
39
40
41 /////////////////////// IsoSpec
42 #include <IsoSpec++/isoSpec++.h>
43 #include <IsoSpec++/element_tables.h>
44
45
46 // extern const int elem_table_atomicNo[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
47 // extern const double
48 // elem_table_probability[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
49 // extern const double elem_table_mass[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
50 // extern const int elem_table_massNo[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
51 // extern const int
52 // elem_table_extraNeutrons[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
53 // extern const char* elem_table_element[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
54 // extern const char* elem_table_symbol[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
55 // extern const bool elem_table_Radioactive[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
56 // extern const double
57 // elem_table_log_probability[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
58
59 /////////////////////// pappsomspp includes
60 #include "pappsomspp/trace/trace.h"
61 #include "pappsomspp/types.h"
62
63
64 /////////////////////// Local includes
65 #include "globals.hpp"
66 #include "PeakCentroid.hpp"
67 #include "Formula.hpp"
68 #include "IsotopicClusterGenerator.hpp"
69 #include "IsotopicDataLibraryHandler.hpp"
70 #include "IsotopicDataUserConfigHandler.hpp"
71 #include "IsotopicDataManualConfigHandler.hpp"
72
73
74 namespace MsXpS
75 {
76
77 namespace libXpertMass
78 {
79
80
81 /*!
82 \class MsXpS::libXpertMass::IsotopicClusterGenerator
83 \inmodule libXpertMass
84 \ingroup XpertMassMassCalculations
85 \inheaderfile IsotopicClusterGenerator.hpp
86
87 \brief The IsotopicClusterGenerator class provides the features needed to model
88 isotopic clusters starting from (elemental-composition, charge) pairs.
89
90 The modelling uses the member isotopic data. The generated isotopic clusters
91 only contain cluster centroid peaks. If peaks should have a profile, then they
92 need to be shaped.
93
94 \sa IsotopicClusterShaper, MassPeakShaper
95 */
96
97 /*!
98 \enum MsXpS::libXpertMass::IsotopicDataType
99
100 This enum specifies the type of isotopic data.
101
102 \value NOT_SET: Not configured.
103 .
104 \value LIBRARY_CONFIG: The isotopic data are loaded intact from the IsoSpec
105 library data and are considered pristine natural abundance data.
106 .
107 \value USER_CONFIG: The isotopic data are in the same format as for
108 LIBRARY_CONFIG but might have been modified by the user to configure new
109 abundances.
110 .
111 \value MANUAL_CONFIG: The isotopic data are in a specific format, different
112 than the two above, that actually crafts the isotopes starting from scratch.
113 */
114
115 /*!
116 \typealias FormulaChargePair
117
118 Alias for std::pair<QString, int>.
119 */
120
121 /*!
122 \typealias IsotopicClusterChargePair
123
124 Alias for std::pair<pappso::TraceCstSPtr, int>.
125 */
126
127 /*!
128 \typedef IsotopicClusterGeneratorSPtr
129 \relates IsotopicClusterGenerator.
130
131 Synonym for std::shared_ptr<IsotopicClusterGenerator>.
132 */
133 IsotopicClusterGenerator::IsotopicClusterGenerator()
134 {
135 }
136
137 /*!
138 \typedef IsotopicClusterGeneratorCstSPtr
139 \relates IsotopicClusterGenerator.
140
141 Synonym for std::shared_ptr<const IsotopicClusterGenerator>.
142 */
143
144 /*!
145 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::msp_isotopicData
146
147 \brief The isotopic data needed for the computations.
148 */
149
150 /*!
151 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_isotopicDataType
152
153 \brief The \l IsotopicDataType type of data.
154 */
155
156 /*!
157 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_maxSummedProbability
158
159 \brief The summed probability of all the isotopic cluster peaks. The
160 computation stops when this probability is reached.
161 */
162
163 /*!
164 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_normalizeIntensity
165
166 \brief The most intense cluster peak's intensity that is used to normalize all
167 the other cluster peaks.
168 */
169
170 /*!
171 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_sortType
172
173 \brief The type of sorting required for the generated cluster peak centroids.
174 */
175
176 /*!
177 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_sortOrder
178
179 \brief The order of the sorting for the generated cluster peak centroids.
180 */
181
182 /*!
183 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_formulaChargePairs
184
185 \brief The set of (elemental composition, charge) pairs.
186 */
187
188 /*!
189 \variable MsXpS::libXpertMass::IsotopicClusterGenerator::m_isotopicClusterChargePairs
190
191 \brief The set of (isotopic cluster, charge) pairs.
192 */
193
194
195 /*!
196 \brief Constructs a IsotopicClusterGenerator instance.
197
198 \list
199 \li \a isotopic_data_sp: The isotopic data used for the calculations.
200 \endlist
201 */
202 IsotopicClusterGenerator::IsotopicClusterGenerator(
203 libXpertMass::IsotopicDataSPtr isotopic_data_sp)
204 : msp_isotopicData(isotopic_data_sp)
205 {
206 }
207
208 /*!
209 \brief Destructs this IsotopicClusterGenerator instance.
210 */
211 IsotopicClusterGenerator::~IsotopicClusterGenerator()
212 {
213 // qDebug();
214 }
215
216 /*!
217 \brief Sets the isotopic data type to \a isotopic_data_type.
218 */
219 void
220 IsotopicClusterGenerator::setIsotopicDataType(
221 IsotopicDataType isotopic_data_type)
222 {
223 m_isotopicDataType = isotopic_data_type;
224 }
225
226
227 /*!
228 \brief Sets the isotopic data to \a isotopic_data_sp.
229 */
230 void
231 IsotopicClusterGenerator::setIsotopicData(
232 libXpertMass::IsotopicDataSPtr isotopic_data_sp)
233 {
234 msp_isotopicData = isotopic_data_sp;
235 }
236
237
238 /*!
239 \brief Returns the isotopic data.
240 */
241 libXpertMass::IsotopicDataSPtr
242 IsotopicClusterGenerator::getIsotopicData() const
243 {
244 return msp_isotopicData;
245 }
246
247
248 /*!
249 \brief Adds the (elemental composition, charge) pair \a
250 formula_charge_pair to the member list of FormulaChargePair instances.
251
252 The member list of FormulaChargePair instances is first cleared.
253 */
254 void
255 IsotopicClusterGenerator::setFormulaChargePair(
256 FormulaChargePair &formula_charge_pair)
257 {
258 m_formulaChargePairs.clear();
259 m_formulaChargePairs.push_back(formula_charge_pair);
260
261 // qDebug() << formula_charge_pair.first << formula_charge_pair.second;
262 }
263
264 /*!
265 \brief Adds the (elemental composition, charge) pair \a
266 formula_charge_pair to the member list of FormulaChargePair instances.
267 */
268 void
269 IsotopicClusterGenerator::appendFormulaChargePair(
270 FormulaChargePair &formula_charge_pair)
271 {
272 m_formulaChargePairs.push_back(formula_charge_pair);
273
274 // qDebug() << formula_charge_pair.first << formula_charge_pair.second;
275 }
276
277 /*!
278 \brief Adds the (elemental composition, charge) pairs \a
279 formula_charge_pairs to the member list of FormulaChargePair instances.
280
281 The member list of FormulaChargePair instances is first cleared.
282 */
283 void
284 IsotopicClusterGenerator::setFormulaChargePairs(
285 const std::vector<FormulaChargePair> &formula_charge_pairs)
286 {
287 m_formulaChargePairs.clear();
288
289 m_formulaChargePairs.assign(formula_charge_pairs.begin(),
290 formula_charge_pairs.end());
291
292 // qDebug() << "Set" << m_formulaChargePairs.size() << "formula/charge pairs";
293 // for(auto pair : m_formulaChargePairs)
294 // qDebug() << pair.first << "/" << pair.second;
295 }
296
297 /*!
298 \brief Sets the summed probability maximum value to \a max_probability.
299 */
300 void
301 IsotopicClusterGenerator::setMaxSummedProbability(double max_probability)
302 {
303 m_maxSummedProbability = max_probability;
304 }
305
306 /*!
307 \brief Sets the normalization intensity to \a normalize_intensity.
308 */
309 void
310 IsotopicClusterGenerator::setNormalizationIntensity(int normalize_intensity)
311 {
312 m_normalizeIntensity = normalize_intensity;
313 }
314
315 /*!
316 \brief Sets the \a sort_type.
317 */
318 void
319 IsotopicClusterGenerator::setSortType(pappso::SortType sort_type)
320 {
321 m_sortType = sort_type;
322 }
323
324 /*!
325 \brief Sets the \a sort_order.
326 */
327 void
328 IsotopicClusterGenerator::setSortOrder(pappso::SortOrder sort_order)
329 {
330 m_sortOrder = sort_order;
331 }
332
333 /*!
334 \brief Validates the elemental composition \a formula.
335
336 The \a formula needs to be fully indexed, that is, even an atom present only
337 once needs to be indexed with '1', like this \c H2O1.
338
339 Returns true if validation was successful, false otherwise.
340
341 \sa Formula::validate()
342 */
343 bool
344 IsotopicClusterGenerator::validateFormula(Formula &formula)
345 {
346 // qDebug() << "Checking formula:" << formula.toString()
347 //<< "against an isotopic data set of " << msp_isotopicData->size()
348 //<< "isotopes";
349
350 // Check the syntax of the formula. Note that we need an obligatory count
351 // index even for elements that are present a single time (H2O1 note the
352 // 1).
353
354 // IsoSpec requires that even single-count element be qualified with an
355 // index (H2O1)
356
357 formula.setForceCountIndex(true);
358
359 // We have to validate because the formula might be "C5H6N3-O1", in which case
360 // if would fail the simple checkSyntax() call (that call needs to be used on
361 // already split parts of a formula).
362
363 if(!formula.validate(
364 msp_isotopicData, true /*store atom count*/, true /*reset counts*/))
365 return false;
366
367 return true;
368 }
369
370 /*!
371 \brief Validates all the elemental compositions in this
372 IsotopicClusterGenerator instance.
373
374 Each \l FormulaChargePair in m_formulaChargePairs is validated for its
375 elemental composition by first creating a \l Formula out of it.
376
377 Returns true if validation was successful, false otherwise.
378
379 \sa validateFormula, Formula::validate()
380 */
381 bool
382 IsotopicClusterGenerator::validateAllFormulas()
383 {
384 for(FormulaChargePair &pair : m_formulaChargePairs)
385 {
386 Formula formula(pair.first);
387
388 if(!validateFormula(formula))
389 return false;
390 }
391
392 return true;
393 }
394
395 /*!
396 \brief Runs the IsoSpec-based isotopic calculations.
397
398 \list
399
400 \li \a element_count: the number of elements in the chemical composition.
401 \li \a charge: the charge of the analyte.
402
403 \li \a per_element_isotopes_count_array_p: pointer to int array. This array
404 lists the number of isotopes that each element has. Typically, C has 2, O has 3,
405 P has 1...
406
407 \li \a per_element_symbol_count_array_p: pointer to int array. This array lists
408 the count of atoms of each element. Typically H20 will have 2 for H and 1 for O.
409
410 \li \a per_element_isotope_masses_arrays_p_p: pointer to pointer to double array
411 (array of arrays of double). Each array contains a new sub-array for each
412 symbol. The sub-array contains the isotopic masses for one of the element
413 symbols.
414
415 \li \a per_element_isotope_probs_arrays_p_p: pointer to pointer to double array
416 (array of arrays of double). Each array contains a new sub-array for each
417 symbol. The sub-array contains the isotopic probs for one of the element
418 symbols.
419
420 \endlist
421
422 Returns a pappso::Trace with the calculated isotopic cluster.
423 */
424 pappso::TraceSPtr
425 IsotopicClusterGenerator::runIsotopicDataCalculations(
426 std::size_t element_count,
427 int charge,
428 int *per_element_isotopes_count_array_p,
429 int *per_element_symbol_count_array_p,
430 double **per_element_isotope_masses_arrays_p_p,
431 double **per_element_isotope_probs_arrays_p_p)
432 {
433 // We get all the isotopic data relevant to the isotopic cluster modelling
434 // calculation as performed by the IsoSpec library.
435
436 if(per_element_isotopes_count_array_p == nullptr ||
437 per_element_symbol_count_array_p == nullptr ||
438 per_element_isotope_masses_arrays_p_p == nullptr ||
439 per_element_isotope_probs_arrays_p_p == nullptr)
440 qFatal("Programming error. The pointers cannot be nullptr.");
441
442
443 if(m_maxSummedProbability <= 0 || m_maxSummedProbability > 1)
444 {
445 qDebug() << "The maximum summed probability has an incorrect value:"
446 << m_maxSummedProbability;
447 return nullptr;
448 }
449
450 IsoSpec::IsoLayeredGenerator iso(
451 IsoSpec::Iso(element_count,
452 per_element_isotopes_count_array_p,
453 per_element_symbol_count_array_p,
454 per_element_isotope_masses_arrays_p_p,
455 per_element_isotope_probs_arrays_p_p),
456 // The three values below are from the documentation (default values in the
457 // constructor). We have added them 20230329 because we discovered that they
458 // were needed on minGW64, otherwise we would experience crashes.
459 1000,
460 1000,
461 true,
462 m_maxSummedProbability);
463
464 // qDebug() << "iso's mono peak mass:" << iso.getMonoisotopicPeakMass();
465
466 // Each time we run a calculation, we do store the results in a new
467 // IsotopicCluster.
468
469 pappso::TraceSPtr isotopic_cluster_sp = std::make_shared<pappso::Trace>();
470
471 // We store the results as std::vector<std::shared_ptr<libmass:PeakCentroid>>
472 // because we'll want to sort the values according to the user's requirements.
473
474 double effective_summed_probs = 0;
475
476 // Iterate in all the cluster configurations and output all the ones that
477 // summatively make a total probability <= to the probability set by the
478 // user.
479
480 /////////////// ATTENTION ////////////////
481 // The loop below is tricky,
482
483 while(iso.advanceToNextConfiguration())
484 {
485 double iso_prob = iso.prob();
486 double iso_mz = iso.mass() / charge;
487
488 // qDebug() << "For current configuration (charge accounted for):" <<
489 // iso_mz
490 //<< iso_prob
491 //<< "and effective_probs_sum : " << effective_summed_probs;
492
493 // Create a peak centroid and store it (remark that we change the mass
494 // of the ion into m/z because the user had set the charge corresponding
495 // to the formula for which the isotopic cluster is being computed.
496
497 isotopic_cluster_sp->push_back(pappso::DataPoint(iso_mz, iso_prob));
498
499 // qDebug() << "Pushed back new peak centroid:" << iso_mz << "/" <<
500 // iso_prob;
501
502 // We do this increment at the end of the block. Indeed, if we had set up
503 // at the top of the block, then, if the very first centroid had already a
504 // prob > m_maxSummedProbability, then we would end up with an empty
505 // cluster!
506
507 effective_summed_probs += iso_prob;
508
509 if(effective_summed_probs > m_maxSummedProbability)
510 {
511 // qDebug() << "Reached the max value: effective_summed_probs:"
512 //<< effective_summed_probs
513 //<< "and m_maxSummedProbability:" << m_maxSummedProbability
514 //<< "BREAKING.";
515 break;
516 }
517 else
518 {
519 // qDebug() << "Not yet reached the max value: effective_summed_probs
520 // : "
521 //<< effective_summed_probs
522 //<< "and m_maxSummedProbability:" << m_maxSummedProbability;
523 }
524 }
525
526 // Now perform the normalization to the Gaussian apex intensity value if
527 // so is requested by the user For this we first need to find
528 // what is the most intensity peak centroid. Then, we'll normalize against
529 // it by dividing all intensity that that most intense value and
530 // multiplying by the requested gaussian apex intensity value.
531
532 // qDebug() << "Now asking for normalization.";
533
534 // Just a debug check.
535 // qDebug() << "Before normalizing, first data point:"
536 //<< isotopic_cluster_sp->front().toString();
537
538 normalizeIntensities(isotopic_cluster_sp);
539
540 // Just a debug check.
541 // qDebug() << "After normalizing, first data point:"
542 //<< isotopic_cluster_sp->front().toString();
543
544 // Now check if the user requests to kind of sorting of the PeakCentroid
545 // instances.
546
547 sortPeakCentroids(isotopic_cluster_sp);
548
549 // qDebug() << "Now returning a cluster of size:" <<
550 // isotopic_cluster_sp->size();
551
552 return isotopic_cluster_sp;
553 }
554
555 /*!
556 \brief Calculates the isotopic cluster's peak centroids for \a
557 formula_charge_pair.
558
559 \list 1
560 \li The elemental composition formula string is converted to a \l Formula and
561 validated.
562 \li The proper isotopic data handler is allocated (\l IsotopicDataType).
563 \li The number of symbols in the elemental composition is determined.
564 \li The int arrays and arrays of double arrays are allocated.
565 \li The arrays are filled-in with \l configureIsotopicData().
566 \li The calculations are performed on these arrays with \l
567 runIsotopicDataCalculations().
568 \endlist
569
570 Returns the results of the computation in the form of a
571 IsotopicClusterChargePair instance.
572 */
573 IsotopicClusterChargePair
574 IsotopicClusterGenerator::generateIsotopicClusterCentroids(
575 FormulaChargePair formula_charge_pair)
576 {
577 // qDebug() << "Starting generation of isotopic cluster for formula:"
578 //<< formula_charge_pair.first;
579
580 Formula formula(formula_charge_pair.first);
581
582 // The check will generate useful data inside the Formula!
583 if(!validateFormula(formula))
584 return std::pair(std::make_shared<const pappso::Trace>(), 0);
585
586 // Use the correct handler!
587
588 std::unique_ptr<IsotopicDataBaseHandler> isotopic_data_handler_up = nullptr;
589
590 if(m_isotopicDataType == IsotopicDataType::LIBRARY_CONFIG)
591 {
592 isotopic_data_handler_up =
593 std::make_unique<IsotopicDataLibraryHandler>(msp_isotopicData);
594 }
595 else if(m_isotopicDataType == IsotopicDataType::USER_CONFIG)
596 {
597 isotopic_data_handler_up =
598 std::make_unique<IsotopicDataUserConfigHandler>(msp_isotopicData);
599 }
600 else if(m_isotopicDataType == IsotopicDataType::MANUAL_CONFIG)
601 {
602 isotopic_data_handler_up =
603 std::make_unique<IsotopicDataManualConfigHandler>(msp_isotopicData);
604 }
605 else
606 qFatal("Programming error. The isotopic data type is not correct.");
607
608
609 // At this point we need to create the arrays exactly as we do in the user
610 // manual config. So we need to know how many different chemical element
611 // symbols we have in the formula.
612
613 std::map<QString, double> symbol_double_count_map =
614 formula.getSymbolCountMap();
615
616 std::size_t element_count = symbol_double_count_map.size();
617
618 // qDebug() << "Number of different symbols in the formula:" << element_count;
619
620 if(!element_count)
621 {
622 qDebug() << "There is not a single element in the Formula.";
623 return std::pair(std::make_shared<const pappso::Trace>(), 0);
624 }
625
626 // qDebug() << "The validated formula has a symbol/count map size:"
627 //<< element_count;
628
629 // We have to copy the symbol/count map obtained by validating
630 // the formula into the isotopic data handler. That map is essential for the
631 // crafting by the handler of the different IsoSpec arrays.
632
633 // At this point, all the elements defined by the user have been completed and
634 // we'll have to create the static arrays that are needed by IsoSpec.
635
636 // We now need to construct the C arrays for IsoSpec. The arrays need to
637 // be filled-in very accurately.
638
639 // This array lists the number of isotopes that each element has.
640 // Typically, C has 2, O has 3, P has 1...
641 int *per_element_isotopes_count_array_p = nullptr;
642
643 // This array lists the count of atoms of each element.
644 // Typically H20 will have 2 for H and 1 for O.
645 int *per_element_symbol_count_array_p = nullptr;
646
647 // These are arrays of arrays! Each array contains a new sub-array for each
648 // symbol. The sub-array contains the isotopic masses (or probs) for one of
649 // the element symbols. The sub-arrays are allocated by the handler below.
650 double **per_element_isotope_masses_arrays_p_p = nullptr;
651 double **per_element_isotope_probs_arrays_p_p = nullptr;
652
653 // The isotopic cluster calculations do not understand
654 // formulas with double counts for the symbols!!
655 // We thus need to convert the symbol_count_map that is
656 // <QString,double>-based to a map that has its second
657 // member, not of double type but of int type.
658
659 std::map<QString, int> symbol_int_count_map;
660 std::map<QString, double>::const_iterator iter =
661 symbol_double_count_map.begin();
662
663 while(iter != symbol_double_count_map.end())
664 {
665 symbol_int_count_map[iter->first] = static_cast<int>(iter->second);
666
667 // qDebug() << "Old " << iter->first << "double version:" << iter->second
668 // << "new int version:" << symbol_int_count_map[iter->first];
669
670 ++iter;
671 }
672
673 // We pass the array pointers by reference.
674 if(!configureIsotopicData(symbol_int_count_map,
675 per_element_isotopes_count_array_p,
676 per_element_symbol_count_array_p,
677 per_element_isotope_masses_arrays_p_p,
678 per_element_isotope_probs_arrays_p_p))
679 {
680 qDebug() << "Failed to actually prepare the isotopic data tables for the "
681 "computation.";
682
683 return std::pair(std::make_shared<const pappso::Trace>(), 0);
684 }
685
686 // At this point we have all the arrays needed to work.
687
688 pappso::TraceSPtr isotopic_cluster_sp =
689 runIsotopicDataCalculations(element_count,
690 formula_charge_pair.second,
691 per_element_isotopes_count_array_p,
692 per_element_symbol_count_array_p,
693 per_element_isotope_masses_arrays_p_p,
694 per_element_isotope_probs_arrays_p_p);
695
696 // FIXME
697 // To avoid a memory leak, we need to delete the mass and prob heap-allocated
698 // arrays.
699
700 // delete[] per_element_isotopes_count_array_p;
701 // delete[] per_element_symbol_count_array_p;
702
703 // for(std::size_t iter = 0; iter < element_count; ++iter)
704 //{
705 // delete[] per_element_isotope_masses_arrays_p_p[iter];
706 // delete[] per_element_isotope_probs_arrays_p_p[iter];
707 //}
708
709 if(isotopic_cluster_sp == nullptr)
710 qDebug() << "Failed to compute an isotopic cluster for formula:"
711 << formula_charge_pair.first;
712
713 // Just a debug check.
714 // qDebug() << "After normalizing, first data point:"
715 //<< isotopic_cluster_sp->front().toString();
716
717 // qDebug() << "Done generating cluster for formula:"
718 //<< formula_charge_pair.first;
719
720 return std::pair(isotopic_cluster_sp, formula_charge_pair.second);
721 }
722
723
724 /*!
725 \brief Configures the isotopic data in a set of arrays for the (symbol,count)
726 pairs in \a symbol_count_map.
727
728 \list
729
730 \li \a per_element_isotopes_count_array_p: pointer to int array. This array
731 lists
732 the number of isotopes that each element has. Typically, C has 2, O has 3, P has
733 1...
734
735 \li \a per_element_symbol_count_array_p: pointer to int array. This array lists
736 the count of atoms of each element. Typically H20 will have 2 for H and 1 for O.
737
738 \li \a per_element_isotope_masses_arrays_p_p: pointer to pointer to double array
739 (array of arrays of double). Each array contains a new sub-array for each
740 symbol. The sub-array contains the isotopic masses for one of the element
741 symbols.
742
743 \li \a per_element_isotope_probs_arrays_p_p: pointer to pointer to double array
744 (array of arrays of double). Each array contains a new sub-array for each
745 symbol. The sub-array contains the isotopic probs for one of the element
746 symbols.
747
748 \endlist
749
750 Returns a pappso::Trace with the calculated isotopic cluster.
751 */
752 bool
753 IsotopicClusterGenerator::configureIsotopicData(
754 std::map<QString, int> &symbol_count_map,
755 int *&per_element_isotopes_count_array_p,
756 int *&per_element_symbol_count_array_p,
757 double **&per_element_isotope_masses_arrays_p_p,
758 double **&per_element_isotope_probs_arrays_p_p)
759 {
760 // Start by allocating the arrays we'll have to feed in this configuration
761 // work.
762
763 // However, we need to ensure that we actually have some stuff to work on.
764 // That stuff is kind of a formula in the form of the std::map<QString, int>
765 // m_symbolCountMap that pairs the symbols of the atoms in the formula and
766 // the count for each symbol.
767
768 if(!symbol_count_map.size())
769 return false;
770
771 // qDebug() << "The isotopic data have" << msp_isotopicData->size()
772 //<< "isotopes";
773
774 // Example used in the comments below: glucose, C6H12O6.
775
776 // How many isotopes of each element symbols are there?
777 // C:2, H:2, O:3
778 per_element_isotopes_count_array_p = new int[symbol_count_map.size()];
779
780 // How many atoms of each chemical element symbol are there?
781 // C:6, H:12, O:6
782 per_element_symbol_count_array_p = new int[symbol_count_map.size()];
783
784 // Each subarray contains the mass of one of the isotopes for the element
785 // symbol.
786 // First array for C (two masses), second array for H (two masses), third
787 // array for O (three masses).
788 per_element_isotope_masses_arrays_p_p = new double *[symbol_count_map.size()];
789
790 // Each subarray contains the prob of one of the isotopes for the element
791 // symbol.
792 // First array for C (two probs), second array for H (two probs), third
793 // array for O (three probs).
794 per_element_isotope_probs_arrays_p_p = new double *[symbol_count_map.size()];
795
796 // Index that will allow to address the right slot in the arrays being
797 // filled with isotopic data.
798 int current_symbol_index = 0;
799
800 for(auto item : symbol_count_map)
801 {
802 QString symbol = item.first;
803 int symbol_count = item.second;
804
805 // qDebug() << "Iterating in symbol/count:" << symbol << "/" <<
806 // symbol_count;
807
808 // Immediately fill-in the symbol count value.
809 per_element_symbol_count_array_p[current_symbol_index] = symbol_count;
810
811 // Now get iterator bounding the isotopes for this symbol.
812
813 std::pair<std::vector<IsotopeSPtr>::const_iterator,
814 std::vector<IsotopeSPtr>::const_iterator>
815 iter_pair = msp_isotopicData->getIsotopesBySymbol(symbol);
816
817 // Handy shortcuts
818 std::vector<IsotopeSPtr>::const_iterator iter = iter_pair.first;
819 std::vector<IsotopeSPtr>::const_iterator iter_end = iter_pair.second;
820
821 std::size_t isotope_count = std::distance(iter, iter_end);
822
823 // qDebug() << "For symbol:" << symbol << "there are:" << isotope_count
824 //<< "isotopes";
825
826 // Sanity check
827 if(isotope_count != msp_isotopicData->getIsotopeCountBySymbol(symbol))
828 qFatal(
829 "Programming error. The is something wrong with the isotopic "
830 "data.");
831
832 // Fill-in the isotope count for the current symbol.
833 per_element_isotopes_count_array_p[current_symbol_index] = isotope_count;
834
835 // Fill-in the isotopes (mass/prob). For each element symbol in the
836 // symbol/count map, we allocate a double array the size of the number
837 // of isotopes so as to store the mass of each isotope (same for the
838 // probs later). Once we have done that fill-in, we can set the address
839 // of the array of the array of array below.
840
841 // Allocate a double array for the masses and another one for the probs.
842
843 double *masses_p = new double[isotope_count];
844 double *probs_p = new double[isotope_count];
845
846 int current_isotope_index = 0;
847
848 while(iter != iter_end)
849 {
850 IsotopeSPtr isotope_sp = *iter;
851
852 // qDebug() << "For symbol" << symbol << "iterating with index"
853 //<< current_isotope_index << "with Isotope *"
854 //<< isotope_sp.get();
855
856 masses_p[current_isotope_index] = isotope_sp->getMass();
857 probs_p[current_isotope_index] = isotope_sp->getProbability();
858
859 // Increment the iterator
860 ++iter;
861 // Increment the isotope index so that we fill next array cell.
862 ++current_isotope_index;
863 }
864
865 // At this time the masses and probs for the isotopes of current symbol
866 // have been filled-in.
867
868 per_element_isotope_masses_arrays_p_p[current_symbol_index] = masses_p;
869 per_element_isotope_probs_arrays_p_p[current_symbol_index] = probs_p;
870
871 // We need to increment this index so as to address the right slot in
872 // the arrays!
873 ++current_symbol_index;
874 }
875
876 // At this point we have documented all the isotopic data required to
877 // perform a calculation with IsoSpec.
878
879 return true;
880 }
881
882 /*!
883 \brief Normalizes the intensities of the isotopic cluster's peak centroids in
884 \a isotopic_cluster_sp.
885
886 If normalization is asked for, the most intense peak centroid in \a
887 isotopic_cluster_sp is determined. That intensity becomes the
888 m_normalizeIntensity value and all the other peak centroids' intensities are
889 normalized.
890
891 \note The normalization occurs \e{in place}.
892 */
893 void
894 IsotopicClusterGenerator::normalizeIntensities(
895 pappso::TraceSPtr &isotopic_cluster_sp)
896 {
897
898 // qDebug() << "The normalize_intensity is:" << m_normalizeIntensity;
899
900 // The most intense peak centroid's intensity value needs to be set to the
901 // requested value and the same change ratio needs to be applied to all the
902 // other peak centroids.
903
904 // No normalization is asked for.
905 if(m_normalizeIntensity == std::numeric_limits<int>::min())
906 {
907 // qDebug() << "No normalization was asked for. Skipping.";
908 return;
909 }
910
911 if(!isotopic_cluster_sp->size())
912 {
913 qDebug() << "The isotopic cluster has not a single data point.";
914 return;
915 }
916 // First get the most intense centroid.
917
918 double max_found_intensity = 0;
919
920 // qDebug() << "isotopic_cluster_sp->size():" << isotopic_cluster_sp->size();
921
922 pappso::Trace::iterator vector_iterator = std::max_element(
923 isotopic_cluster_sp->begin(),
924 isotopic_cluster_sp->end(),
925 [](const pappso::DataPoint first, const pappso::DataPoint second) {
926 return first.y < second.y;
927 });
928
929 if(vector_iterator == isotopic_cluster_sp->end())
930 qFatal("Programming error");
931
932 max_found_intensity = (*vector_iterator).y;
933
934 if(!max_found_intensity)
935 qFatal("The maximum intensity of the whole isotopic cluster is 0.");
936
937 // qDebug().noquote() << "Peak centroid with maximum intensity: "
938 //<< vector_iterator->toString();
939
940 // Calculate the ratio between the final requested intensity and the greatest
941 // intensity. That ratio will be multiplied to intensity of the each peak
942 // centroid, which is why we call it a factor.
943
944 double intensity_factor = m_normalizeIntensity / max_found_intensity;
945
946 // qDebug() << "Will multiply this intensity factor to each data point's "
947 //"intensity value:"
948 //<< intensity_factor;
949
950 // Just a debug check.
951 // qDebug() << "Before normalizing, first data point:"
952 //<< isotopic_cluster_sp->front().toString();
953
954 std::for_each(
955 isotopic_cluster_sp->begin(),
956 isotopic_cluster_sp->end(),
957 [intensity_factor](pappso::DataPoint &data_point) // modify in-place
958 {
959 // qDebug() << "Before normalization:" <<
960 // data_point.toString();
961
962 double normalized_intensity = data_point.y * intensity_factor;
963
964 data_point.y = normalized_intensity;
965
966 // qDebug() << "After normalization:" <<
967 // data_point.toString();
968 });
969
970 // At this point the centroids' intensity have been normalized.
971
972 // Just a debug check.
973 // qDebug() << "After normalizing, first data point:"
974 //<< isotopic_cluster_sp->front().toString();
975 }
976
977 /*!
978 \brief Sorts the peak centroids of the isotopic cluster \a isotopic_cluster_sp.
979
980 The sort is performed according to \l m_sortType.
981 */
982 void
983 IsotopicClusterGenerator::sortPeakCentroids(
984 pappso::TraceSPtr &isotopic_cluster_sp)
985 {
986 if(m_sortType == pappso::SortType::no_sort)
987 return;
988
989 return isotopic_cluster_sp->sort(m_sortType, m_sortOrder);
990 }
991
992 /*!
993 \brief Returns a string containing a space-separated set of m/z, intensity
994 pairs, representing the isotopic cluster in \a isotopic_cluster_sp.
995 */
996 QString
997 IsotopicClusterGenerator::clusterToString(
998 const pappso::TraceCstSPtr &isotopic_cluster_sp) const
999 {
1000
1001 // Export the results as a string. Note how we do export the relative
1002 // intensity. If there was normalization, that value was updated, otherwise it
1003 // had been initialized identical to the intensity upon creation of the
1004 // PeakCentroid instances in the vector.
1005
1006 QString text;
1007
1008 for(const pappso::DataPoint &dp : *isotopic_cluster_sp)
1009 {
1010 text += QString("%1 %2\n").arg(dp.x, 0, 'f', 30).arg(dp.y, 0, 'f', 30);
1011 }
1012
1013 // qDebug().noquote() << "Cluster to string: " << text;
1014
1015 return text;
1016 }
1017
1018
1019 /*!
1020 \brief Returns a string containing a space-separated set of m/z, intensity
1021 pairs, representing the isotopic clusters in the member isotopic clusters \l
1022 m_isotopicClusterChargePairs.
1023 */
1024 QString
1025 IsotopicClusterGenerator::clustersToString() const
1026 {
1027
1028 // Export the results as a string. Note how we do export the relative
1029 // intensity. If there was normalization, that value was updated, otherwise it
1030 // had been initialized identical to the intensity upon creation of the
1031 // PeakCentroid instances in the vector.
1032
1033 QString text;
1034
1035 for(auto isotopic_cluster_charge_pair : m_isotopicClusterChargePairs)
1036 {
1037 text += clusterToString(isotopic_cluster_charge_pair.first);
1038 }
1039
1040 // qDebug().noquote() << text;
1041
1042 return text;
1043 }
1044
1045 /*!
1046 \brief Starts the computations.
1047
1048 The member m_isotopicClusterChargePairs are first cleared.
1049
1050 Returns the count of IsotopicClusterChargePair instances generated upon the
1051 calculations.
1052 */
1053 std::size_t
1054 IsotopicClusterGenerator::run()
1055 {
1056 // Iterate in all the formula/charge pairs and for each compute an isotopic
1057 // cluster.
1058
1059 m_isotopicClusterChargePairs.clear();
1060
1061 for(auto formula_charge_pair : m_formulaChargePairs)
1062 {
1063 IsotopicClusterChargePair pair =
1064 generateIsotopicClusterCentroids(formula_charge_pair);
1065
1066 if(!pair.first->size())
1067 {
1068 qFatal("The isotopic cluster is empty for formula: %s",
1069 formula_charge_pair.first.toLatin1().data());
1070 }
1071
1072 m_isotopicClusterChargePairs.push_back(pair);
1073 }
1074
1075 return m_isotopicClusterChargePairs.size();
1076 }
1077
1078 /*!
1079 \brief Returns the member list ofIsotopicClusterChargePair instances.
1080 */
1081 const std::vector<IsotopicClusterChargePair> &
1082 IsotopicClusterGenerator::getIsotopicClusterChargePairs() const
1083 {
1084 return m_isotopicClusterChargePairs;
1085 }
1086
1087 } // namespace libXpertMass
1088
1089 } // namespace MsXpS
1090