IsoSpec
Loading...
Searching...
No Matches
isoSpec++.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17
18#include "isoSpec++.h"
19#include <cmath>
20#include <algorithm>
21#include <vector>
22#include <cstdlib>
23#include <cstring>
24#include <unordered_map>
25#include <queue>
26#include <utility>
27#include <iostream>
28#include <iomanip>
29#include <stdexcept>
30#include <string>
31#include <limits>
32#include <memory>
33#include <cassert>
34#include <cctype>
35#include "platform.h"
36#include "conf.h"
37#include "dirtyAllocator.h"
38#include "operators.h"
39#include "summator.h"
40#include "marginalTrek++.h"
41#include "misc.h"
42#include "element_tables.h"
43#include "fasta.h"
44
45
46
47namespace IsoSpec
48{
49
50Iso::Iso() :
51disowned(false),
52dimNumber(0),
53isotopeNumbers(new int[0]),
54atomCounts(new int[0]),
55confSize(0),
56allDim(0),
57marginals(new Marginal*[0])
58{}
59
60
61Iso::Iso(
62 int _dimNumber,
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double* const * _isotopeMasses,
66 const double* const * _isotopeProbabilities
67) :
68disowned(false),
69dimNumber(_dimNumber),
70isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72confSize(_dimNumber * sizeof(int)),
73allDim(0),
74marginals(nullptr)
75{
76 for(int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
78
79 std::unique_ptr<double[]> masses(new double[allDim]);
80 std::unique_ptr<double[]> probs(new double[allDim]);
81 size_t idx = 0;
82
83 for(int ii = 0; ii < dimNumber; ++ii)
84 for(int jj = 0; jj < isotopeNumbers[ii]; ++jj)
85 {
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
88 ++idx;
89 }
90
91 allDim = 0; // setupMarginals will recalculate it, assuming it's set to 0
92
93 try{
94 setupMarginals(masses.get(), probs.get());
95 }
96 catch(...)
97 {
98 delete[] isotopeNumbers;
99 delete[] atomCounts;
100 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
101 // However, this is not the fast code path and we can afford two unneeded instructions to keep
102 // some static analysis tools happy.
103 isotopeNumbers = nullptr;
104 atomCounts = nullptr;
105 throw;
106 }
107}
108
109Iso::Iso(
110 int _dimNumber,
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
115) :
116disowned(false),
117dimNumber(_dimNumber),
118isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120confSize(_dimNumber * sizeof(int)),
121allDim(0),
122marginals(nullptr)
123{
124 try{
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
126 }
127 catch(...)
128 {
129 delete[] isotopeNumbers;
130 delete[] atomCounts;
131 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
132 // However, this is not the fast code path and we can afford two unneeded instructions to keep
133 // some static analysis tools happy.
134 isotopeNumbers = nullptr;
135 atomCounts = nullptr;
136 throw;
137 }
138}
139
140Iso::Iso(Iso&& other) :
141disowned(other.disowned),
142dimNumber(other.dimNumber),
145confSize(other.confSize),
146allDim(other.allDim),
147marginals(other.marginals)
148{
149 other.disowned = true;
150}
151
152
153Iso::Iso(const Iso& other, bool fullcopy) :
154disowned(!fullcopy),
155dimNumber(other.dimNumber),
156isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158confSize(other.confSize),
159allDim(other.allDim),
160marginals(fullcopy ? new Marginal*[dimNumber] : other.marginals)
161{
162 if(fullcopy)
163 {
164 for(int ii = 0; ii < dimNumber; ii++)
165 marginals[ii] = new Marginal(*other.marginals[ii]);
166 }
167}
168
169Iso Iso::FromFASTA(const char* fasta, bool use_nominal_masses, bool add_water)
170{
171 int atomCounts[6];
172
173 parse_fasta(fasta, atomCounts);
174
175 if(add_water)
176 {
177 atomCounts[1] += 2;
178 atomCounts[3] += 1;
179 }
180
181 const int dimNr = atomCounts[5] > 0 ? 6 : 5;
182
183 return Iso(dimNr, aa_isotope_numbers, atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
184}
185
186inline void Iso::setupMarginals(const double* _isotopeMasses, const double* _isotopeProbabilities)
187{
188 if (marginals == nullptr)
189 {
190 int ii = 0;
192 try
193 {
194 while(ii < dimNumber)
195 {
196 marginals[ii] = new Marginal(
197 &_isotopeMasses[allDim],
198 &_isotopeProbabilities[allDim],
199 isotopeNumbers[ii],
200 atomCounts[ii]
201 );
202 allDim += isotopeNumbers[ii];
203 ii++;
204 }
205 }
206 catch(...)
207 {
208 ii--;
209 while(ii >= 0)
210 {
211 delete marginals[ii];
212 ii--;
213 }
214 delete[] marginals;
215 marginals = nullptr;
216 throw;
217 }
218 }
219}
220
222{
223 if(!disowned)
224 {
225 if (marginals != nullptr)
226 dealloc_table(marginals, dimNumber);
227 delete[] isotopeNumbers;
228 delete[] atomCounts;
229 }
230}
231
232bool Iso::doMarginalsNeedSorting() const
233{
234 int nontrivial_marginals = 0;
235 for(int ii = 0; ii < dimNumber; ii++)
236 {
237 if(marginals[ii]->get_isotopeNo() > 1)
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
240 return true;
241 }
242 return false;
243}
244
246{
247 double mass = 0.0;
248 for (int ii = 0; ii < dimNumber; ii++)
249 mass += marginals[ii]->getLightestConfMass();
250 return mass;
251}
252
254{
255 double mass = 0.0;
256 for (int ii = 0; ii < dimNumber; ii++)
257 mass += marginals[ii]->getHeaviestConfMass();
258 return mass;
259}
260
262{
263 double mass = 0.0;
264 for (int ii = 0; ii < dimNumber; ii++)
265 mass += marginals[ii]->getMonoisotopicConfMass();
266 return mass;
267}
268
270{
271 double lprob = 0.0;
272 for (int ii = 0; ii < dimNumber; ii++)
273 lprob += marginals[ii]->getSmallestLProb();
274 return lprob;
275}
276
277double Iso::getModeMass() const
278{
279 double mass = 0.0;
280 for (int ii = 0; ii < dimNumber; ii++)
281 mass += marginals[ii]->getModeMass();
282 return mass;
283}
284
285double Iso::getModeLProb() const
286{
287 double ret = 0.0;
288 for (int ii = 0; ii < dimNumber; ii++)
289 ret += marginals[ii]->getModeLProb();
290 return ret;
291}
292
294{
295 double mass = 0.0;
296 for (int ii = 0; ii < dimNumber; ii++)
298 return mass;
299}
300
301double Iso::variance() const
302{
303 double ret = 0.0;
304 for(int ii = 0; ii < dimNumber; ii++)
305 ret += marginals[ii]->variance();
306 return ret;
307}
308
309
310Iso::Iso(const char* formula, bool use_nominal_masses) :
311disowned(false),
312allDim(0),
313marginals(nullptr)
314{
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
317
318 dimNumber = parse_formula(formula, isotope_masses, isotope_probabilities, &isotopeNumbers, &atomCounts, &confSize, use_nominal_masses);
319
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
321}
322
323
324void Iso::addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities)
325{
326 Marginal* m = new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
327 realloc_append<int>(&isotopeNumbers, noIsotopes, dimNumber);
328 realloc_append<int>(&atomCounts, atomCount, dimNumber);
329 realloc_append<Marginal*>(&marginals, m, dimNumber);
330 dimNumber++;
331 confSize += sizeof(int);
332 allDim += noIsotopes;
333}
334
335void Iso::saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const
336{
337 /*
338 * We shall now use Gaussian approximations of the marginal multinomial distributions to estimate
339 * how many configurations we shall need to visit from each marginal. This should be approximately
340 * proportional to the volume of the optimal P-ellipsoid of the marginal, which, in turn is defined
341 * by the quantile function of the chi-square distribution plus some modifications.
342 *
343 * We're dropping the constant factor and the (monotonic) exp() transform - these will be used as keys
344 * for sorting, so only the ordering is important.
345 */
346
347 double K = allDim - dimNumber;
348
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
350
351 for(int ii = 0; ii < dimNumber; ii++)
352 priorities[ii] = marginals[ii]->getLogSizeEstimate(log_R2);
353}
354
355unsigned int parse_formula(const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities, int** isotopeNumbers, int** atomCounts, unsigned int* confSize, bool use_nominal_masses)
356{
357 // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
358 size_t slen = strlen(formula);
359 // Yes, it would be more elegant to use std::string here, but it's the only promiment place where it would be used in IsoSpec, and avoiding it here
360 // means we can run the whole thing through Clang's memory sanitizer without the need for instrumented libc++/libstdc++. That's worth messing with char pointers a
361 // little bit.
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
364
365 if(slen == 0)
366 throw std::invalid_argument("Invalid formula: can't be empty");
367
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument("Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
370
371 for(size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument("Invalid formula: contains invalid (non-digit, non-alpha) character");
374
375 size_t position = 0;
376
377 while(position < slen)
378 {
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
381 elem_end++;
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
384 digit_end++;
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
388 }
389
390 std::vector<int> element_indexes;
391
392 for (unsigned int i = 0; i < elements.size(); i++)
393 {
394 int idx = -1;
395 for(int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
396 {
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
398 {
399 idx = j;
400 break;
401 }
402 }
403 if(idx < 0)
404 throw std::invalid_argument("Invalid formula");
405 element_indexes.push_back(idx);
406 }
407
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
410
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
412 {
413 int num = 0;
414 int at_idx = *it;
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
417 {
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
420 at_idx++;
421 num++;
422 }
423 _isotope_numbers.push_back(num);
424 }
425
426 const unsigned int dimNumber = elements.size();
427
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber * sizeof(int);
431
432 return dimNumber;
433}
434
435
436/*
437 * ----------------------------------------------------------------------------------------------------------
438 */
439
440
441
442IsoGenerator::IsoGenerator(Iso&& iso, bool alloc_partials) :
443 Iso(std::move(iso)),
444 mode_lprob(getModeLProb()),
445 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
448{
449 for(int ii = 0; ii < dimNumber; ++ii)
450 marginals[ii]->ensureModeConf();
451 if(alloc_partials)
452 {
455 partialProbs[dimNumber] = 1.0;
456 }
457}
458
459
461{
462 if(partialLProbs != nullptr)
463 delete[] partialLProbs;
464 if(partialMasses != nullptr)
465 delete[] partialMasses;
466 if(partialProbs != nullptr)
467 delete[] partialProbs;
468}
469
470
471
472/*
473 * ----------------------------------------------------------------------------------------------------------
474 */
475
476static const double minsqrt = -1.3407796239501852e+154; // == constexpr(-sqrt(std::numeric_limits<double>::max()));
477
478IsoThresholdGenerator::IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute, int tabSize, int hashSize, bool reorder_marginals)
479: IsoGenerator(std::move(iso)),
480Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
481{
482 counter = new int[dimNumber];
483 maxConfsLPSum = new double[dimNumber-1];
484 marginalResultsUnsorted = new PrecalculatedMarginal*[dimNumber];
485
486 empty = false;
487
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
489
490 for(int ii = 0; ii < dimNumber; ii++)
491 {
492 counter[ii] = 0;
493 marginalResultsUnsorted[ii] = new PrecalculatedMarginal(std::move(*(marginals[ii])),
494 Lcutoff - mode_lprob + marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
496 tabSize,
497 hashSize);
498
499 if(!marginalResultsUnsorted[ii]->inRange(0))
500 empty = true;
501 }
502
503 if(reorder_marginals && dimNumber > 1)
504 {
505 OrderMarginalsBySizeDecresing<PrecalculatedMarginal> comparator(marginalResultsUnsorted);
506 int* tmpMarginalOrder = new int[dimNumber];
507
508 for(int ii = 0; ii < dimNumber; ii++)
509 tmpMarginalOrder[ii] = ii;
510
511 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, comparator);
512 marginalResults = new PrecalculatedMarginal*[dimNumber];
513
514 for(int ii = 0; ii < dimNumber; ii++)
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
516
517 marginalOrder = new int[dimNumber];
518 for(int ii = 0; ii < dimNumber; ii++)
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
520
521 delete[] tmpMarginalOrder;
522 }
523 else
524 {
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder = nullptr;
527 }
528
529 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
530
531 if(dimNumber > 1)
532 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
533
534 for(int ii = 1; ii < dimNumber-1; ii++)
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
536
537 lProbs_ptr = lProbs_ptr_start;
538
539 partialLProbs_second = partialLProbs;
540 partialLProbs_second++;
541
542 if(!empty)
543 {
544 recalc(dimNumber-1);
545 counter[0]--;
546 lProbs_ptr--;
547 }
548 else
549 {
551 lcfmsv = std::numeric_limits<double>::infinity();
552 }
553}
554
556{
557 for(int ii = 0; ii < dimNumber; ii++)
558 {
559 counter[ii] = marginalResults[ii]->get_no_confs()-1;
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
561 }
562 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
564}
565
567{
568 if(empty)
569 return 0;
570
571 if(dimNumber == 1)
572 return marginalResults[0]->get_no_confs();
573
574 const double* lProbs_ptr_l = marginalResults[0]->get_lProbs_ptr() + marginalResults[0]->get_no_confs();
575
576 std::unique_ptr<const double* []> lProbs_restarts(new const double*[dimNumber]);
577
578 for(int ii = 0; ii < dimNumber; ii++)
579 lProbs_restarts[ii] = lProbs_ptr_l;
580
581 size_t count = 0;
582
583 while(*lProbs_ptr_l < lcfmsv)
584 lProbs_ptr_l--;
585
586 while(true)
587 {
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
589
590 int idx = 0;
591 int * cntr_ptr = counter;
592
593 while(idx < dimNumber - 1)
594 {
595 *cntr_ptr = 0;
596 idx++;
597 cntr_ptr++;
598 (*cntr_ptr)++;
599
600 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
601 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
602 {
603 short_recalc(idx-1);
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
606 lProbs_ptr_l--;
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
609 break;
610 }
611 }
612 if(idx == dimNumber - 1)
613 {
614 reset();
615 return count;
616 }
617 }
618}
619
621{
622 if(empty)
623 {
625 return;
626 }
627
629
630 memset(counter, 0, sizeof(int)*dimNumber);
631 recalc(dimNumber-1);
632 counter[0]--;
633
634 lProbs_ptr = lProbs_ptr_start - 1;
635}
636
637IsoThresholdGenerator::~IsoThresholdGenerator()
638{
639 delete[] counter;
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults, dimNumber);
644 if(marginalOrder != nullptr)
645 delete[] marginalOrder;
646}
647
648
649/*
650 * ------------------------------------------------------------------------------------------------------------------------
651 */
652
653template <typename MarginalType>
654IsoLayeredGeneratorTemplate<MarginalType>::IsoLayeredGeneratorTemplate(Iso&& iso, int tabSize, int hashSize, bool reorder_marginals, double t_prob_hint)
655: IsoGenerator(std::move(iso))
656{
657 counter = new int[dimNumber];
658 maxConfsLPSum = new double[dimNumber-1];
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted = new MarginalType*[dimNumber];
662 resetPositions = new const double*[dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
664
665 memset(counter, 0, sizeof(int)*dimNumber);
666
667 for(int ii = 0; ii < dimNumber; ii++)
668 marginalResultsUnsorted[ii] = new MarginalType(std::move(*(marginals[ii])), tabSize, hashSize);
669
670 if(reorder_marginals && dimNumber > 1)
671 {
672 double* marginal_priorities = new double[dimNumber];
673
674 saveMarginalLogSizeEstimates(marginal_priorities, t_prob_hint);
675
676 int* tmpMarginalOrder = new int[dimNumber];
677
678 for(int ii = 0; ii < dimNumber; ii++)
679 tmpMarginalOrder[ii] = ii;
680
681 TableOrder<double> TO(marginal_priorities);
682
683 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, TO);
684 marginalResults = new MarginalType*[dimNumber];
685
686 for(int ii = 0; ii < dimNumber; ii++)
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
688
689 marginalOrder = new int[dimNumber];
690 for(int ii = 0; ii < dimNumber; ii++)
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
692
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
695 }
696 else
697 {
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder = nullptr;
700 }
701
702 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
703
704 if(dimNumber > 1)
705 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
706
707 for(int ii = 1; ii < dimNumber-1; ii++)
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
709
710 lProbs_ptr = lProbs_ptr_start;
711
712 partialLProbs_second = partialLProbs;
713 partialLProbs_second++;
714
715 counter[0]--;
716 lProbs_ptr--;
717 lastLThreshold = 10.0;
718 IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(-0.00001);
719}
720
721template <typename MarginalType>
722bool IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(double offset)
723{
724 size_t first_mrg_size = marginalResults[0]->get_no_confs();
725
726 if(lastLThreshold < getUnlikeliestPeakLProb())
727 return false;
728
729 lastLThreshold = currentLThreshold;
730 currentLThreshold += offset;
731
732 for(int ii = 0; ii < dimNumber; ii++)
733 {
734 marginalResults[ii]->extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
735 counter[ii] = 0;
736 }
737
738 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr(); // vector relocation might have happened
739
740 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
741
742 for(int ii = 0; ii < dimNumber; ii++)
743 resetPositions[ii] = lProbs_ptr;
744
745 recalc(dimNumber-1);
746
747 return true;
748}
749
750template <typename MarginalType>
751bool IsoLayeredGeneratorTemplate<MarginalType>::carry()
752{
753 // If we reached this point, a carry is needed
754
755 int idx = 0;
756
757 int * cntr_ptr = counter;
758
759 while(idx < dimNumber-1)
760 {
761 *cntr_ptr = 0;
762 idx++;
763 cntr_ptr++;
764 (*cntr_ptr)++;
765 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
766 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
767 {
768 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
769 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
770 recalc(idx-1);
771 lProbs_ptr = resetPositions[idx];
772
773 while(*lProbs_ptr <= last_lcfmsv)
774 lProbs_ptr--;
775
776 for(int ii = 0; ii < idx; ii++)
777 resetPositions[ii] = lProbs_ptr;
778
779 return true;
780 }
781 }
782
783 return false;
784}
785
786template<typename MarginalType>
788{
789 for(int ii = 0; ii < dimNumber; ii++)
790 {
791 counter[ii] = marginalResults[ii]->get_no_confs()-1;
792 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
793 }
794 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
795 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
796}
797
798template<typename MarginalType>
799IsoLayeredGeneratorTemplate<MarginalType>::~IsoLayeredGeneratorTemplate()
800{
801 delete[] counter;
802 delete[] maxConfsLPSum;
803 delete[] resetPositions;
804 if (marginalResultsUnsorted != marginalResults)
805 delete[] marginalResultsUnsorted;
806 dealloc_table(marginalResults, dimNumber);
807 if(marginalOrder != nullptr)
808 delete[] marginalOrder;
809}
810
811template class IsoLayeredGeneratorTemplate<LayeredMarginal>;
812//template class IsoLayeredGeneratorTemplate<PrecalculatedMarginal>;
813//template class IsoLayeredGeneratorTemplate<MarginalTrek>;
814template class IsoLayeredGeneratorTemplate<SingleAtomMarginal<true>>;
815
816/*
817 * ------------------------------------------------------------------------------------------------------------------------
818 */
819
820template<typename MarginalType>
821IsoOrderedGeneratorTemplate<MarginalType>::IsoOrderedGeneratorTemplate(Iso&& iso, int _tabSize, int _hashSize) :
822IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
823{
824 partialLProbs = &currentLProb;
825 partialMasses = &currentMass;
826 partialProbs = &currentProb;
827
828 marginalResults = new MarginalType*[dimNumber];
829
830 for(int i = 0; i < dimNumber; i++)
831 marginalResults[i] = new MarginalType(std::move(*(marginals[i])), _tabSize, _hashSize);
832
833 logProbs = new const pod_vector<double>*[dimNumber];
834 masses = new const pod_vector<double>*[dimNumber];
835
836 for(int i = 0; i < dimNumber; i++)
837 {
838 masses[i] = &marginalResults[i]->conf_masses();
839 logProbs[i] = &marginalResults[i]->conf_lprobs();
840 }
841
842 topConf = allocator.newConf();
843 memset(
844 reinterpret_cast<char*>(topConf) + sizeof(double),
845 0,
846 sizeof(int)*dimNumber
847 );
848
849 *(reinterpret_cast<double*>(topConf)) =
850 combinedSum(
851 getConf(topConf),
852 logProbs,
854 );
855
856 pq.push(topConf);
857}
858
859template<typename MarginalType>
861{
862 dealloc_table<MarginalType*>(marginalResults, dimNumber);
863 delete[] logProbs;
864 delete[] masses;
865 partialLProbs = nullptr;
866 partialMasses = nullptr;
867 partialProbs = nullptr;
868}
869
870template<typename MarginalType>
872{
873 if(pq.size() < 1)
874 return false;
875
876
877 topConf = pq.top();
878 pq.pop();
879
880 int* topConfIsoCounts = getConf(topConf);
881
882 currentLProb = *(reinterpret_cast<double*>(topConf));
883 currentMass = combinedSum( topConfIsoCounts, masses, dimNumber );
884 currentProb = exp(currentLProb);
885
886 ccount = -1;
887 for(int j = 0; j < dimNumber; ++j)
888 {
889 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
890 {
891 if(ccount == -1)
892 {
893 topConfIsoCounts[j]++;
894 *(reinterpret_cast<double*>(topConf)) = combinedSum(topConfIsoCounts, logProbs, dimNumber);
895 pq.push(topConf);
896 topConfIsoCounts[j]--;
897 ccount = j;
898 }
899 else
900 {
901 void* acceptedCandidate = allocator.newConf();
902 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
903 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts, confSize);
904
905 acceptedCandidateIsoCounts[j]++;
906
907 *(reinterpret_cast<double*>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs, dimNumber);
908
909 pq.push(acceptedCandidate);
910 }
911 }
912 if(topConfIsoCounts[j] > 0)
913 break;
914 }
915 if(ccount >=0)
916 topConfIsoCounts[ccount]++;
917
918 return true;
919}
920
923
924
925/*
926 * ---------------------------------------------------------------------------------------------------
927 */
928
929template<typename IsoType>
930IsoStochasticGeneratorTemplate<IsoType>::IsoStochasticGeneratorTemplate(Iso&& iso, size_t no_molecules, double _precision, double _beta_bias, std::mt19937& _rng) :
931IsoGenerator(std::move(iso)),
932ILG(std::move(*this)),
933to_sample_left(no_molecules),
934precision(_precision),
935beta_bias(_beta_bias),
936confs_prob(0.0),
937chasing_prob(0.0),
938rdvariate_gen(_rng)
939{}
940
941template class IsoStochasticGeneratorTemplate<IsoLayeredGeneratorTemplate<LayeredMarginal>>;
942template class IsoStochasticGeneratorTemplate<IsoLayeredGeneratorTemplate<SingleAtomMarginal<true>>>;
943template class IsoStochasticGeneratorTemplate<IsoOrderedGeneratorTemplate<MarginalTrek>>;
944template class IsoStochasticGeneratorTemplate<IsoOrderedGeneratorTemplate<SingleAtomMarginal<false>>>;
945//template class IsoStochasticGeneratorTemplate<IsoThresholdGenerator>;
946
947/*
948 * ---------------------------------------------------------------------------------------------------
949 */
950
951
952
953
954
955} // namespace IsoSpec
956
The generator of isotopologues.
Definition isoSpec++.h:185
virtual ~IsoGenerator()
Destructor.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
The Iso class for the calculation of the isotopic distribution.
Definition isoSpec++.h:50
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
int * isotopeNumbers
Definition isoSpec++.h:65
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
double variance() const
Get the theoretical variance of the distribution.
double getMonoisotopicPeakMass() const
unsigned int confSize
Definition isoSpec++.h:67
virtual ~Iso()
Destructor.
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
int * atomCounts
Definition isoSpec++.h:66
Marginal ** marginals
Definition isoSpec++.h:69
void terminate_search()
Block the subsequent search of isotopologues.
The generator of isotopologues sorted by their probability of occurrence.
Definition isoSpec++.h:239
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
virtual ~IsoOrderedGeneratorTemplate()
Destructor.
void terminate_search()
Block the subsequent search of isotopologues.
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
void parse_fasta(const char *fasta, int atomCounts[6])
Count elemental composition of an unmodificed sequence of amino acids, resulting in CHNOSSe counts.
Definition fasta.h:38