24#include <unordered_map>
37#include "dirtyAllocator.h"
40#include "marginalTrek++.h"
42#include "element_tables.h"
53isotopeNumbers(new int[0]),
54atomCounts(new int[0]),
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double*
const * _isotopeMasses,
66 const double*
const * _isotopeProbabilities
70isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72confSize(_dimNumber * sizeof(int)),
76 for(
int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
79 std::unique_ptr<double[]> masses(
new double[allDim]);
80 std::unique_ptr<double[]> probs(
new double[allDim]);
83 for(
int ii = 0; ii < dimNumber; ++ii)
84 for(
int jj = 0; jj < isotopeNumbers[ii]; ++jj)
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
94 setupMarginals(masses.get(), probs.get());
98 delete[] isotopeNumbers;
103 isotopeNumbers =
nullptr;
104 atomCounts =
nullptr;
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
119atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
140Iso::Iso(Iso&& other) :
141disowned(other.disowned),
149 other.disowned =
true;
153Iso::Iso(
const Iso& other,
bool fullcopy) :
183 return Iso(dimNr, aa_isotope_numbers,
atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
186inline void Iso::setupMarginals(
const double* _isotopeMasses,
const double* _isotopeProbabilities)
198 &_isotopeProbabilities[
allDim],
232bool Iso::doMarginalsNeedSorting()
const
234 int nontrivial_marginals = 0;
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
249 mass +=
marginals[ii]->getLightestConfMass();
257 mass +=
marginals[ii]->getHeaviestConfMass();
265 mass +=
marginals[ii]->getMonoisotopicConfMass();
273 lprob +=
marginals[ii]->getSmallestLProb();
310Iso::Iso(
const char* formula,
bool use_nominal_masses) :
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
324void Iso::addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities)
326 Marginal* m =
new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
352 priorities[ii] =
marginals[ii]->getLogSizeEstimate(log_R2);
355unsigned int parse_formula(
const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities,
int** isotopeNumbers,
int** atomCounts,
unsigned int* confSize,
bool use_nominal_masses)
358 size_t slen = strlen(formula);
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
366 throw std::invalid_argument(
"Invalid formula: can't be empty");
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
371 for(
size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument(
"Invalid formula: contains invalid (non-digit, non-alpha) character");
377 while(position < slen)
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
390 std::vector<int> element_indexes;
392 for (
unsigned int i = 0; i < elements.size(); i++)
395 for(
int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
404 throw std::invalid_argument(
"Invalid formula");
405 element_indexes.push_back(idx);
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
423 _isotope_numbers.push_back(num);
426 const unsigned int dimNumber = elements.size();
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber *
sizeof(int);
476static const double minsqrt = -1.3407796239501852e+154;
478IsoThresholdGenerator::IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute,
int tabSize,
int hashSize,
bool reorder_marginals)
480Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
494 Lcutoff - mode_lprob +
marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
499 if(!marginalResultsUnsorted[ii]->inRange(0))
506 int* tmpMarginalOrder =
new int[
dimNumber];
509 tmpMarginalOrder[ii] = ii;
511 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, comparator);
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
521 delete[] tmpMarginalOrder;
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder =
nullptr;
529 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
532 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
537 lProbs_ptr = lProbs_ptr_start;
540 partialLProbs_second++;
551 lcfmsv = std::numeric_limits<double>::infinity();
559 counter[ii] = marginalResults[ii]->get_no_confs()-1;
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
572 return marginalResults[0]->get_no_confs();
574 const double* lProbs_ptr_l = marginalResults[0]->get_lProbs_ptr() + marginalResults[0]->get_no_confs();
576 std::unique_ptr<const double* []> lProbs_restarts(
new const double*[
dimNumber]);
579 lProbs_restarts[ii] = lProbs_ptr_l;
583 while(*lProbs_ptr_l < lcfmsv)
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
591 int * cntr_ptr = counter;
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
630 memset(counter, 0,
sizeof(
int)*
dimNumber);
634 lProbs_ptr = lProbs_ptr_start - 1;
637IsoThresholdGenerator::~IsoThresholdGenerator()
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults,
dimNumber);
644 if(marginalOrder !=
nullptr)
645 delete[] marginalOrder;
653template <
typename MarginalType>
654IsoLayeredGeneratorTemplate<MarginalType>::IsoLayeredGeneratorTemplate(Iso&& iso,
int tabSize,
int hashSize,
bool reorder_marginals,
double t_prob_hint)
655: IsoGenerator(std::move(iso))
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted =
new MarginalType*[
dimNumber];
662 resetPositions =
new const double*[
dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
665 memset(counter, 0,
sizeof(
int)*
dimNumber);
668 marginalResultsUnsorted[ii] =
new MarginalType(std::move(*(
marginals[ii])), tabSize, hashSize);
672 double* marginal_priorities =
new double[
dimNumber];
676 int* tmpMarginalOrder =
new int[
dimNumber];
679 tmpMarginalOrder[ii] = ii;
681 TableOrder<double> TO(marginal_priorities);
683 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, TO);
684 marginalResults =
new MarginalType*[
dimNumber];
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder =
nullptr;
702 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
705 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
710 lProbs_ptr = lProbs_ptr_start;
713 partialLProbs_second++;
717 lastLThreshold = 10.0;
718 IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(-0.00001);
721template <
typename MarginalType>
722bool IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(
double offset)
724 size_t first_mrg_size = marginalResults[0]->get_no_confs();
726 if(lastLThreshold < getUnlikeliestPeakLProb())
729 lastLThreshold = currentLThreshold;
730 currentLThreshold += offset;
732 for(
int ii = 0; ii < dimNumber; ii++)
734 marginalResults[ii]->extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
738 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
740 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
742 for(
int ii = 0; ii < dimNumber; ii++)
743 resetPositions[ii] = lProbs_ptr;
750template <
typename MarginalType>
751bool IsoLayeredGeneratorTemplate<MarginalType>::carry()
757 int * cntr_ptr = counter;
759 while(idx < dimNumber-1)
765 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
766 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
768 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
769 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
771 lProbs_ptr = resetPositions[idx];
773 while(*lProbs_ptr <= last_lcfmsv)
776 for(
int ii = 0; ii < idx; ii++)
777 resetPositions[ii] = lProbs_ptr;
786template<
typename MarginalType>
791 counter[ii] = marginalResults[ii]->get_no_confs()-1;
792 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
795 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
798template<
typename MarginalType>
799IsoLayeredGeneratorTemplate<MarginalType>::~IsoLayeredGeneratorTemplate()
802 delete[] maxConfsLPSum;
803 delete[] resetPositions;
804 if (marginalResultsUnsorted != marginalResults)
805 delete[] marginalResultsUnsorted;
806 dealloc_table(marginalResults, dimNumber);
807 if(marginalOrder !=
nullptr)
808 delete[] marginalOrder;
811template class IsoLayeredGeneratorTemplate<LayeredMarginal>;
814template class IsoLayeredGeneratorTemplate<SingleAtomMarginal<true>>;
820template<
typename MarginalType>
821IsoOrderedGeneratorTemplate<MarginalType>::IsoOrderedGeneratorTemplate(
Iso&& iso,
int _tabSize,
int _hashSize) :
828 marginalResults =
new MarginalType*[
dimNumber];
831 marginalResults[i] =
new MarginalType(std::move(*(
marginals[i])), _tabSize, _hashSize);
838 masses[i] = &marginalResults[i]->conf_masses();
839 logProbs[i] = &marginalResults[i]->conf_lprobs();
842 topConf = allocator.newConf();
844 reinterpret_cast<char*
>(topConf) +
sizeof(
double),
849 *(
reinterpret_cast<double*
>(topConf)) =
859template<
typename MarginalType>
862 dealloc_table<MarginalType*>(marginalResults,
dimNumber);
870template<
typename MarginalType>
880 int* topConfIsoCounts = getConf(topConf);
882 currentLProb = *(
reinterpret_cast<double*
>(topConf));
883 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
884 currentProb = exp(currentLProb);
889 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
893 topConfIsoCounts[j]++;
894 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(topConfIsoCounts, logProbs,
dimNumber);
896 topConfIsoCounts[j]--;
901 void* acceptedCandidate = allocator.newConf();
902 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
903 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts,
confSize);
905 acceptedCandidateIsoCounts[j]++;
907 *(
reinterpret_cast<double*
>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs,
dimNumber);
909 pq.push(acceptedCandidate);
912 if(topConfIsoCounts[j] > 0)
916 topConfIsoCounts[ccount]++;
929template<
typename IsoType>
930IsoStochasticGeneratorTemplate<IsoType>::IsoStochasticGeneratorTemplate(
Iso&& iso,
size_t no_molecules,
double _precision,
double _beta_bias, std::mt19937& _rng) :
932ILG(std::move(*this)),
933to_sample_left(no_molecules),
934precision(_precision),
935beta_bias(_beta_bias),
941template class IsoStochasticGeneratorTemplate<IsoLayeredGeneratorTemplate<LayeredMarginal>>;
942template class IsoStochasticGeneratorTemplate<IsoLayeredGeneratorTemplate<SingleAtomMarginal<true>>>;
943template class IsoStochasticGeneratorTemplate<IsoOrderedGeneratorTemplate<MarginalTrek>>;
944template class IsoStochasticGeneratorTemplate<IsoOrderedGeneratorTemplate<SingleAtomMarginal<false>>>;
The generator of isotopologues.
virtual ~IsoGenerator()
Destructor.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
The Iso class for the calculation of the isotopic distribution.
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
double variance() const
Get the theoretical variance of the distribution.
double getMonoisotopicPeakMass() const
virtual ~Iso()
Destructor.
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
void terminate_search()
Block the subsequent search of isotopologues.
The generator of isotopologues sorted by their probability of occurrence.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
virtual ~IsoOrderedGeneratorTemplate()
Destructor.
void terminate_search()
Block the subsequent search of isotopologues.
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
void parse_fasta(const char *fasta, int atomCounts[6])
Count elemental composition of an unmodificed sequence of amino acids, resulting in CHNOSSe counts.