19#include <unordered_map>
26#include "dirtyAllocator.h"
29#include "marginalTrek++.h"
37unsigned int parse_formula(
const char* formula,
38 std::vector<double>& isotope_masses,
39 std::vector<double>& isotope_probabilities,
42 unsigned int* confSize,
43 bool use_nominal_masses =
false);
50class ISOSPEC_EXPORT_SYMBOL Iso {
59 void setupMarginals(
const double* _isotopeMasses,
60 const double* _isotopeProbabilities);
71 bool doMarginalsNeedSorting()
const;
86 const int* _isotopeNumbers,
87 const int* _atomCounts,
88 const double* _isotopeMasses,
89 const double* _isotopeProbabilities
93 const int* _isotopeNumbers,
94 const int* _atomCounts,
95 const double*
const * _isotopeMasses,
96 const double*
const * _isotopeProbabilities
100 Iso(
const char* formula,
bool use_nominal_masses =
false);
103 inline Iso(
const std::string& formula,
bool use_nominal_masses =
false) : Iso(formula.c_str(), use_nominal_masses) {}
114 static Iso FromFASTA(
const char* fasta,
bool use_nominal_masses =
false,
bool add_water =
true);
117 static inline Iso
FromFASTA(
const std::string& fasta,
bool use_nominal_masses =
false,
bool add_water =
true) {
return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
123 Iso& operator=(
const Iso& other) =
delete;
130 Iso(
const Iso& other,
bool fullcopy);
136 double getLightestPeakMass()
const;
139 double getHeaviestPeakMass()
const;
146 double getMonoisotopicPeakMass()
const;
149 double getModeLProb()
const;
152 double getUnlikeliestPeakLProb()
const;
155 double getModeMass()
const;
158 double getTheoreticalAverageMass()
const;
161 double variance()
const;
173 void addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities);
176 void saveMarginalLogSizeEstimates(
double* priorities,
double target_total_prob)
const;
187 const double mode_lprob;
237template<
typename MarginalType>
238class ISOSPEC_EXPORT_SYMBOL IsoOrderedGeneratorTemplate:
public IsoGenerator
241 MarginalType** marginalResults;
242 std::priority_queue<void*, pod_vector<void*>,
ConfOrder> pq;
253 IsoOrderedGeneratorTemplate(
const IsoOrderedGeneratorTemplate& other) =
delete;
254 IsoOrderedGeneratorTemplate& operator=(
const IsoOrderedGeneratorTemplate& other) =
delete;
265 if constexpr (std::is_same<MarginalType, MarginalTrek>::value)
267 int* c = getConf(topConf);
274 memcpy(space, marginalResults[ii]->confs()[c[ii]],
isotopeNumbers[ii]*
sizeof(
int));
282 throw std::runtime_error(
"IsoOrderedGeneratorTemplate::get_conf_signature() called on a non-MarginalTrek generator. This is not supported yet.");
291 inline void get_conf_by_indexes(
int* space)
293 if constexpr (std::is_same<MarginalType, SingleAtomMarginal<false>>::value)
298 int* c = getConf(topConf);
299 space[0] = std::max(c[0]-1, 0);
320 double* maxConfsLPSum;
321 const double Lcutoff;
326 const double* lProbs_ptr;
327 const double* lProbs_ptr_start;
328 double* partialLProbs_second;
329 double partialLProbs_second_val, lcfmsv;
333 IsoThresholdGenerator(
const IsoThresholdGenerator& other) =
delete;
334 IsoThresholdGenerator& operator=(
const IsoThresholdGenerator& other) =
delete;
338 counter[0] = lProbs_ptr - lProbs_ptr_start;
339 if(marginalOrder !=
nullptr)
343 int jj = marginalOrder[ii];
344 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]),
isotopeNumbers[ii]*
sizeof(
int));
352 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]),
isotopeNumbers[ii]*
sizeof(
int));
367 IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute =
true,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true);
377 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
385 lProbs_ptr = lProbs_ptr_start;
387 int * cntr_ptr = counter;
412 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); }
413 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); }
414 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); }
417 void terminate_search();
429 size_t count_confs();
433 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
435 for(; idx > 0; idx--)
437 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
438 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
439 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
441 partialLProbs_second_val = *partialLProbs_second;
442 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
443 lcfmsv = Lcutoff - partialLProbs_second_val;
446 ISOSPEC_FORCE_INLINE
void short_recalc(
int idx)
448 for(; idx > 0; idx--)
449 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
450 partialLProbs_second_val = *partialLProbs_second;
451 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
452 lcfmsv = Lcutoff - partialLProbs_second_val;
459template<
typename MarginalType>
460class ISOSPEC_EXPORT_SYMBOL IsoLayeredGeneratorTemplate :
public IsoGenerator
464 double* maxConfsLPSum;
465 double currentLThreshold, lastLThreshold;
466 MarginalType** marginalResults;
467 MarginalType** marginalResultsUnsorted;
470 const double* lProbs_ptr;
471 const double* lProbs_ptr_start;
472 const double** resetPositions;
473 double* partialLProbs_second;
474 double partialLProbs_second_val, lcfmsv, last_lcfmsv;
475 bool marginalsNeedSorting;
479 IsoLayeredGeneratorTemplate(
const IsoLayeredGeneratorTemplate& other) =
delete;
480 IsoLayeredGeneratorTemplate& operator=(
const IsoLayeredGeneratorTemplate& other) =
delete;
484 counter[0] = lProbs_ptr - lProbs_ptr_start;
485 if(marginalOrder !=
nullptr)
489 int jj = marginalOrder[ii];
490 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]),
isotopeNumbers[ii]*
sizeof(
int));
498 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]),
isotopeNumbers[ii]*
sizeof(
int));
504 inline double get_currentLThreshold()
const {
return currentLThreshold; }
506 IsoLayeredGeneratorTemplate(Iso&& iso,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true,
double t_prob_hint = 0.99);
508 ~IsoLayeredGeneratorTemplate();
514 if(advanceToNextConfigurationWithinLayer())
516 }
while(IsoLayeredGeneratorTemplate<MarginalType>::nextLayer(-2.0));
520 ISOSPEC_FORCE_INLINE
bool advanceToNextConfigurationWithinLayer()
525 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
532 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); };
533 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
534 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
541 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
543 for(; idx > 0; idx--)
549 partialLProbs_second_val = *partialLProbs_second;
550 partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->get_lProb(counter[0]);
551 lcfmsv = currentLThreshold - partialLProbs_second_val;
552 last_lcfmsv = lastLThreshold - partialLProbs_second_val;
555 bool nextLayer(
double offset);
557 void get_conf_by_indexes(
int* space)
const
559 if constexpr (std::is_same<MarginalType, SingleAtomMarginal<true>>::value)
561 counter[0] = lProbs_ptr - lProbs_ptr_start;
562 if(marginalOrder !=
nullptr)
564 for(
int ii = 0; ii < dimNumber; ii++)
566 int jj = marginalOrder[ii];
567 space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[jj]);
572 for(
int ii = 0; ii < dimNumber; ii++)
573 space[ii] = marginalResultsUnsorted[ii]->get_original_position(counter[ii]);
577 throw std::runtime_error(
"IsoLayeredGeneratorTemplate::get_conf_by_indexes() called on a non-SingleAtomMarginal generator. This is not supported yet.");
585template<
typename IsoType>
589 size_t to_sample_left;
590 const double precision;
591 const double beta_bias;
594 size_t current_count;
595 std::mt19937& rdvariate_gen;
598 IsoStochasticGeneratorTemplate(
Iso&& iso,
size_t no_molecules,
double precision = 0.9999,
double beta_bias = 5.0, std::mt19937& rdvariate_gen = random_gen);
600 ISOSPEC_FORCE_INLINE
size_t count()
const {
return current_count; }
602 ISOSPEC_FORCE_INLINE
double mass() const override final {
return ILG.mass(); }
604 ISOSPEC_FORCE_INLINE
double prob() const override final {
return static_cast<double>(count()); }
606 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return log(
prob()); }
608 ISOSPEC_FORCE_INLINE
void get_conf_signature(
int* space)
const override final { ILG.get_conf_signature(space); }
617 double curr_conf_prob_left, current_prob;
619 if(to_sample_left <= 0)
622 if(confs_prob < chasing_prob)
627 if(!ILG.advanceToNextConfiguration())
629 current_prob = ILG.prob();
630 confs_prob += current_prob;
631 while(confs_prob <= chasing_prob)
633 if(!ILG.advanceToNextConfiguration())
635 current_prob = ILG.prob();
636 confs_prob += current_prob;
638 if(to_sample_left <= 0)
640 curr_conf_prob_left = confs_prob - chasing_prob;
646 if(!ILG.advanceToNextConfiguration())
648 current_prob = ILG.prob();
649 confs_prob += current_prob;
650 curr_conf_prob_left = current_prob;
653 double prob_left_to_1 = precision - chasing_prob;
654 double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
656 if(expected_confs <= beta_bias)
659 chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
660 while(chasing_prob <= confs_prob)
664 if(to_sample_left == 0)
666 prob_left_to_1 = precision - chasing_prob;
667 chasing_prob += rdvariate_beta_1_b(to_sample_left, rdvariate_gen) * prob_left_to_1;
669 if(current_count > 0)
675 size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1, rdvariate_gen);
676 current_count += rbin;
677 to_sample_left -= rbin;
678 chasing_prob = confs_prob;
679 if(current_count > 0)
685 ISOSPEC_FORCE_INLINE
void get_indexes(
int* space)
687 ILG.get_conf_by_indexes(space);
virtual void get_conf_signature(int *space) const =0
Write the signature of configuration into target memory location. It must be large enough to accomoda...
virtual bool advanceToNextConfiguration()=0
Advance to the next, not yet visited, most probable isotopologue.
virtual double mass() const
Get the mass of the current isotopologue.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
virtual double lprob() const
Get the log-probability of the current isotopologue.
virtual double prob() const
Get the probability of the current isotopologue.
The Iso class for the calculation of the isotopic distribution.
double stddev() const
Get the standard deviation of the theoretical distribution.
static Iso FromFASTA(const std::string &fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C++ std::string. See above for details.
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
double variance() const
Get the theoretical variance of the distribution.
Iso(const std::string &formula, bool use_nominal_masses=false)
Constructor from C++ std::string chemical formula.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
void terminate_search()
Block the subsequent search of isotopologues.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
The generator of isotopologues sorted by their probability of occurrence.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
void get_conf_signature(int *space) const override final
IsoOrderedGeneratorTemplate(Iso &&iso, int _tabSize=1000, int _hashSize=1000)
The move-contstructor.
virtual ~IsoOrderedGeneratorTemplate()
Destructor.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
The generator of isotopologues above a given threshold value.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
void terminate_search()
Block the subsequent search of isotopologues.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.