GCC Code Coverage Report


./
File: src/XpertMass/Sequence.cpp
Date: 2024-08-24 11:26:06
Lines:
170/275
61.8%
Functions:
21/27
77.8%
Branches:
118/328
36.0%

Line Branch Exec Source
1 /* BEGIN software license
2 *
3 * MsXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2018 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the MsXpertSuite project.
10 *
11 * The MsXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33 #include <QByteArrayView>
34
35 /////////////////////// Local includes
36 #include "Sequence.hpp"
37 #include "PolChemDef.hpp"
38
39
40 namespace MsXpS
41 {
42
43 namespace libXpertMass
44 {
45
46
47 /*!
48
49 \class MsXpS::libXpertMass::Sequence
50 \inmodule libXpertMass
51 \ingroup PolChemDefBuildingdBlocks
52 \inheaderfile Sequence.hpp
53
54 \brief The Sequence class provides abstractions to work with
55 a simple sequence of \l{Monomer}s.
56
57 A sequence of monomers can be represented in two ways:
58
59 \list
60 \li A string of monomer codes concatenated one to the other with no
61 delimitation(like "ATGC" or "AlaThrGlyCys");
62 \li A list of fully qualified \l{Monomer} instances allocated on the heap.
63 \endlist
64
65 \note The reference status of a sequence is in the form of a list of
66 allocated Monomer instances. The conversion to the string of monomer codes
67 is only a utility. When a sequence is created (with an argument that is a
68 string of monomer codes) the caller should ensure that the text sequence is
69 converted into a list of monomers prior to starting using its methods
70 extensively (see makeMonomerList()). Functions size() and
71 removeMonomerAt() only work on a sequence in the form of a list of
72 \l{Monomer} instances.
73
74 Methods are provided to convert from one sequence kind
75 (concatenated codes) to the other sequence kind(list of Monomer
76 instances).
77
78 Equally interesting is the ability of the methods in this class to
79 be able to:
80
81 - parse the monomer sequence and to extract monomer codes one
82 after the other;
83
84 - remove monomers from the sequence at specified indexes;
85
86 - add monomers to the sequence at specified indexes.
87
88 However, for this rather basic class to be able to perform
89 interesting tasks it has to be able to know where to find polymer
90 chemistry definition \l{PolChemDef} data. This is possible only when a
91 pointer to a polymer chemistry definition is passed to the used functions.
92 */
93
94
95 /*!
96 \variable MsXpS::libXpertMass::Sequence::m_monomerText
97
98 \brief String holding the sequence of monomer codes.
99 */
100
101 /*!
102 \variable MsXpS::libXpertMass::Sequence::m_monomerList
103
104 \brief List of allocated \l Monomer instances that should match the
105 sequence of codes string (m_monomerText).
106 */
107
108
109 /*!
110 \brief Construct a Sequence using \a text.
111
112 The sequence is in the form of a string of concatenated monomer
113 codes. No quality check is performed.
114 */
115 6116 Sequence::Sequence(const QString &text) : m_monomerText(text)
116 {
117 6116 }
118
119 /*!
120 \brief Construct this Sequence as a copy of \a other.
121
122 The copying is deep with the list of Monomer instances copied to this
123 Sequence.
124 */
125 16 Sequence::Sequence(const Sequence &other) : m_monomerText(other.m_monomerText)
126 {
127
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
16 for(int iter = 0; iter < other.m_monomerList.size(); ++iter)
128 m_monomerList.append(new Monomer(*other.m_monomerList.at(iter)));
129 16 }
130
131 /*!
132 \brief Destructs this sequence.
133
134 The \l Monomer instances are deleted.
135 */
136 12264 Sequence::~Sequence()
137 {
138
2/2
✓ Branch 1 taken 28616 times.
✓ Branch 2 taken 6132 times.
81760 while(!m_monomerList.isEmpty())
139
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 28616 times.
57232 delete m_monomerList.takeFirst();
140 12264 }
141
142 /*!
143 \brief Assigns \a other to this Sequence.
144
145 Returns a reference to this Sequence.
146 */
147 Sequence &
148 Sequence::operator=(const Sequence &other)
149 {
150 if(&other == this)
151 return *this;
152
153 m_monomerText = other.m_monomerText;
154
155 while(!m_monomerList.isEmpty())
156 delete m_monomerList.takeFirst();
157
158 for(int iter = 0; iter < other.m_monomerList.size(); ++iter)
159 {
160 m_monomerList.append(new Monomer(*other.m_monomerList.at(iter)));
161 }
162
163 return *this;
164 }
165
166
167 /*!
168 \brief Returns true if the \c this Sequence is identical to \a other, false
169 otherwise.
170 */
171 bool
172 32 Sequence::operator==(const Sequence &other)
173 {
174
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if(&other == this)
175 return true;
176
177
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
32 if(m_monomerText != other.m_monomerText)
178 return false;
179
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
32 if(m_monomerList.size() != other.m_monomerList.size())
180 return false;
181
182
2/2
✓ Branch 1 taken 2512 times.
✓ Branch 2 taken 32 times.
2544 for(int iter = 0; iter < m_monomerList.size(); ++iter)
183 {
184
1/2
✗ Branch 3 not taken.
✓ Branch 4 taken 2512 times.
2512 if(*(m_monomerList.at(iter)) != *(other.m_monomerList.at(iter)))
185 return false;
186 }
187
188 32 return true;
189 }
190
191 /*!
192 \brief Returns true if the \c this Sequence is different than \a other, false
193 otherwise.
194
195 Returns the negation of operator==(other).
196 */
197 bool
198 16 Sequence::operator!=(const Sequence &other)
199 {
200
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if(&other == this)
201 return false;
202
203 16 return !operator==(other);
204 }
205
206
207 /*!
208 \brief Sets this Sequence's string of monomer codes to \a text.
209 */
210 void
211 16 Sequence::setMonomerText(const QString &text)
212 {
213 16 m_monomerText = text;
214 16 }
215
216 /*!
217 \brief Appends the \a text sequence of monomer codes to this Sequence.
218
219 No verification is performed on \a text.
220 */
221 void
222 4 Sequence::appendMonomerText(const QString &text)
223 {
224
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 if(text.isEmpty())
225 return;
226
227 4 m_monomerText += text;
228 }
229
230
231 /*!
232 \brief Returns this Sequence's string of monomer codes.
233 */
234 const QString *
235 72 Sequence::monomerText()
236 {
237 72 return &m_monomerText;
238 }
239
240 /*!
241 \brief Returns a reference to this Sequence's list of \l Monomer instances.
242 */
243 const QList<const Monomer *> &
244 4 Sequence::monomerList() const
245 {
246 4 return m_monomerList;
247 }
248
249
250 /*!
251 \brief Returns this Sequence's list of \l Monomer instances.
252 */
253 QList<const Monomer *> *
254 Sequence::monomerListPtr()
255 {
256 return &m_monomerList;
257 }
258
259 /*!
260 \brief Returns the size of this Sequence as the size of the list of Monomers
261 instances.
262 */
263 int
264 6732 Sequence::size() const
265 {
266 6732 return m_monomerList.size();
267 }
268
269 /*!
270 \brief Returns true if \a index is valid as an index of a Monomer instance in
271 this Sequence's list of \l{Monomer}s, false otherwise.
272 */
273 bool
274 28 Sequence::isInBound(int index)
275 {
276
5/6
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 20 times.
✓ Branch 6 taken 8 times.
28 if(index >= 0 && index < size())
277 20 return true;
278
279 8 return false;
280 }
281
282 /*!
283 \brief Removes all spaces, carriage returns and linefeeds from this Sequence's
284 monomer codes string.
285 */
286 void
287 304 Sequence::unspacifyMonomerText()
288 {
289 // Removal of all spaces, carriage returns and linefeeds:
290
291
2/2
✓ Branch 1 taken 31040 times.
✓ Branch 2 taken 304 times.
31344 for(int iter = m_monomerText.length() - 1; iter >= 0; --iter)
292 {
293 31040 QChar curChar = m_monomerText.at(iter);
294
295
3/4
✓ Branch 4 taken 31032 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 31032 times.
✗ Branch 7 not taken.
93108 if(curChar == QChar::Tabulation || curChar == QChar::LineFeed ||
296
3/4
✓ Branch 4 taken 31032 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 31012 times.
✓ Branch 7 taken 20 times.
93096 curChar == QChar::FormFeed || curChar == QChar::CarriageReturn ||
297
4/6
✓ Branch 0 taken 31036 times.
✓ Branch 1 taken 4 times.
✓ Branch 6 taken 31012 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 31012 times.
124120 curChar == QChar::Space || curChar == QChar::Nbsp ||
298
2/2
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 31012 times.
62052 curChar == QChar::SoftHyphen)
299
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 m_monomerText.remove(iter, 1);
300 }
301 304 }
302
303 /*!
304 \brief Creates the monomer codes string version of this Sequence's list of
305 \l{Monomer} instances and sets it to this Sequence.
306
307 The function essentially writes to the m_monomerText string the code of each
308 Monomer instance in m_monomerList.
309
310 m_monomerText is cleared before writing the codes to it.
311
312 Returns the count of monomer codes added to m_monomerText.
313
314 \sa makeMonomerList()
315 */
316 int
317 16 Sequence::makeMonomerText()
318 {
319 16 int iter = 0;
320
321 16 m_monomerText.clear();
322
323
2/2
✓ Branch 1 taken 2512 times.
✓ Branch 2 taken 16 times.
2528 for(iter = 0; iter < m_monomerList.size(); ++iter)
324
2/4
✓ Branch 2 taken 2512 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2512 times.
✗ Branch 6 not taken.
2512 m_monomerText.append(m_monomerList.at(iter)->code());
325
326 16 return iter;
327 }
328
329 /*!
330 \brief Returns an allocated string with the monomer codes.
331
332 The returned string only contains the sequence of monomer codes for Monomer
333 instances between indices [\a start -- \a end] in this Sequence's list
334 of Monomer instances (m_monomerList).
335
336 If \a with_modif is true, the modification(s) associated to \l{Monomer}s are
337 also output to the string. The form of the string is, in this case,
338
339 \code
340 Thr<Phosphorylation>
341 \endcode
342 */
343 QString *
344 Sequence::monomerText(int start, int end, bool with_modif) const
345 {
346 int localStart = 0;
347 int localEnd = 0;
348
349 QString *p_text = new QString();
350
351 if(size() == 0)
352 return p_text;
353
354 if(start > end)
355 {
356 localStart = end;
357 localEnd = start;
358 }
359 else
360 {
361 localStart = start;
362 localEnd = end;
363 }
364
365 if(localStart < 0)
366 localStart = 0;
367
368 if(localEnd < 0 || localEnd >= size())
369 localEnd = size() - 1;
370
371 QString text;
372
373 for(int iter = localStart; iter < localEnd + 1; ++iter)
374 {
375 const Monomer *monomer = m_monomerList.at(iter);
376
377 // FIXME Error, the code below does not seem to work.
378 if(with_modif)
379 {
380 if(monomer->isModified())
381 {
382 for(int iter = 0; iter < monomer->modifList()->size(); ++iter)
383 {
384 text = QString("%1<%2>")
385 .arg(monomer->code())
386 .arg(monomer->modifList()->at(iter)->name());
387 }
388 }
389 else
390 text = monomer->code();
391 }
392 else
393 text = monomer->code();
394
395 p_text->append(text);
396 }
397
398 return p_text;
399 }
400
401
402 /*!
403 \brief Returns an allocated string with the monomer codes.
404
405 The returned string only contains the sequence of monomer codes for Monomer
406 instances contained in the regions (\l Coordinates) described in \a
407 coordinate_list.
408
409 If \a with_modif is true, the modification(s) associated to \l{Monomer}s are
410 also output to the string. The form of the string is, in this case,
411
412 \code
413 Thr<Phosphorylation>
414 \endcode
415
416 If \a delimited_regions is true, the sequence of Monomer codes belonging to each
417 region will be delimited using the Coordinates positions.
418
419 \sa Coordinates::positionsAsText()
420 */
421 QString *
422 Sequence::monomerText(const CoordinateList &coordinate_list,
423 bool with_modif,
424 bool delimited_regions) const
425 {
426 QString *p_text = new QString();
427
428 for(int iter = 0; iter < coordinate_list.size(); ++iter)
429 {
430 // New coordinates instance we are iterating into.
431 Coordinates *coordinates = coordinate_list.at(iter);
432
433 QString *tempString =
434 monomerText(coordinates->start(), coordinates->end(), with_modif);
435
436 if(delimited_regions)
437 *p_text += QString("Region %1: %2\n")
438 .arg(coordinates->positionsAsText())
439 .arg(*tempString);
440 else
441 *p_text += *tempString;
442
443 delete(tempString);
444 }
445
446 *p_text += QString("\n");
447
448 return p_text;
449 }
450
451
452 /*!
453 \brief Allocates all the Monomer instances to describe this Sequence's string
454 representation of monomer codes.
455
456 This function parses the member Monomer codes string (m_monomerText) and, for
457 each encountered code, creates a \l Monomer instance and adds it to the member
458 list of Monomer instances (m_monomerList).
459
460 If \a reset is true, the member list of Monomer instances is reset before the
461 work is done.
462
463 If \a errorList is non-nullptr, errors are stored in this list in the form of
464 the indices of failing monomer codes in the string.
465
466 The allocation of each Monomer instance based on its code is performed by
467 looking at the reference Monomer list in \a pol_chem_def_csp.
468
469 Because the m_monomerText member string of Monomer codes does not document any
470 monomer modification, no modifications are handled in this function.
471
472 Returns the size of the Monomer instances list or -1 if an error occurred.
473 */
474 int
475 300 Sequence::makeMonomerList(PolChemDefCstSPtr pol_chem_def_csp,
476 bool reset,
477 QList<int> *errorList)
478 {
479
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 300 times.
300 if(!pol_chem_def_csp)
480 {
481 qDebug() << "The PolChemDef pointer is nullptr!";
482 return -1;
483 }
484
485 // If error indices are to be stored, the list MUST be empty.
486
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 228 times.
300 if(errorList)
487
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 Q_ASSERT(errorList->size() == 0);
488
489
2/2
✓ Branch 0 taken 176 times.
✓ Branch 1 taken 124 times.
300 if(reset)
490 {
491
2/2
✓ Branch 1 taken 1796 times.
✓ Branch 2 taken 176 times.
2148 while(!m_monomerList.isEmpty())
492
2/4
✓ Branch 1 taken 1796 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1796 times.
1796 delete m_monomerList.takeFirst();
493 }
494
495
1/2
✓ Branch 1 taken 300 times.
✗ Branch 2 not taken.
300 unspacifyMonomerText();
496
497 // qDebug() << "Sequence:" << m_monomerText;
498
499 300 int index = 0;
500 300 int ret = -1;
501 300 QString err;
502 300 QString code;
503
504
2/4
✓ Branch 2 taken 300 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 300 times.
✗ Branch 6 not taken.
300 ret = nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
505
506
1/2
✓ Branch 2 taken 300 times.
✗ Branch 3 not taken.
300 const QList<Monomer *> &refList = pol_chem_def_csp->monomerList();
507
508 while(1)
509 {
510
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30684 times.
30684 if(ret < 0)
511 {
512 // There was an error in the parsed code. Store the index.
513 if(errorList)
514 {
515 errorList->append(index);
516 ++index;
517 ret =
518 nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
519 continue;
520 }
521 else
522 {
523 break;
524 }
525 }
526
527
2/2
✓ Branch 0 taken 300 times.
✓ Branch 1 taken 30384 times.
30684 if(ret == 0)
528 300 break;
529
530
3/8
✓ Branch 1 taken 30384 times.
✗ Branch 2 not taken.
✓ Branch 6 taken 30384 times.
✗ Branch 7 not taken.
✓ Branch 10 taken 30384 times.
✗ Branch 11 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
30384 Monomer *monomer = new Monomer(pol_chem_def_csp, "NOT_SET");
531
532
2/4
✓ Branch 1 taken 30384 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 30384 times.
30384 if(Monomer::isCodeInList(code, refList, monomer) == -1)
533 {
534 qDebug() << "Monomer:" << monomer->name()
535 << "was not found in the monomer reference list.";
536
537 delete monomer;
538
539 if(errorList)
540 {
541 errorList->append(index);
542 ++index;
543 ret =
544 nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
545 continue;
546 }
547 else
548 {
549 return -1;
550 }
551 }
552 else
553 {
554 // qDebug() << "Monomer:" << monomer->name()
555 //<< "with code:" << monomer->code()
556 //<< "was indeed found in the monomer reference list.";
557 }
558
559
1/2
✓ Branch 1 taken 30384 times.
✗ Branch 2 not taken.
30384 m_monomerList.append(monomer);
560
561 // qDebug() << "New monomer:" << monomer->name();
562
563 30384 ++index;
564
565 // qDebug() << "index:" << index;
566
567
2/4
✓ Branch 2 taken 30384 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 30384 times.
✗ Branch 6 not taken.
30384 ret = nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
568 30384 }
569 // End of
570 // while(1)
571
572
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 228 times.
300 if(errorList)
573 {
574
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 if(errorList->size())
575 return -1;
576 }
577
578
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 300 times.
300 if(ret == -1)
579 return -1;
580
581 300 return m_monomerList.size();
582 300 }
583
584
585 /*!
586 \brief Seeks the next code occurring in this Sequence's string of Monomer
587 codes.
588
589 This function starts looking in this Sequence's string of Monomer codes
590 (m_monomerText) at \a index. The next found Monomer code is stored in \a code.
591 If the text is not a monomer code, it is set to \a err.
592
593 The parsing of this Sequence's string of Monomer codes takes into account the
594 \a code_length.
595
596 Returns the count of characters that make \a code. This count can be used
597 to search for the next code by setting its value incremented by 1 to \a index
598 for a next function call.
599 */
600 int
601 49124 Sequence::nextCode(QString *code, int *index, QString *err, int code_length)
602 {
603 49124 QString newCode;
604 49124 int iter = 0;
605
606 // We get a sequence of monomer codes(like "LysArgGlu" for example)
607 // and we have to return the next code starting from *index. Note
608 // that the sequence must not contain invalid characters. The
609 // invalid characters might be placed in err for further scrutiny by
610 // the caller.
611
612 // Returns the count of actually parsed characters in the string
613 // newCode(copied to 'code' param). If an error occurs -1 is
614 // returned and the faulty character is copied in 'err'. 'index' is
615 // updated with the index of the last valid character parsed for
616 // current code.
617
618
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49124 times.
49124 Q_ASSERT(code);
619
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49124 times.
49124 Q_ASSERT(index);
620
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49124 times.
49124 Q_ASSERT(err);
621
622 49124 code->clear();
623 49124 err->clear();
624
625 49124 int monomer_text_length = m_monomerText.length();
626
627 while(1)
628 {
629
2/2
✓ Branch 0 taken 37064 times.
✓ Branch 1 taken 49140 times.
86204 if(iter >= code_length)
630 {
631 // Because we have progressed farther than authorized by
632 // the number of characters allowed in the monomer codes
633 // of this polymer chemistry definition, we decrement iter
634 // and break the loop... Later in this function, we'll set
635 // the proper index in the sequence where next parsing run
636 // should occurs (the calling function will increment
637 // *index by one).
638
639 37064 --iter;
640 37064 break;
641 }
642
643
2/2
✓ Branch 0 taken 6180 times.
✓ Branch 1 taken 42960 times.
49140 if(iter + *index >= monomer_text_length)
644 6180 break;
645
646 42960 QChar curChar = m_monomerText.at(iter + *index);
647
648
2/2
✓ Branch 1 taken 5880 times.
✓ Branch 2 taken 37080 times.
42960 if(!curChar.isLetter())
649 {
650 // qDebug() << __FILE__ << __LINE__
651 // << "The character is not a letter:"
652 // << curChar;
653
654
1/2
✓ Branch 1 taken 5880 times.
✗ Branch 2 not taken.
5880 *err = curChar;
655
656 // The non-Letter character might be '/', which would be
657 // perfectly fine, as we use it to symbolize the actual
658 // cleavage site. Which means that we will continue
659 // parsing the rest of the string : we have to give the
660 // current position back to the caller in the *index
661 // variable for the next call to this function to start at
662 // next character (not falling back to '/', which would
663 // make us enter in an infinite loop).
664
665 5880 *index = *index + iter;
666
667 5880 return -1;
668 }
669
670 37080 bool isLower = (curChar.category() == QChar::Letter_Lowercase);
671
672
2/2
✓ Branch 0 taken 37064 times.
✓ Branch 1 taken 16 times.
37080 if(iter == 0)
673 {
674
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 37064 times.
37064 if(isLower)
675 {
676 // qDebug() << __FILE__ << __LINE__
677 // << "First character of monomer code might not be"
678 // << "lower case; sequence is"
679 // << m_monomerText;
680
681 *err = curChar;
682
683 return -1;
684 }
685 else
686 {
687 // Good, first char is uppercase.
688
1/2
✓ Branch 1 taken 37064 times.
✗ Branch 2 not taken.
37064 newCode += curChar;
689 }
690 }
691 else //(iter != 0)
692 {
693 // We are not in our first iteration. So either the current
694 // character is lowercase and we are just continuing to
695 // iterate into a multi-char monomer code, or the current
696 // character is uppercase, in which case we are starting to
697 // iterate in a new monomer code.
698
699
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if(isLower)
700
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 newCode += curChar;
701 else
702 {
703 // Decrement iter, because this round was for nothing:
704 // we had "invaded" the next monomer code in sequence,
705 // which we must not do.
706
707 --iter;
708 break;
709 }
710 }
711
712 37080 ++iter;
713 37080 }
714
715 // We finished parsing at most codeLength characters out of
716 // 'm_monomerText', so we have a valid code in the 'code' variable. We
717 // can also compute a new index position in the sequence and return
718 // the number of characters that we effectively parsed. Note that
719 // the caller will be responsible for incrementing the 'index' value
720 // by one character unit so as not to reparse the last characters of
721 // the sent 'code' object.
722
723 43244 *index = *index + iter;
724 43244 *code = newCode;
725 43244 err->clear();
726
727 43244 return code->length();
728 49124 }
729
730 /*!
731 \brief Searches for a Sequence textual \a motif in this Sequence's
732 list of Monomer instances starting at \a index.
733
734 \a motif, a text string is first converted to a list of Monomer instances
735 (using the reference list of monomers in \a pol_chem_def_csp). Then, this
736 Sequence's monomer instances list is searched for a monomer stretch matching
737 that created for \a motif.
738
739 As soon as a monomer code stretch is found, the index in this Sequence's list of
740 Monomer instances is set to \a index.
741
742 Returns -1 if an error occurred, 1 if \a motif was found in this Sequence, 0
743 otherwise.
744 */
745
746 int
747 72 Sequence::findForwardMotif(Sequence *motif,
748 PolChemDefCstSPtr pol_chem_def_csp,
749 int *index)
750 {
751
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
72 Q_ASSERT(motif);
752
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 Q_ASSERT(pol_chem_def_csp);
753
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
72 Q_ASSERT(index);
754
755 // qDebug() << "motif:" << *(motif->monomerText()) << "index :" << *index;
756
757
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
72 if(*index < 0)
758 return -1;
759
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 if(*index >= size())
760 return -1;
761
762 72 int motifSize = motif->size();
763
764 // If motif's length is 0, then nothing to search for, return
765 // unmodified 'index'.
766
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
72 if(!motifSize)
767 return 0;
768
769 // Simple optimization, if index + size of motif is greater than
770 // size of sequence, return right away.
771
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 if(*index + motifSize > size())
772 return 0;
773
774 // First, make a monomerList.
775
2/4
✓ Branch 2 taken 72 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 72 times.
72 if(motif->makeMonomerList(pol_chem_def_csp) == -1)
776 return -1;
777
778 // Compare *this sequence with the one in 'motif', starting at index
779 // 'index' in *this sequence and 0 in 'motif'.
780
781 72 bool matched = false;
782 72 int matchIndex = 0;
783
784
2/2
✓ Branch 1 taken 4508 times.
✓ Branch 2 taken 8 times.
4516 for(int iter = *index; iter < size(); ++iter)
785 {
786 4508 matched = false;
787 4508 int jter = 0;
788
789 4508 const Monomer *monomer = at(iter);
790 4508 const Monomer *motifMonomer = motif->at(jter);
791
792 // qDebug() << "this monomer:" << monomer->code();
793 // qDebug() << "motif monomer:" << motifMonomer->code();
794
795 // We do not compare with operator == because that comparison
796 // would involve the comparison of modifications inside the
797 // monomers, which would not work here.
798
4/6
✓ Branch 1 taken 4508 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4508 times.
✗ Branch 5 not taken.
✓ Branch 9 taken 4080 times.
✓ Branch 10 taken 428 times.
4508 if(monomer->code() != motifMonomer->code())
799 4080 continue;
800
801 // An easy check is to see if the number of remaining monomers
802 // in the polymer sequence is compatible with the number of
803 // monomers still to be matched in the find array. Imagine the
804 // sequence of the polymer ends like this: ==========JTOUTVU and
805 // the sequence to be searched for is : TVUL What we see is that
806 // the T of the TVU of the sequence matches; however we can stop
807 // the search right away because there is a 'L' in the search
808 // pattern that is not present in the end part of the
809 // sequence. This is exactly what is checked below. Note that
810 // this check makes SURE that at the end of the second inner
811 // loop, when we get out of it, the sole reason we may not
812 // consider that the match did not occur is because actually two
813 // monomers differed and not because anybody came out of the
814 // borders of the sequence in neither the array of the sequence
815 // to be searched, nor the array of the polymer sequence. This
816 // makes it very easy to assess if a match occurred or not.
817
818
2/2
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 424 times.
428 if(size() - iter < motif->size() - jter)
819 {
820 // Note that if it were ==, then it would have been possible
821 // that the sequence "just-in-time" match prior to ending of
822 // the polymer sequence array. Do not forget that we are in
823 // forward mode, thus we can break immediately, because we
824 // are certain that we won't have any chance to find the
825 // sequence downstream of current index.
826
827 4 matched = false;
828 4 break;
829 }
830
831 424 matchIndex = iter;
832
833 // We have to set the matched boolean to true, because if the
834 // motif to find is one monomer-long, then the loop below will
835 // not be entered, and we'll fail to know that the match
836 // occurred later on.
837 424 matched = true;
838
839 // Now that we have our anchoring point in the *this sequence,
840 // let's iterate in the motif, and check if the identity in
841 // sequence goes along.
842
843
2/2
✓ Branch 1 taken 740 times.
✓ Branch 2 taken 60 times.
800 for(int kter = jter + 1; kter < motif->size(); ++kter)
844 {
845 // At first run in this loop, we are in the second cell of
846 // the find list, which means that we should have jter ==
847 // 1. And we should compare its contents with those of the
848 // cell in the sequence list at index(iter + jter).
849
850 740 monomer = at(iter + kter);
851 740 motifMonomer = motif->at(kter);
852
853 // We do not compare with operator == because that
854 // comparison would involve the comparison of modifications
855 // inside the monomers, which would not work here.
856
4/6
✓ Branch 1 taken 740 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 740 times.
✗ Branch 5 not taken.
✓ Branch 9 taken 376 times.
✓ Branch 10 taken 364 times.
740 if(monomer->code() == motifMonomer->code())
857 {
858 // The monomers still match.
859 376 matched = true;
860 376 continue;
861 }
862 else
863 {
864 364 matched = false;
865 364 break;
866 }
867 }
868 // End of
869 // for (int kter = jter + 1 ; kter < motif->size() ; ++kter)
870
871 // At this point, we either have normally extinguished the run
872 // in the inner loop, or we have gone out of it before its
873 // normal termination. In either case, we have to test if the
874 // match occurred or not.
875
876 // Check if the match did NOT occur:
877
878
2/2
✓ Branch 0 taken 364 times.
✓ Branch 1 taken 60 times.
424 if(!matched)
879 {
880 // We just continue with the outer loop, that is, we continue
881 // searching in this sequence for a match with the
882 // first monomer in the motif.
883
884 364 continue;
885 }
886 else
887 {
888 // The match indeed occurred.
889
890 60 *index = matchIndex;
891 60 return 1;
892 }
893 }
894 // End of
895 // for (int iter = *index; iter < size(); ++iter)
896
897
898 // No match could be achieved, we have to let the caller function
899 // know this in a durable manner : returning 0.
900
901 12 return 0;
902 }
903
904 /*!
905 \brief Returns the Monomer instance at index \a index in this Sequence's
906 monomer instance list.
907 */
908 const Monomer *
909 16716 Sequence::at(int index) const
910 {
911 // qDebug() << "In call at() with value:" << index;
912
913
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16716 times.
16716 if(index < 0)
914 qFatal("%s@%d -- Index cannot be less than 0.", __FILE__, __LINE__);
915
916
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 16716 times.
16716 if(index > m_monomerList.size())
917 qFatal("%s@%d -- Index cannot be greater than polymer size.",
918 __FILE__,
919 __LINE__);
920
921 16716 return m_monomerList.at(index);
922 }
923
924 /*!
925 \brief Returns the index of \a monomer in this Sequence's list of Monomer
926 instances.
927
928 The search is based on comparison of the pointers, that is, the returned
929 index is for the \e same monomer object.
930
931 Returns -1 if \a monomer is not found.
932 */
933 int
934 Sequence::monomerIndex(const Monomer *monomer)
935 {
936 for(int iter = 0; iter < m_monomerList.size(); ++iter)
937 {
938 if(m_monomerList.at(iter) == monomer)
939 return iter;
940 }
941
942 return -1;
943 }
944
945 /*!
946 \brief Inserts \a monomer at index \a index.
947
948 Returns true.
949 */
950 bool
951 32 Sequence::insertMonomerAt(const Monomer *monomer, int index)
952 {
953
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 Q_ASSERT(monomer);
954
2/4
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 32 times.
✗ Branch 4 not taken.
32 Q_ASSERT(index > -1 && index <= size());
955
956 32 m_monomerList.insert(index, monomer);
957
958 32 return true;
959 }
960
961
962 bool
963 4 Sequence::prepareMonomerRemoval([[maybe_unused]] const Monomer *monomer)
964 {
965 4 return true;
966 }
967
968 /*!
969 \brief Removes the monomer instance at index \a index from this Sequence's list
970 of Monomer instances.
971
972 Returns true.
973 */
974 bool
975 4 Sequence::removeMonomerAt(int index)
976 {
977
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 Q_ASSERT(index > -1);
978
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 Q_ASSERT(index < size());
979
980 4 const Monomer *monomer = at(index);
981
982
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 if(!prepareMonomerRemoval(monomer))
983 return false;
984
985 4 m_monomerList.removeAt(index);
986
987
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 delete monomer;
988
989 4 return true;
990 }
991
992 /*!
993 \brief Validates this Sequence using \a pol_chem_def_csp as the reference
994 polymer chemistry definition.
995
996 Returns true if all the monomers in textual representation of the sequence
997 (m_monomerText) could be converted into Monomer instances. This conversion
998 actually fills in m_monomerList. If an error occurred, returns false.
999
1000 \sa makeMonomerList()
1001 */
1002 bool
1003 4 Sequence::validate(PolChemDefCstSPtr pol_chem_def_csp)
1004 {
1005
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 Q_ASSERT(pol_chem_def_csp);
1006
1007
2/4
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 4 times.
✗ Branch 6 not taken.
4 if(makeMonomerList(pol_chem_def_csp) > -1)
1008 4 return true;
1009
1010 return false;
1011 }
1012
1013 /*!
1014 \brief Returns a checksum calculated on this Sequence's portion contained in
1015 [\a index_start -- \a index_end].
1016
1017 The sequence matching the [\a index_start -- \a index_end] range is built
1018 from m_monomerList (see monomerText()), with (\a with_modifs is true) or without
1019 (\a with_modifs is false) the monomer modifications. The checksum is computed on
1020 that string.
1021
1022 Returns the checksum.
1023 */
1024 quint16
1025 Sequence::checksum(int index_start, int index_end, bool with_modifs) const
1026 {
1027 if(!size())
1028 return 0;
1029
1030 QString *text = monomerText(index_start, index_end, with_modifs);
1031
1032 QByteArray bytes = text->toUtf8();
1033
1034 quint16 checksum = qChecksum(QByteArrayView(bytes));
1035
1036 // qDebug() << __FILE__ << __LINE__
1037 // << "checksum:" << checksum;
1038
1039 return checksum;
1040 }
1041
1042
1043 } // namespace libXpertMass
1044
1045 } // namespace MsXpS
1046