GCC Code Coverage Report

Line	Branch	Exec	Source
1			/* BEGIN software license
2			*
3			* MsXpertSuite - mass spectrometry software suite
4			* -----------------------------------------------
5			* Copyright(C) 2009,...,2018 Filippo Rusconi
6			*
7			* http://www.msxpertsuite.org
8			*
9			* This file is part of the MsXpertSuite project.
10			*
11			* The MsXpertSuite project is the successor of the massXpert project. This
12			* project now includes various independent modules:
13			*
14			* - massXpert, model polymer chemistries and simulate mass spectrometric data;
15			* - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16			*
17			* This program is free software: you can redistribute it and/or modify
18			* it under the terms of the GNU General Public License as published by
19			* the Free Software Foundation, either version 3 of the License, or
20			* (at your option) any later version.
21			*
22			* This program is distributed in the hope that it will be useful,
23			* but WITHOUT ANY WARRANTY; without even the implied warranty of
24			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25			* GNU General Public License for more details.
26			*
27			* You should have received a copy of the GNU General Public License
28			* along with this program. If not, see <http://www.gnu.org/licenses/>.
29			*
30			* END software license
31			*/
32
33			#include <QByteArrayView>
34
35			/////////////////////// Local includes
36			#include "Sequence.hpp"
37			#include "PolChemDef.hpp"
38
39
40			namespace MsXpS
41			{
42
43			namespace libXpertMass
44			{
45
46
47			/*!
48
49			\class MsXpS::libXpertMass::Sequence
50			\inmodule libXpertMass
51			\ingroup PolChemDefBuildingdBlocks
52			\inheaderfile Sequence.hpp
53
54			\brief The Sequence class provides abstractions to work with
55			a simple sequence of \l{Monomer}s.
56
57			A sequence of monomer can be represented in two ways:
58
59			\list
60			\li A string of monomer codes concatenated one to the other with no
61			delimitation(like "ATGC" or "AlaThrGlyCys");
62			\li A list of fully qualified \l{Monomer} instances allocated on the heap.
63			\endlist
64
65			\note The reference status of a sequence is in the form of a list of
66			allocated Monomer instances. The conversion to the string of monomer codes
67			is only a utility. When a sequence is created (with an argument that is a
68			string of monomer codes) the caller should ensure that the text sequence is
69			converted into a list of monomers prior to starting using its methods
70			extensively (see makeMonomerList()). Functions size() and
71			removeMonomerAt() only work on a sequence in the form of a list of
72			\l{Monomer} instances.
73
74			Methods are provided to convert from one sequence kind
75			(concatenated codes) to the other sequence kind(list of Monomer
76			instances).
77
78			Equally interesting is the ability of the methods in this class to
79			be able to:
80
81			- parse the monomer sequence and to extract monomer codes one
82			after the other;
83
84			- remove monomers from the sequence at specified indexes;
85
86			- add monomers to the sequence at specified indexes.
87
88			However, for this rather basic class to be able to perform
89			interesting tasks it has to be able to know where to find polymer
90			chemistry definition \l{PolChemDef} data. This is possible only when a
91			pointer to a polymer chemistry definition is passed to the used functions.
92			*/
93
94
95			/*!
96			\variable MsXpS::libXpertMass::Sequence::m_monomerText
97
98			\brief String holding the sequence of monomer codes.
99			*/
100
101			/*!
102			\variable MsXpS::libXpertMass::Sequence::m_monomerList
103
104			\brief List of allocated \l Monomer instances that should match the
105			sequence of codes string (m_monomerText).
106			*/
107
108
109			/*!
110			\brief Construct a Sequence using \a text.
111
112			The sequence is in the form of a string of concatenated monomer
113			codes. No quality check is performed.
114			*/
115		6116	Sequence::Sequence(const QString &text) : m_monomerText(text)
116			{
117		6116	}
118
119			/*!
120			\brief Construct this Sequence as a copy of \a other.
121
122			The copying is deep with the list of Monomer instances copied to this
123			Sequence.
124			*/
125		16	Sequence::Sequence(const Sequence &other) : m_monomerText(other.m_monomerText)
126			{
127	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times.	16	for(int iter = 0; iter < other.m_monomerList.size(); ++iter)
128		✗	m_monomerList.append(new Monomer(*other.m_monomerList.at(iter)));
129		16	}
130
131			/*!
132			\brief Destructs this sequence.
133
134			The \l Monomer instances are deleted.
135			*/
136		12264	Sequence::~Sequence()
137			{
138	2/2 ✓ Branch 1 taken 28616 times. ✓ Branch 2 taken 6132 times.	81760	while(!m_monomerList.isEmpty())
139	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 28616 times.	57232	delete m_monomerList.takeFirst();
140		12264	}
141
142			/*!
143			\brief Assigns \a other to this Sequence
144
145			Returns a reference to this Sequence.
146			*/
147			Sequence &
148		✗	Sequence::operator=(const Sequence &other)
149			{
150		✗	if(&other == this)
151		✗	return *this;
152
153		✗	m_monomerText = other.m_monomerText;
154
155		✗	while(!m_monomerList.isEmpty())
156		✗	delete m_monomerList.takeFirst();
157
158		✗	for(int iter = 0; iter < other.m_monomerList.size(); ++iter)
159			{
160		✗	m_monomerList.append(new Monomer(*other.m_monomerList.at(iter)));
161			}
162
163		✗	return *this;
164			}
165
166
167			/*!
168			\brief Returns true if the \c this Sequence is identical to \a other, false
169			otherwise.
170			*/
171			bool
172		32	Sequence::operator==(const Sequence &other)
173			{
174	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 32 times.	32	if(&other == this)
175		✗	return true;
176
177	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 32 times.	32	if(m_monomerText != other.m_monomerText)
178		✗	return false;
179	1/2 ✗ Branch 2 not taken. ✓ Branch 3 taken 32 times.	32	if(m_monomerList.size() != other.m_monomerList.size())
180		✗	return false;
181
182	2/2 ✓ Branch 1 taken 2512 times. ✓ Branch 2 taken 32 times.	2544	for(int iter = 0; iter < m_monomerList.size(); ++iter)
183			{
184	1/2 ✗ Branch 3 not taken. ✓ Branch 4 taken 2512 times.	2512	if(*(m_monomerList.at(iter)) != *(other.m_monomerList.at(iter)))
185		✗	return false;
186			}
187
188		32	return true;
189			}
190
191			/*!
192			\brief Returns true if the \c this Sequence is different than \a other, false
193			otherwise.
194
195			Returns the negation of operator==(other).
196			*/
197			bool
198		16	Sequence::operator!=(const Sequence &other)
199			{
200	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	if(&other == this)
201		✗	return false;
202
203		16	return !operator==(other);
204			}
205
206
207			/*!
208			\brief Sets this Sequence's string of monomer codes to \a text.
209			*/
210			void
211		16	Sequence::setMonomerText(const QString &text)
212			{
213		16	m_monomerText = text;
214		16	}
215
216			/*!
217			\brief Appends the \a text sequence of monomer codes to this Sequence.
218
219			No verification is performed on \a text.
220			*/
221			void
222		4	Sequence::appendMonomerText(const QString &text)
223			{
224	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times.	4	if(text.isEmpty())
225		✗	return;
226
227		4	m_monomerText += text;
228			}
229
230
231			/*!
232			\brief Returns this Sequence's string of monomer codes.
233			*/
234			const QString *
235		72	Sequence::monomerText()
236			{
237		72	return &m_monomerText;
238			}
239
240			/*!
241			\brief Returns a reference to this Sequence's list of \l Monomer instances.
242			*/
243			const QList<const Monomer *> &
244		4	Sequence::monomerList() const
245			{
246		4	return m_monomerList;
247			}
248
249
250			/*!
251			\brief Returns this Sequence's list of \l Monomer instances.
252			*/
253			QList<const Monomer *> *
254		✗	Sequence::monomerListPtr()
255			{
256		✗	return &m_monomerList;
257			}
258
259			/*!
260			\brief Returns the size of this Sequence as the size of the list of Monomers
261			instances.
262			*/
263			int
264		6732	Sequence::size() const
265			{
266		6732	return m_monomerList.size();
267			}
268
269			/*!
270			\brief Returns true if \a index is valid as an index of a Monomer instance in
271			this Sequence's list of \l{Monomer}s, false otherwise.
272			*/
273			bool
274		28	Sequence::isInBound(int index)
275			{
276	5/6 ✓ Branch 0 taken 28 times. ✗ Branch 1 not taken. ✓ Branch 3 taken 20 times. ✓ Branch 4 taken 8 times. ✓ Branch 5 taken 20 times. ✓ Branch 6 taken 8 times.	28	if(index >= 0 && index < size())
277		20	return true;
278
279		8	return false;
280			}
281
282			/*!
283			\brief Removes all spaces, carriage returns and linefeeds from this Sequence's
284			monomer codes string.
285			*/
286			void
287		304	Sequence::unspacifyMonomerText()
288			{
289			// Removal of all spaces, carriage returns and linefeeds:
290
291	2/2 ✓ Branch 1 taken 31040 times. ✓ Branch 2 taken 304 times.	31344	for(int iter = m_monomerText.length() - 1; iter >= 0; --iter)
292			{
293		31040	QChar curChar = m_monomerText.at(iter);
294
295	3/4 ✓ Branch 4 taken 31032 times. ✓ Branch 5 taken 4 times. ✓ Branch 6 taken 31032 times. ✗ Branch 7 not taken.	93108	if(curChar == QChar::Tabulation || curChar == QChar::LineFeed ||
296	3/4 ✓ Branch 4 taken 31032 times. ✗ Branch 5 not taken. ✓ Branch 6 taken 31012 times. ✓ Branch 7 taken 20 times.	93096	curChar == QChar::FormFeed || curChar == QChar::CarriageReturn ||
297	4/6 ✓ Branch 0 taken 31036 times. ✓ Branch 1 taken 4 times. ✓ Branch 6 taken 31012 times. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✓ Branch 9 taken 31012 times.	124120	curChar == QChar::Space || curChar == QChar::Nbsp ||
298	2/2 ✓ Branch 2 taken 28 times. ✓ Branch 3 taken 31012 times.	62052	curChar == QChar::SoftHyphen)
299	1/2 ✓ Branch 1 taken 28 times. ✗ Branch 2 not taken.	28	m_monomerText.remove(iter, 1);
300			}
301		304	}
302
303			/*!
304			\brief Creates the monomer codes string version of this Sequence's list of
305			\l{Monomer} instances and set it to this Sequence
306
307			The function essentially writes to the m_monomerText string the code of each
308			Monomer instance in m_monomerList.
309
310			m_monomerText is cleared before writing the codes to it.
311
312			Returns the count of monomer codes added to m_monomerText.
313
314			\sa makeMonomerList()
315			*/
316			int
317		16	Sequence::makeMonomerText()
318			{
319		16	int iter = 0;
320
321		16	m_monomerText.clear();
322
323	2/2 ✓ Branch 1 taken 2512 times. ✓ Branch 2 taken 16 times.	2528	for(iter = 0; iter < m_monomerList.size(); ++iter)
324	2/4 ✓ Branch 2 taken 2512 times. ✗ Branch 3 not taken. ✓ Branch 5 taken 2512 times. ✗ Branch 6 not taken.	2512	m_monomerText.append(m_monomerList.at(iter)->code());
325
326		16	return iter;
327			}
328
329			/*!
330			\brief Returns an allocated string with the monomer codes.
331
332			The returned string only contains the sequence of monomer codes for Monomer
333			instances between indices [\a start -- \a end] in this Sequence's list
334			of Monomer instances (m_monomerList).
335
336			If \a with_modif is true, the modification(s) associated to \l{Monomer}s are
337			also output to the string. The form of the string is, in this case,
338
339			\code
340			Thr<Phosphorylation>
341			\endcode
342			*/
343			QString *
344		✗	Sequence::monomerText(int start, int end, bool with_modif) const
345			{
346		✗	int localStart = 0;
347		✗	int localEnd = 0;
348
349		✗	QString *p_text = new QString();
350
351		✗	if(size() == 0)
352		✗	return p_text;
353
354		✗	if(start > end)
355			{
356		✗	localStart = end;
357		✗	localEnd = start;
358			}
359			else
360			{
361		✗	localStart = start;
362		✗	localEnd = end;
363			}
364
365		✗	if(localStart < 0)
366		✗	localStart = 0;
367
368		✗	if(localEnd < 0 || localEnd >= size())
369		✗	localEnd = size() - 1;
370
371		✗	QString text;
372
373		✗	for(int iter = localStart; iter < localEnd + 1; ++iter)
374			{
375		✗	const Monomer *monomer = m_monomerList.at(iter);
376
377			// FIXME Error, the code below does not seem to work.
378		✗	if(with_modif)
379			{
380		✗	if(monomer->isModified())
381			{
382		✗	for(int iter = 0; iter < monomer->modifList()->size(); ++iter)
383			{
384		✗	text = QString("%1<%2>")
385		✗	.arg(monomer->code())
386		✗	.arg(monomer->modifList()->at(iter)->name());
387			}
388			}
389			else
390		✗	text = monomer->code();
391			}
392			else
393		✗	text = monomer->code();
394
395		✗	p_text->append(text);
396			}
397
398		✗	return p_text;
399		✗	}
400
401
402			/*!
403			\brief Returns an allocated string with the monomer codes.
404
405			The returned string only contains the sequence of monomer codes for Monomer
406			instances contained in the regions (\l Coordinates) described in \a
407			coordinate_list.
408
409			If \a with_modif is true, the modification(s) associated to \l{Monomer}s are
410			also output to the string. The form of the string is, in this case,
411
412			\code
413			Thr<Phosphorylation>
414			\endcode
415
416			If \a delimited_regions is true, the sequence of Monomer codes belonging to each
417			region will be delimited using the Coordinates positions.
418
419			\sa Coordinates::positionsAsText()
420			*/
421			QString *
422		✗	Sequence::monomerText(const CoordinateList &coordinate_list,
423			bool with_modif,
424			bool delimited_regions) const
425			{
426		✗	QString *p_text = new QString();
427
428		✗	for(int iter = 0; iter < coordinate_list.size(); ++iter)
429			{
430			// New coordinates instance we are iterating into.
431		✗	Coordinates *coordinates = coordinate_list.at(iter);
432
433			QString *tempString =
434		✗	monomerText(coordinates->start(), coordinates->end(), with_modif);
435
436		✗	if(delimited_regions)
437		✗	*p_text += QString("Region %1: %2\n")
438		✗	.arg(coordinates->positionsAsText())
439		✗	.arg(*tempString);
440			else
441		✗	*p_text += *tempString;
442
443		✗	delete(tempString);
444			}
445
446		✗	*p_text += QString("\n");
447
448		✗	return p_text;
449			}
450
451
452			/*!
453			\brief Allocates all the Monomer instances to describe this Sequence's string
454			representation of monomer codes.
455
456			This function parses the member Monomer codes string (m_monomerText) and, for
457			each encountered code, creates a \l Monomer instance and add it to the member
458			list of Monomer instances (m_monomerList).
459
460			If \a reset is true, the member list of Monomer instances is reset before the
461			work is done.
462
463			If \a errorList is non-nullptr, errors are stored in this list in the form of
464			the indices of failing monomer codes in the string.
465
466			The allocation of each Monomer instance based on its code is performed by
467			looking at the reference Monomer list in \a pol_chem_def_csp.
468
469			Because the m_monomerText member string of Monomer codes does not document any
470			monomer modification, no modifications are handled in this function.
471
472			Returns the size of the Monomer instances list or -1 if an error occurred.
473			*/
474			int
475		300	Sequence::makeMonomerList(PolChemDefCstSPtr pol_chem_def_csp,
476			bool reset,
477			QList<int> *errorList)
478			{
479	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 300 times.	300	if(!pol_chem_def_csp)
480			{
481		✗	qDebug() << "The PolChemDef pointer is nullptr!";
482		✗	return -1;
483			}
484
485			// If error indices are to be stored, the list MUST be empty.
486	2/2 ✓ Branch 0 taken 72 times. ✓ Branch 1 taken 228 times.	300	if(errorList)
487	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 72 times.	72	Q_ASSERT(errorList->size() == 0);
488
489	2/2 ✓ Branch 0 taken 176 times. ✓ Branch 1 taken 124 times.	300	if(reset)
490			{
491	2/2 ✓ Branch 1 taken 1796 times. ✓ Branch 2 taken 176 times.	2148	while(!m_monomerList.isEmpty())
492	2/4 ✓ Branch 1 taken 1796 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✓ Branch 4 taken 1796 times.	1796	delete m_monomerList.takeFirst();
493			}
494
495	1/2 ✓ Branch 1 taken 300 times. ✗ Branch 2 not taken.	300	unspacifyMonomerText();
496
497			// qDebug() << "Sequence:" << m_monomerText;
498
499		300	int index = 0;
500		300	int ret = -1;
501		300	QString err;
502		300	QString code;
503
504	2/4 ✓ Branch 2 taken 300 times. ✗ Branch 3 not taken. ✓ Branch 5 taken 300 times. ✗ Branch 6 not taken.	300	ret = nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
505
506	1/2 ✓ Branch 2 taken 300 times. ✗ Branch 3 not taken.	300	const QList<Monomer *> &refList = pol_chem_def_csp->monomerList();
507
508			while(1)
509			{
510	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 30684 times.	30684	if(ret < 0)
511			{
512			// There was an error in the parsed code. Store the index.
513		✗	if(errorList)
514			{
515		✗	errorList->append(index);
516		✗	++index;
517			ret =
518		✗	nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
519		✗	continue;
520			}
521			else
522			{
523		✗	break;
524			}
525			}
526
527	2/2 ✓ Branch 0 taken 300 times. ✓ Branch 1 taken 30384 times.	30684	if(ret == 0)
528		300	break;
529
530	3/8 ✓ Branch 1 taken 30384 times. ✗ Branch 2 not taken. ✓ Branch 6 taken 30384 times. ✗ Branch 7 not taken. ✓ Branch 10 taken 30384 times. ✗ Branch 11 not taken. ✗ Branch 20 not taken. ✗ Branch 21 not taken.	30384	Monomer *monomer = new Monomer(pol_chem_def_csp, "NOT_SET");
531
532	2/4 ✓ Branch 1 taken 30384 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✓ Branch 4 taken 30384 times.	30384	if(Monomer::isCodeInList(code, refList, monomer) == -1)
533			{
534		✗	qDebug() << "Monomer:" << monomer->name()
535		✗	<< "was not found in the monomer reference list.";
536
537		✗	delete monomer;
538
539		✗	if(errorList)
540			{
541		✗	errorList->append(index);
542		✗	++index;
543			ret =
544		✗	nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
545		✗	continue;
546			}
547			else
548			{
549		✗	return -1;
550			}
551			}
552			else
553			{
554			// qDebug() << "Monomer:" << monomer->name()
555			//<< "with code:" << monomer->code()
556			//<< "was indeed found in the monomer reference list.";
557			}
558
559	1/2 ✓ Branch 1 taken 30384 times. ✗ Branch 2 not taken.	30384	m_monomerList.append(monomer);
560
561			// qDebug() << "New monomer:" << monomer->name();
562
563		30384	++index;
564
565			// qDebug() << "index:" << index;
566
567	2/4 ✓ Branch 2 taken 30384 times. ✗ Branch 3 not taken. ✓ Branch 5 taken 30384 times. ✗ Branch 6 not taken.	30384	ret = nextCode(&code, &index, &err, pol_chem_def_csp->codeLength());
568		30384	}
569			// End of
570			// while(1)
571
572	2/2 ✓ Branch 0 taken 72 times. ✓ Branch 1 taken 228 times.	300	if(errorList)
573			{
574	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 72 times.	72	if(errorList->size())
575		✗	return -1;
576			}
577
578	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 300 times.	300	if(ret == -1)
579		✗	return -1;
580
581		300	return m_monomerList.size();
582		300	}
583
584
585			/*!
586			\brief Seeks the next code occurring in this Sequence's string of Monomer
587			codes.
588
589			This function starts looking in this Sequence's string of Monomer codes
590			(m_monomerText) at \a index. The next found Monomer code is stored in \a code.
591			If the text is not a monomer code, it is set to \a err.
592
593			The parsing of this Sequence's string of Monomer codes takes into account the
594			\a code_length.
595
596			Returns the count of characters that make \a code. This count can be used
597			to search for the next code by setting its value incremented by 1 to \a index
598			for a next function call.
599			*/
600			int
601		49124	Sequence::nextCode(QString *code, int *index, QString *err, int code_length)
602			{
603		49124	QString newCode;
604		49124	int iter = 0;
605
606			// We get a sequence of monomer codes(like "LysArgGlu" for example)
607			// and we have to return the next code starting from *index. Note
608			// that the sequence must not contain invalid characters. The
609			// invalid characters might be placed in err for further scrutiny by
610			// the caller.
611
612			// Returns the count of actually parsed characters in the string
613			// newCode(copied to 'code' param). If an error occurs -1 is
614			// returned and the faulty character is copied in 'err'. 'index' is
615			// updated with the index of the last valid character parsed for
616			// current code.
617
618	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 49124 times.	49124	Q_ASSERT(code);
619	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 49124 times.	49124	Q_ASSERT(index);
620	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 49124 times.	49124	Q_ASSERT(err);
621
622		49124	code->clear();
623		49124	err->clear();
624
625		49124	int monomer_text_length = m_monomerText.length();
626
627			while(1)
628			{
629	2/2 ✓ Branch 0 taken 37064 times. ✓ Branch 1 taken 49140 times.	86204	if(iter >= code_length)
630			{
631			// Because we have progressed farther than authorized by
632			// the number of characters allowed in the monomer codes
633			// of this polymer chemistry definition, we decrement iter
634			// and break the loop... Later in this function, we'll set
635			// the proper index in the sequence where next parsing run
636			// should occurs (the calling function will increment
637			// *index by one).
638
639		37064	--iter;
640		37064	break;
641			}
642
643	2/2 ✓ Branch 0 taken 6180 times. ✓ Branch 1 taken 42960 times.	49140	if(iter + *index >= monomer_text_length)
644		6180	break;
645
646		42960	QChar curChar = m_monomerText.at(iter + *index);
647
648	2/2 ✓ Branch 1 taken 5880 times. ✓ Branch 2 taken 37080 times.	42960	if(!curChar.isLetter())
649			{
650			// qDebug() << __FILE__ << __LINE__
651			// << "The character is not a letter:"
652			// << curChar;
653
654	1/2 ✓ Branch 1 taken 5880 times. ✗ Branch 2 not taken.	5880	*err = curChar;
655
656			// The non-Letter character might be '/', which would be
657			// perfectly fine, as we use it to symbolize the actual
658			// cleavage site. Which means that we will continue
659			// parsing the rest of the string : we have to give the
660			// current position back to the caller in the *index
661			// variable for the next call to this function to start at
662			// next character (not falling back to '/', which would
663			// make us enter in an infinite loop).
664
665		5880	*index = *index + iter;
666
667		5880	return -1;
668			}
669
670		37080	bool isLower = (curChar.category() == QChar::Letter_Lowercase);
671
672	2/2 ✓ Branch 0 taken 37064 times. ✓ Branch 1 taken 16 times.	37080	if(iter == 0)
673			{
674	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 37064 times.	37064	if(isLower)
675			{
676			// qDebug() << __FILE__ << __LINE__
677			// << "First character of monomer code might not be"
678			// << "lower case; sequence is"
679			// << m_monomerText;
680
681		✗	*err = curChar;
682
683		✗	return -1;
684			}
685			else
686			{
687			// Good, first char is uppercase.
688	1/2 ✓ Branch 1 taken 37064 times. ✗ Branch 2 not taken.	37064	newCode += curChar;
689			}
690			}
691			else //(iter != 0)
692			{
693			// We are not in our first iteration. So either the current
694			// character is lowercase and we are just continuing to
695			// iterate into a multi-char monomer code, or the current
696			// character is uppercase, in which case we are starting to
697			// iterate in a new monomer code.
698
699	1/2 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken.	16	if(isLower)
700	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	newCode += curChar;
701			else
702			{
703			// Decrement iter, because this round was for nothing:
704			// we had "invaded" the next monomer code in sequence,
705			// which we must not do.
706
707		✗	--iter;
708		✗	break;
709			}
710			}
711
712		37080	++iter;
713		37080	}
714
715			// We finished parsing at most codeLength characters out of
716			// 'm_monomerText', so we have a valid code in the 'code' variable. We
717			// can also compute a new index position in the sequence and return
718			// the number of characters that we effectively parsed. Note that
719			// the caller will be responsible for incrementing the 'index' value
720			// by one character unit so as not to reparse the last characters of
721			// the sent 'code' object.
722
723		43244	*index = *index + iter;
724		43244	*code = newCode;
725		43244	err->clear();
726
727		43244	return code->length();
728		49124	}
729
730			/*!
731			\brief Searches for a Sequence textual \a motif in this Sequence's
732			list of Monomer instances starting at \a index.
733
734			\a motif, a text string is first converted to a list of Monomer instances
735			(using the reference list of monomers in \a pol_chem_def_csp). Then, this
736			Sequence's monomer instances list is searched for a monomer stretch matching
737			that created for \a motif.
738
739			As soon as a monomer code stretch is found, the index in this Sequence's list of
740			Monomer instances is set to \a index.
741
742			Returns -1 if an error occurred, 1 if \a motif was found in this Sequence, 0
743			otherwise.
744			*/
745
746			int
747		72	Sequence::findForwardMotif(Sequence *motif,
748			PolChemDefCstSPtr pol_chem_def_csp,
749			int *index)
750			{
751	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 72 times.	72	Q_ASSERT(motif);
752	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 72 times.	72	Q_ASSERT(pol_chem_def_csp);
753	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 72 times.	72	Q_ASSERT(index);
754
755			// qDebug() << "motif:" << (motif->monomerText()) << "index :" << index;
756
757	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 72 times.	72	if(*index < 0)
758		✗	return -1;
759	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 72 times.	72	if(*index >= size())
760		✗	return -1;
761
762		72	int motifSize = motif->size();
763
764			// If motif's length is 0, then nothing to search for, return
765			// unmodified 'index'.
766	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 72 times.	72	if(!motifSize)
767		✗	return 0;
768
769			// Simple optimization, if index + size of motif is greater than
770			// size of sequence, return right away.
771	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 72 times.	72	if(*index + motifSize > size())
772		✗	return 0;
773
774			// First, make a monomerList.
775	2/4 ✓ Branch 2 taken 72 times. ✗ Branch 3 not taken. ✗ Branch 5 not taken. ✓ Branch 6 taken 72 times.	72	if(motif->makeMonomerList(pol_chem_def_csp) == -1)
776		✗	return -1;
777
778			// Compare *this sequence with the one in 'motif', starting at index
779			// 'index' in *this sequence and 0 in 'motif'.
780
781		72	bool matched = false;
782		72	int matchIndex = 0;
783
784	2/2 ✓ Branch 1 taken 4508 times. ✓ Branch 2 taken 8 times.	4516	for(int iter = *index; iter < size(); ++iter)
785			{
786		4508	matched = false;
787		4508	int jter = 0;
788
789		4508	const Monomer *monomer = at(iter);
790		4508	const Monomer *motifMonomer = motif->at(jter);
791
792			// qDebug() << "this monomer:" << monomer->code();
793			// qDebug() << "motif monomer:" << motifMonomer->code();
794
795			// We do not compare with operator == because that comparison
796			// would involve the comparison of modifications inside the
797			// monomers, which would not work here.
798	4/6 ✓ Branch 1 taken 4508 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 4508 times. ✗ Branch 5 not taken. ✓ Branch 9 taken 4080 times. ✓ Branch 10 taken 428 times.	4508	if(monomer->code() != motifMonomer->code())
799		4080	continue;
800
801			// An easy check is to see if the number of remaining monomers
802			// in the polymer sequence is compatible with the number of
803			// monomers still to be matched in the find array. Imagine the
804			// sequence of the polymer ends like this: ==========JTOUTVU and
805			// the sequence to be searched for is : TVUL What we see is that
806			// the T of the TVU of the sequence matches; however we can stop
807			// the search right away because there is a 'L' in the search
808			// pattern that is not present in the end part of the
809			// sequence. This is exactly what is checked below. Note that
810			// this check makes SURE that at the end of the second inner
811			// loop, when we get out of it, the sole reason we may not
812			// consider that the match did not occur is because actually two
813			// monomers differred and not because anybody came out of the
814			// borders of the sequence in neither the array of the sequence
815			// to be searched, nor the array of the polymer sequence. This
816			// makes it very easy to assess if a match occurred or not.
817
818	2/2 ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 424 times.	428	if(size() - iter < motif->size() - jter)
819			{
820			// Note that if it were ==, then it would have been possible
821			// that the sequence "just-in-time" match prior to ending of
822			// the polymer sequence array. Do not forget that we are in
823			// forward mode, thus we can break immediately, because we
824			// are certain that we won't have any chance to find the
825			// sequence downstream of current index.
826
827		4	matched = false;
828		4	break;
829			}
830
831		424	matchIndex = iter;
832
833			// We have to set the matched boolean to true, because if the
834			// motif to find is one monomer-long, then the loop below will
835			// not be entered, and we'll fail to know that the match
836			// occurred later on.
837		424	matched = true;
838
839			// Now that we have our anchoring point in the *this sequence,
840			// let's iterate in the motif, and check if the identity in
841			// sequence goes along.
842
843	2/2 ✓ Branch 1 taken 740 times. ✓ Branch 2 taken 60 times.	800	for(int kter = jter + 1; kter < motif->size(); ++kter)
844			{
845			// At first run in this loop, we are in the second cell of
846			// the find list, which means that we should have jter ==
847			// 1. And we should compare its contents with those of the
848			// cell in the sequence list at index(iter + jter).
849
850		740	monomer = at(iter + kter);
851		740	motifMonomer = motif->at(kter);
852
853			// We do not compare with operator == because that
854			// comparison would involve the comparison of modifications
855			// inside the monomers, which would not work here.
856	4/6 ✓ Branch 1 taken 740 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 740 times. ✗ Branch 5 not taken. ✓ Branch 9 taken 376 times. ✓ Branch 10 taken 364 times.	740	if(monomer->code() == motifMonomer->code())
857			{
858			// The monomers still match.
859		376	matched = true;
860		376	continue;
861			}
862			else
863			{
864		364	matched = false;
865		364	break;
866			}
867			}
868			// End of
869			// for (int kter = jter + 1 ; kter < motif->size() ; ++kter)
870
871			// At this point, we either have normally extinguished the run
872			// in the inner loop, or we have gone out of it before its
873			// normal termination. In either case, we have to test if the
874			// match occurred or not.
875
876			// Check if the match did NOT occur:
877
878	2/2 ✓ Branch 0 taken 364 times. ✓ Branch 1 taken 60 times.	424	if(!matched)
879			{
880			// We just continue with the outer loop, that is, we continue
881			// searching in this sequence for a match with the
882			// first monomer in the motif.
883
884		364	continue;
885			}
886			else
887			{
888			// The match indeed occurred.
889
890		60	*index = matchIndex;
891		60	return 1;
892			}
893			}
894			// End of
895			// for (int iter = *index; iter < size(); ++iter)
896
897
898			// No match could be achieved, we have to let the caller function
899			// know this in a durable manner : returning 0.
900
901		12	return 0;
902			}
903
904			/*!
905			\brief Returns the Monomer instance at index \a index in this Sequence's
906			monomer instance list.
907			*/
908			const Monomer *
909		16716	Sequence::at(int index) const
910			{
911			// qDebug() << "In call at() with value:" << index;
912
913	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16716 times.	16716	if(index < 0)
914		✗	qFatal("%s@%d -- Index cannot be less than 0.", __FILE__, __LINE__);
915
916	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 16716 times.	16716	if(index > m_monomerList.size())
917		✗	qFatal("%s@%d -- Index cannot be greater than polymer size.",
918			__FILE__,
919			__LINE__);
920
921		16716	return m_monomerList.at(index);
922			}
923
924			/*!
925			\brief Returns the \index of \a monomer in this Sequence's list of Monomer
926			instances.
927
928			The search is based on comparison of the pointers, that is, the returned
929			index is for the \e same monomer object.
930
931			Returns -1 if \a monomer is not found.
932			*/
933			int
934		✗	Sequence::monomerIndex(const Monomer *monomer)
935			{
936		✗	for(int iter = 0; iter < m_monomerList.size(); ++iter)
937			{
938		✗	if(m_monomerList.at(iter) == monomer)
939		✗	return iter;
940			}
941
942		✗	return -1;
943			}
944
945			/*!
946			\brief Insert \a monomer at index \a index.
947
948			Returns true.
949			*/
950			bool
951		32	Sequence::insertMonomerAt(const Monomer *monomer, int index)
952			{
953	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 32 times.	32	Q_ASSERT(monomer);
954	2/4 ✓ Branch 0 taken 32 times. ✗ Branch 1 not taken. ✓ Branch 3 taken 32 times. ✗ Branch 4 not taken.	32	Q_ASSERT(index > -1 && index <= size());
955
956		32	m_monomerList.insert(index, monomer);
957
958		32	return true;
959			}
960
961
962			bool
963		4	Sequence::prepareMonomerRemoval([[maybe_unused]] const Monomer *monomer)
964			{
965		4	return true;
966			}
967
968			/*!
969			\brief Removes the monomer instance at index \a index from this Sequence's list
970			of Monomer instances.
971
972			Returns true.
973			*/
974			bool
975		4	Sequence::removeMonomerAt(int index)
976			{
977	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times.	4	Q_ASSERT(index > -1);
978	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times.	4	Q_ASSERT(index < size());
979
980		4	const Monomer *monomer = at(index);
981
982	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times.	4	if(!prepareMonomerRemoval(monomer))
983		✗	return false;
984
985		4	m_monomerList.removeAt(index);
986
987	1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken.	4	delete monomer;
988
989		4	return true;
990			}
991
992			/*!
993			\brief Validates this Sequence using \a pol_chem_def_csp as the reference
994			polymer chemistry definition.
995
996			Returns true if all the monomers in textual representation of the sequence
997			(m_monomerText) could be converted into Monomer instances. This conversion
998			actually fills in m_monomerList. If an error occurred, returns false.
999
1000			\sa makeMonomerList()
1001			*/
1002			bool
1003		4	Sequence::validate(PolChemDefCstSPtr pol_chem_def_csp)
1004			{
1005	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times.	4	Q_ASSERT(pol_chem_def_csp);
1006
1007	2/4 ✓ Branch 2 taken 4 times. ✗ Branch 3 not taken. ✓ Branch 5 taken 4 times. ✗ Branch 6 not taken.	4	if(makeMonomerList(pol_chem_def_csp) > -1)
1008		4	return true;
1009
1010		✗	return false;
1011			}
1012
1013			/*!
1014			\brief Returns a checksum calculated on this Sequence's portion contained in
1015			[\a index_start -- \a index_end].
1016
1017			The sequence matching the [\a index_start -- \a index_end] range is extracted
1018			from m_monomerText, with (\a with_modifs is true) or without (\a with_modifs is
1019			false) the monomer modifications. The checksum is computed on that extracted
1020			string.
1021
1022			Returns the checksum.
1023			*/
1024			quint16
1025		✗	Sequence::checksum(int index_start, int index_end, bool with_modifs) const
1026			{
1027		✗	if(!size())
1028		✗	return 0;
1029
1030		✗	QString *text = monomerText(index_start, index_end, with_modifs);
1031
1032		✗	QByteArray bytes = text->toUtf8();
1033
1034		✗	quint16 checksum = qChecksum(QByteArrayView(bytes));
1035
1036			// qDebug() << __FILE__ << __LINE__
1037			// << "checksum:" << checksum;
1038
1039		✗	return checksum;
1040		✗	}
1041
1042
1043			} // namespace libXpertMass
1044
1045			} // namespace MsXpS
1046