GCC Code Coverage Report


./
File: src/XpertMass/MonomerDictionary.cpp
Date: 2024-08-24 11:26:06
Lines:
0/150
0.0%
Functions:
0/11
0.0%
Branches:
0/298
0.0%

Line Branch Exec Source
1 /* BEGIN software license
2 *
3 * MsXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2018 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the MsXpertSuite project.
10 *
11 * The MsXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34 /////////////////////// Qt includes
35 #include <QObject>
36 #include <QFile>
37
38
39 /////////////////////// Local includes
40 #include "MonomerDictionary.hpp"
41 #include "Sequence.hpp"
42
43
44 namespace MsXpS
45 {
46
47 namespace libXpertMass
48 {
49
50
51 MonomerDictionary::MonomerDictionary(QString file_path,
52 const QStringList &input_chain_string_list,
53 int input_code_length,
54 int output_code_length)
55 : m_filePath(file_path),
56 m_inputChainStringList(input_chain_string_list),
57 m_inputCodeLength(input_code_length),
58 m_outputCodeLength(output_code_length)
59 {
60 }
61
62 /*!
63 \brief Destructs this MonomerDictionary instance.
64 */
65 MonomerDictionary::~MonomerDictionary()
66 {
67 }
68
69 void
70 MonomerDictionary::setFilePath(QString &file_path)
71 {
72 m_filePath = file_path;
73 }
74
75 void
76 MonomerDictionary::setInputChainStringList(
77 const QStringList &input_chain_string_list)
78 {
79 m_inputChainStringList = input_chain_string_list;
80 }
81
82 void
83 MonomerDictionary::setInputCodeLength(int code_length)
84 {
85 m_inputCodeLength = code_length;
86 }
87
88 void
89 MonomerDictionary::setOutputCodeLength(int code_length)
90 {
91 m_outputCodeLength = code_length;
92 }
93
94 bool
95 MonomerDictionary::isLineProperSectionDivider(const QString &line)
96 {
97 // Section dividers in the monomer dictionary file format are
98 // lines containing the following syntax: X>Y, that is for example
99 // 3>1. This means that the following conversion rules (like
100 // ILE>I) should convert 3-letter codes into 1-letter codes.
101
102 // However, this line should only be considered proper if X is
103 // actually the value of m_inputCodeLength and Y the value of
104 // m_outputCodeLength.
105
106 // qDebug() << __FILE__ << __LINE__
107 // << "Checking if line is proper section divider :" << line;
108
109 if(line.contains(QRegularExpression("[0-9]+>[0-9]+")))
110 {
111 // We are opening a new section, get the input/output code
112 // lengths and if they math what we expect, then set the
113 // current stream position and call the section parser.
114
115 int greaterThanIndex = line.indexOf('>');
116
117 QString codeLengthString = line.left(greaterThanIndex);
118
119 // qDebug() << __FILE__ << __LINE__
120 // << "Left codeLengthString:" << codeLengthString
121 // << "m_inputCodeLength:" << m_inputCodeLength;
122
123 bool ok = false;
124 int codeLength = codeLengthString.toInt(&ok, 10);
125
126 if(!codeLength && !ok)
127 {
128 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
129 << "Failed to parse file " << m_filePath << "at line "
130 << line;
131
132 return false;
133 }
134
135 if(codeLength != m_inputCodeLength)
136 {
137 return false;
138 }
139
140 codeLengthString = line.mid(greaterThanIndex + 1, -1);
141
142 // qDebug() << __FILE__ << __LINE__
143 // << "Right codeLengthString:" << codeLengthString
144 // << "m_outputCodeLength:" << m_outputCodeLength;
145
146 ok = false;
147 codeLength = codeLengthString.toInt(&ok, 10);
148
149 if(!codeLength && !ok)
150 {
151 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
152 << "Failed to parse file " << m_filePath << "at line "
153 << line;
154
155 return false;
156 }
157
158 if(codeLength != m_outputCodeLength)
159 {
160 return false;
161 }
162
163 // At this point, it seems we are in the proper
164 // section.
165
166 return true;
167 }
168
169 // If we are here, that means that the section is not for us.
170
171 // qDebug() << __FILE__ << __LINE__
172 // << "Line is no proper section divider.";
173
174 return false;
175 }
176
177
178 void
179 MonomerDictionary::skipSection(QTextStream *stream)
180 {
181 // We have entered a section, all we have to do is go through it
182 // and return when we have found either the end of the stream or
183 // the {END} marker.
184
185 qint64 lineLength = 1024;
186 QString line;
187
188 while(!stream->atEnd())
189 {
190 line = stream->readLine(lineLength);
191
192 if(!line.contains("{END}"))
193 {
194 line = stream->readLine(lineLength);
195 }
196 else
197 return;
198 }
199 }
200
201
202 int
203 MonomerDictionary::parseSection(QTextStream *stream)
204 {
205 Q_ASSERT(stream);
206
207 qint64 lineLength = 1024;
208 QString line;
209
210 // Iterate in the file using the stream and for each line create
211 // an item to insert into the dictionary hash.
212
213 while(!stream->atEnd())
214 {
215 line = stream->readLine(lineLength);
216
217 // We might encounter the end of the section, that is a line
218 // having {END} as its sole content.
219
220 if(line.contains("{END}"))
221 break;
222
223 QStringList stringList = line.split('>');
224
225 QString inputCode = stringList.first();
226 QString outputCode = stringList.last();
227
228 // Check that the monomer codes have the proper length.
229
230 if(inputCode.length() != m_inputCodeLength ||
231 outputCode.length() != m_outputCodeLength)
232 {
233 qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
234 << QObject::tr("Failed to load dictionary.")
235 << QObject::tr("Monomer code lengths do not match:")
236 << QObject::tr("inputCode:") << inputCode
237 << QObject::tr("outputCode:") << outputCode;
238
239
240 // We have to empty the hash
241 m_dictionaryHash.clear();
242
243 break;
244 }
245
246 m_dictionaryHash.insert(inputCode, outputCode);
247
248 // qDebug() << __FILE__ << __LINE__
249 // << stringList.first () << stringList.last ();
250 }
251
252 // At this point the parsing is finished, either because we
253 // encountered the {END} section-ending delimiter, or because we
254 // reached the en of file.
255
256 int hashSize = m_dictionaryHash.size();
257
258 if(hashSize)
259 m_dictionaryLoaded = true;
260 else
261 {
262 qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
263 << QObject::tr("Failed to load dictionary.");
264
265 m_dictionaryLoaded = false;
266 }
267
268 return hashSize;
269 }
270
271
272 bool
273 MonomerDictionary::loadDictionary()
274 {
275 // Load the file and for each line deconstruct the item into two
276 // QString objects that are used to make a QHash entry in
277 // QHash<QString, QString> m_dictionaryHash.
278 bool success = true;
279 qint64 lineLength = 1024;
280 QString line;
281
282 QFile file(m_filePath);
283
284 if(!file.open(QIODevice::ReadOnly))
285 {
286
287 m_dictionaryLoaded = false;
288
289 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
290 << "Failed to open file" << m_filePath << "for writing.";
291
292 return false;
293 }
294
295 if(m_inputCodeLength < 1 || m_outputCodeLength < 1)
296 {
297 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
298 << "Failed to parse file " << m_filePath
299 << "Please, set the m_inputCodeLength and "
300 "m_ouputCodeLength variables first.";
301
302 return false;
303 }
304
305 QTextStream *stream = new QTextStream(&file);
306 stream->setEncoding(QStringConverter::Utf8);
307
308 while(!stream->atEnd())
309 {
310 line = stream->readLine(lineLength);
311
312 // qDebug() << __FILE__ << __LINE__
313 // << "line: " << line;
314
315 // Remove spaces from start and end of line.
316 line = line.simplified();
317
318 if(line.startsWith('#') || line.isEmpty())
319 {
320 line = stream->readLine(lineLength);
321 continue;
322 }
323
324 // There might be any number of sections in the file, all
325 // delimited with a X>Y directive, indicating how many
326 // characters are allowed for the input code and for the
327 // output code.
328
329 if(!isLineProperSectionDivider(line))
330 {
331 // qDebug() << __FILE__ << __LINE__
332 // << "skipping line:" << line;
333
334 line = stream->readLine(lineLength);
335 continue;
336 }
337 else
338 {
339 // qDebug() << __FILE__ << __LINE__
340 // << "parsing section: " << line;
341
342 if(parseSection(stream) < 1)
343 {
344 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
345 << "Failed to parse file " << m_filePath;
346
347 success = false;
348 break;
349 }
350 else
351 {
352 // We successfully parsed the section. Our work is done.
353
354 success = true;
355 break;
356 }
357 }
358 }
359
360 delete stream;
361
362 return success;
363 }
364
365
366 QStringList *
367 MonomerDictionary::translate(const QStringList &chain_string_list)
368 {
369 // The string in sequence is a space-separated list of monomer
370 // codes in the original monomer code format. We have to translate
371 // that to the proper monomer code format using the hash in this
372 // dictionary.
373
374 QStringList *outputChainStringList = new QStringList();
375
376 if(!chain_string_list.isEmpty())
377 m_inputChainStringList = chain_string_list;
378
379 // If there is nothing to do return an empty string list so that
380 // caller knows nothing is actually wrong, only there is no
381 // sequence to translate.
382 if(m_inputChainStringList.isEmpty())
383 return outputChainStringList;
384
385 // Iterate in each chain string of the list and perform the
386 // translation.
387
388 for(int iter = 0; iter < m_inputChainStringList.size(); ++iter)
389 {
390 QString iterString = chain_string_list.at(iter);
391
392 // qDebug() << __FILE__ << __LINE__
393 // << "translating sequence:" << iterString;
394
395 QStringList codeList =
396 iterString.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts);
397
398 // qDebug() << __FILE__ << __LINE__
399 // << "codeList:" << codeList;
400
401 // qDebug() << __FILE__ << __LINE__
402 // << "hash:"
403 // << m_dictionaryHash;
404
405 for(int jter = 0; jter < codeList.size(); ++jter)
406 {
407 QString code = codeList.at(jter);
408
409 QHash<QString, QString>::const_iterator hashIter =
410 m_dictionaryHash.find(code);
411
412 if(hashIter != m_dictionaryHash.end())
413 codeList.replace(jter, hashIter.value());
414 else
415 {
416 // Delete the string list, set the pointer to 0 and
417 // return that pointer so that caller knows something
418 // has gone wrong.
419
420 qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
421 << "Failed to convert monomer code " << code;
422
423 outputChainStringList->clear();
424
425 delete outputChainStringList;
426 outputChainStringList = 0;
427
428 return outputChainStringList;
429 }
430 }
431
432 // At this point the sequence codes have been translated. Join all
433 // the item of the codeList into one single string.
434
435 outputChainStringList->append(codeList.join(QString("")));
436 }
437
438 // End of
439 // for (int iter = 0; iter < chainStringList.size(); ++iter)
440
441 // If no translation could be performed, return a n
442
443 if(!outputChainStringList->size())
444 {
445 outputChainStringList->clear();
446
447 delete outputChainStringList;
448 outputChainStringList = 0;
449 }
450
451 return outputChainStringList;
452 }
453
454
455 } // namespace libXpertMass
456
457 } // namespace MsXpS
458