Line |
Branch |
Exec |
Source |
1 |
|
|
/* BEGIN software license |
2 |
|
|
* |
3 |
|
|
* MsXpertSuite - mass spectrometry software suite |
4 |
|
|
* ----------------------------------------------- |
5 |
|
|
* Copyright(C) 2009,...,2018 Filippo Rusconi |
6 |
|
|
* |
7 |
|
|
* http://www.msxpertsuite.org |
8 |
|
|
* |
9 |
|
|
* This file is part of the MsXpertSuite project. |
10 |
|
|
* |
11 |
|
|
* The MsXpertSuite project is the successor of the massXpert project. This |
12 |
|
|
* project now includes various independent modules: |
13 |
|
|
* |
14 |
|
|
* - massXpert, model polymer chemistries and simulate mass spectrometric data; |
15 |
|
|
* - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner; |
16 |
|
|
* |
17 |
|
|
* This program is free software: you can redistribute it and/or modify |
18 |
|
|
* it under the terms of the GNU General Public License as published by |
19 |
|
|
* the Free Software Foundation, either version 3 of the License, or |
20 |
|
|
* (at your option) any later version. |
21 |
|
|
* |
22 |
|
|
* This program is distributed in the hope that it will be useful, |
23 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
24 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
25 |
|
|
* GNU General Public License for more details. |
26 |
|
|
* |
27 |
|
|
* You should have received a copy of the GNU General Public License |
28 |
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
29 |
|
|
* |
30 |
|
|
* END software license |
31 |
|
|
*/ |
32 |
|
|
|
33 |
|
|
|
34 |
|
|
/////////////////////// Qt includes |
35 |
|
|
#include <QObject> |
36 |
|
|
#include <QFile> |
37 |
|
|
|
38 |
|
|
|
39 |
|
|
/////////////////////// Local includes |
40 |
|
|
#include "MonomerDictionary.hpp" |
41 |
|
|
#include "Sequence.hpp" |
42 |
|
|
|
43 |
|
|
|
44 |
|
|
namespace MsXpS |
45 |
|
|
{ |
46 |
|
|
|
47 |
|
|
namespace libXpertMass |
48 |
|
|
{ |
49 |
|
|
|
50 |
|
|
|
51 |
|
✗ |
/*!
  \brief Constructs a MonomerDictionary instance.

  The arguments are only stored in the corresponding members; the body is
  empty, so no file is opened or parsed here:

  \list
  \li \a file_path is copied to m_filePath;
  \li \a input_chain_string_list is copied to m_inputChainStringList;
  \li \a input_code_length is copied to m_inputCodeLength;
  \li \a output_code_length is copied to m_outputCodeLength.
  \endlist
*/
MonomerDictionary::MonomerDictionary(QString file_path,
                                     const QStringList &input_chain_string_list,
                                     int input_code_length,
                                     int output_code_length)
  : m_filePath(file_path),
    m_inputChainStringList(input_chain_string_list),
    m_inputCodeLength(input_code_length),
    m_outputCodeLength(output_code_length)
{
}
61 |
|
|
|
62 |
|
|
/*! |
63 |
|
|
\brief Destructs this MonomerDictionary instance. |
64 |
|
|
*/ |
65 |
|
✗ |
MonomerDictionary::~MonomerDictionary() |
66 |
|
|
{ |
67 |
|
✗ |
} |
68 |
|
|
|
69 |
|
|
void |
70 |
|
✗ |
MonomerDictionary::setFilePath(QString &file_path) |
71 |
|
|
{ |
72 |
|
✗ |
m_filePath = file_path; |
73 |
|
✗ |
} |
74 |
|
|
|
75 |
|
|
void |
76 |
|
✗ |
MonomerDictionary::setInputChainStringList( |
77 |
|
|
const QStringList &input_chain_string_list) |
78 |
|
|
{ |
79 |
|
✗ |
m_inputChainStringList = input_chain_string_list; |
80 |
|
✗ |
} |
81 |
|
|
|
82 |
|
|
void |
83 |
|
✗ |
MonomerDictionary::setInputCodeLength(int code_length) |
84 |
|
|
{ |
85 |
|
✗ |
m_inputCodeLength = code_length; |
86 |
|
✗ |
} |
87 |
|
|
|
88 |
|
|
void |
89 |
|
✗ |
MonomerDictionary::setOutputCodeLength(int code_length) |
90 |
|
|
{ |
91 |
|
✗ |
m_outputCodeLength = code_length; |
92 |
|
✗ |
} |
93 |
|
|
|
94 |
|
|
bool |
95 |
|
✗ |
MonomerDictionary::isLineProperSectionDivider(const QString &line) |
96 |
|
|
{ |
97 |
|
|
// Section dividers in the monomer dictionary file format are |
98 |
|
|
// lines containing the following syntax: X>Y, that is for example |
99 |
|
|
// 3>1. This means that the following conversion rules (like |
100 |
|
|
// ILE>I) should convert 3-letter codes into 1-letter codes. |
101 |
|
|
|
102 |
|
|
// However, this line should only be considered proper if X is |
103 |
|
|
// actually the value of m_inputCodeLength and Y the value of |
104 |
|
|
// m_outputCodeLength. |
105 |
|
|
|
106 |
|
|
// qDebug() << __FILE__ << __LINE__ |
107 |
|
|
// << "Checking if line is proper section divider :" << line; |
108 |
|
|
|
109 |
|
✗ |
if(line.contains(QRegularExpression("[0-9]+>[0-9]+"))) |
110 |
|
|
{ |
111 |
|
|
// We are opening a new section, get the input/output code |
112 |
|
|
// lengths and if they math what we expect, then set the |
113 |
|
|
// current stream position and call the section parser. |
114 |
|
|
|
115 |
|
✗ |
int greaterThanIndex = line.indexOf('>'); |
116 |
|
|
|
117 |
|
✗ |
QString codeLengthString = line.left(greaterThanIndex); |
118 |
|
|
|
119 |
|
|
// qDebug() << __FILE__ << __LINE__ |
120 |
|
|
// << "Left codeLengthString:" << codeLengthString |
121 |
|
|
// << "m_inputCodeLength:" << m_inputCodeLength; |
122 |
|
|
|
123 |
|
✗ |
bool ok = false; |
124 |
|
✗ |
int codeLength = codeLengthString.toInt(&ok, 10); |
125 |
|
|
|
126 |
|
✗ |
if(!codeLength && !ok) |
127 |
|
|
{ |
128 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << "Monomer dictionary" |
129 |
|
✗ |
<< "Failed to parse file " << m_filePath << "at line " |
130 |
|
✗ |
<< line; |
131 |
|
|
|
132 |
|
✗ |
return false; |
133 |
|
|
} |
134 |
|
|
|
135 |
|
✗ |
if(codeLength != m_inputCodeLength) |
136 |
|
|
{ |
137 |
|
✗ |
return false; |
138 |
|
|
} |
139 |
|
|
|
140 |
|
✗ |
codeLengthString = line.mid(greaterThanIndex + 1, -1); |
141 |
|
|
|
142 |
|
|
// qDebug() << __FILE__ << __LINE__ |
143 |
|
|
// << "Right codeLengthString:" << codeLengthString |
144 |
|
|
// << "m_outputCodeLength:" << m_outputCodeLength; |
145 |
|
|
|
146 |
|
✗ |
ok = false; |
147 |
|
✗ |
codeLength = codeLengthString.toInt(&ok, 10); |
148 |
|
|
|
149 |
|
✗ |
if(!codeLength && !ok) |
150 |
|
|
{ |
151 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << "Monomer dictionary" |
152 |
|
✗ |
<< "Failed to parse file " << m_filePath << "at line " |
153 |
|
✗ |
<< line; |
154 |
|
|
|
155 |
|
✗ |
return false; |
156 |
|
|
} |
157 |
|
|
|
158 |
|
✗ |
if(codeLength != m_outputCodeLength) |
159 |
|
|
{ |
160 |
|
✗ |
return false; |
161 |
|
|
} |
162 |
|
|
|
163 |
|
|
// At this point, it seems we are in the proper |
164 |
|
|
// section. |
165 |
|
|
|
166 |
|
✗ |
return true; |
167 |
|
✗ |
} |
168 |
|
|
|
169 |
|
|
// If we are here, that means that the section is not for us. |
170 |
|
|
|
171 |
|
|
// qDebug() << __FILE__ << __LINE__ |
172 |
|
|
// << "Line is no proper section divider."; |
173 |
|
|
|
174 |
|
✗ |
return false; |
175 |
|
|
} |
176 |
|
|
|
177 |
|
|
|
178 |
|
|
void |
179 |
|
✗ |
MonomerDictionary::skipSection(QTextStream *stream) |
180 |
|
|
{ |
181 |
|
|
// We have entered a section, all we have to do is go through it |
182 |
|
|
// and return when we have found either the end of the stream or |
183 |
|
|
// the {END} marker. |
184 |
|
|
|
185 |
|
✗ |
qint64 lineLength = 1024; |
186 |
|
✗ |
QString line; |
187 |
|
|
|
188 |
|
✗ |
while(!stream->atEnd()) |
189 |
|
|
{ |
190 |
|
✗ |
line = stream->readLine(lineLength); |
191 |
|
|
|
192 |
|
✗ |
if(!line.contains("{END}")) |
193 |
|
|
{ |
194 |
|
✗ |
line = stream->readLine(lineLength); |
195 |
|
|
} |
196 |
|
|
else |
197 |
|
✗ |
return; |
198 |
|
|
} |
199 |
|
✗ |
} |
200 |
|
|
|
201 |
|
|
|
202 |
|
|
int |
203 |
|
✗ |
MonomerDictionary::parseSection(QTextStream *stream) |
204 |
|
|
{ |
205 |
|
✗ |
Q_ASSERT(stream); |
206 |
|
|
|
207 |
|
✗ |
qint64 lineLength = 1024; |
208 |
|
✗ |
QString line; |
209 |
|
|
|
210 |
|
|
// Iterate in the file using the stream and for each line create |
211 |
|
|
// an item to insert into the dictionary hash. |
212 |
|
|
|
213 |
|
✗ |
while(!stream->atEnd()) |
214 |
|
|
{ |
215 |
|
✗ |
line = stream->readLine(lineLength); |
216 |
|
|
|
217 |
|
|
// We might encounter the end of the section, that is a line |
218 |
|
|
// having {END} as its sole content. |
219 |
|
|
|
220 |
|
✗ |
if(line.contains("{END}")) |
221 |
|
✗ |
break; |
222 |
|
|
|
223 |
|
✗ |
QStringList stringList = line.split('>'); |
224 |
|
|
|
225 |
|
✗ |
QString inputCode = stringList.first(); |
226 |
|
✗ |
QString outputCode = stringList.last(); |
227 |
|
|
|
228 |
|
|
// Check that the monomer codes have the proper length. |
229 |
|
|
|
230 |
|
✗ |
if(inputCode.length() != m_inputCodeLength || |
231 |
|
✗ |
outputCode.length() != m_outputCodeLength) |
232 |
|
|
{ |
233 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:") |
234 |
|
✗ |
<< QObject::tr("Failed to load dictionary.") |
235 |
|
✗ |
<< QObject::tr("Monomer code lengths do not match:") |
236 |
|
✗ |
<< QObject::tr("inputCode:") << inputCode |
237 |
|
✗ |
<< QObject::tr("outputCode:") << outputCode; |
238 |
|
|
|
239 |
|
|
|
240 |
|
|
// We have to empty the hash |
241 |
|
✗ |
m_dictionaryHash.clear(); |
242 |
|
|
|
243 |
|
✗ |
break; |
244 |
|
|
} |
245 |
|
|
|
246 |
|
✗ |
m_dictionaryHash.insert(inputCode, outputCode); |
247 |
|
|
|
248 |
|
|
// qDebug() << __FILE__ << __LINE__ |
249 |
|
|
// << stringList.first () << stringList.last (); |
250 |
|
✗ |
} |
251 |
|
|
|
252 |
|
|
// At this point the parsing is finished, either because we |
253 |
|
|
// encountered the {END} section-ending delimiter, or because we |
254 |
|
|
// reached the en of file. |
255 |
|
|
|
256 |
|
✗ |
int hashSize = m_dictionaryHash.size(); |
257 |
|
|
|
258 |
|
✗ |
if(hashSize) |
259 |
|
✗ |
m_dictionaryLoaded = true; |
260 |
|
|
else |
261 |
|
|
{ |
262 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:") |
263 |
|
✗ |
<< QObject::tr("Failed to load dictionary."); |
264 |
|
|
|
265 |
|
✗ |
m_dictionaryLoaded = false; |
266 |
|
|
} |
267 |
|
|
|
268 |
|
✗ |
return hashSize; |
269 |
|
✗ |
} |
270 |
|
|
|
271 |
|
|
|
272 |
|
|
bool |
273 |
|
✗ |
MonomerDictionary::loadDictionary() |
274 |
|
|
{ |
275 |
|
|
// Load the file and for each line deconstruct the item into two |
276 |
|
|
// QString objects that are used to make a QHash entry in |
277 |
|
|
// QHash<QString, QString> m_dictionaryHash. |
278 |
|
✗ |
bool success = true; |
279 |
|
✗ |
qint64 lineLength = 1024; |
280 |
|
✗ |
QString line; |
281 |
|
|
|
282 |
|
✗ |
QFile file(m_filePath); |
283 |
|
|
|
284 |
|
✗ |
if(!file.open(QIODevice::ReadOnly)) |
285 |
|
|
{ |
286 |
|
|
|
287 |
|
✗ |
m_dictionaryLoaded = false; |
288 |
|
|
|
289 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" |
290 |
|
✗ |
<< "Failed to open file" << m_filePath << "for writing."; |
291 |
|
|
|
292 |
|
✗ |
return false; |
293 |
|
|
} |
294 |
|
|
|
295 |
|
✗ |
if(m_inputCodeLength < 1 || m_outputCodeLength < 1) |
296 |
|
|
{ |
297 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" |
298 |
|
✗ |
<< "Failed to parse file " << m_filePath |
299 |
|
|
<< "Please, set the m_inputCodeLength and " |
300 |
|
✗ |
"m_ouputCodeLength variables first."; |
301 |
|
|
|
302 |
|
✗ |
return false; |
303 |
|
|
} |
304 |
|
|
|
305 |
|
✗ |
QTextStream *stream = new QTextStream(&file); |
306 |
|
✗ |
stream->setEncoding(QStringConverter::Utf8); |
307 |
|
|
|
308 |
|
✗ |
while(!stream->atEnd()) |
309 |
|
|
{ |
310 |
|
✗ |
line = stream->readLine(lineLength); |
311 |
|
|
|
312 |
|
|
// qDebug() << __FILE__ << __LINE__ |
313 |
|
|
// << "line: " << line; |
314 |
|
|
|
315 |
|
|
// Remove spaces from start and end of line. |
316 |
|
✗ |
line = line.simplified(); |
317 |
|
|
|
318 |
|
✗ |
if(line.startsWith('#') || line.isEmpty()) |
319 |
|
|
{ |
320 |
|
✗ |
line = stream->readLine(lineLength); |
321 |
|
✗ |
continue; |
322 |
|
|
} |
323 |
|
|
|
324 |
|
|
// There might be any number of sections in the file, all |
325 |
|
|
// delimited with a X>Y directive, indicating how many |
326 |
|
|
// characters are allowed for the input code and for the |
327 |
|
|
// output code. |
328 |
|
|
|
329 |
|
✗ |
if(!isLineProperSectionDivider(line)) |
330 |
|
|
{ |
331 |
|
|
// qDebug() << __FILE__ << __LINE__ |
332 |
|
|
// << "skipping line:" << line; |
333 |
|
|
|
334 |
|
✗ |
line = stream->readLine(lineLength); |
335 |
|
✗ |
continue; |
336 |
|
|
} |
337 |
|
|
else |
338 |
|
|
{ |
339 |
|
|
// qDebug() << __FILE__ << __LINE__ |
340 |
|
|
// << "parsing section: " << line; |
341 |
|
|
|
342 |
|
✗ |
if(parseSection(stream) < 1) |
343 |
|
|
{ |
344 |
|
✗ |
qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" |
345 |
|
✗ |
<< "Failed to parse file " << m_filePath; |
346 |
|
|
|
347 |
|
✗ |
success = false; |
348 |
|
✗ |
break; |
349 |
|
|
} |
350 |
|
|
else |
351 |
|
|
{ |
352 |
|
|
// We successfully parsed the section. Our work is done. |
353 |
|
|
|
354 |
|
✗ |
success = true; |
355 |
|
✗ |
break; |
356 |
|
|
} |
357 |
|
|
} |
358 |
|
|
} |
359 |
|
|
|
360 |
|
✗ |
delete stream; |
361 |
|
|
|
362 |
|
✗ |
return success; |
363 |
|
✗ |
} |
364 |
|
|
|
365 |
|
|
|
366 |
|
|
/*!
  \brief Translates chain strings using the rules in m_dictionaryHash.

  If \a chain_string_list is not empty, it first replaces
  m_inputChainStringList. The strings of m_inputChainStringList are then
  translated one by one: each string is split on whitespace into monomer
  codes, each code is replaced by the value m_dictionaryHash associates with
  it and the replaced codes are concatenated without any separator.

  The strings are always taken from m_inputChainStringList, so that calling
  this function with an empty \a chain_string_list translates the list that
  was set previously.

  Returns a heap-allocated list that the caller owns and must delete:

  \list
  \li an empty list if there is nothing to translate, so that the caller
  knows nothing is actually wrong;
  \li a list with one translated string per input string otherwise.
  \endlist

  Returns a null pointer, after printing a debug message, as soon as a code is
  met that has no entry in m_dictionaryHash.
*/
QStringList *
MonomerDictionary::translate(const QStringList &chain_string_list)
{
  if(!chain_string_list.isEmpty())
    m_inputChainStringList = chain_string_list;

  // Nothing to do is not an error: hand back an empty list.
  if(m_inputChainStringList.isEmpty())
    return new QStringList();

  // Iterate through a const reference so that the range-based loop does not
  // detach the member list.
  const QStringList &inputChains = m_inputChainStringList;

  // Build the result in a local list and allocate only once it is complete;
  // the error path then has nothing to free.
  QStringList translatedChains;
  translatedChains.reserve(inputChains.size());

  for(const QString &chain : inputChains)
    {
      // The chain is a whitespace-separated list of monomer codes in the
      // original monomer code format.
      const QStringList codes =
        chain.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts);

      QString translatedChain;

      for(const QString &code : codes)
        {
          const auto rule = m_dictionaryHash.constFind(code);

          if(rule == m_dictionaryHash.constEnd())
            {
              // Return a null pointer so that the caller knows something
              // has gone wrong.
              qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
                       << "Failed to convert monomer code " << code;

              return nullptr;
            }

          translatedChain += rule.value();
        }

      translatedChains.append(translatedChain);
    }

  return new QStringList(translatedChains);
}
453 |
|
|
|
454 |
|
|
|
455 |
|
|
} // namespace libXpertMass |
456 |
|
|
|
457 |
|
|
} // namespace MsXpS |
458 |
|
|
|