| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* BEGIN software license | ||
| 2 | * | ||
| 3 | * MsXpertSuite - mass spectrometry software suite | ||
| 4 | * ----------------------------------------------- | ||
| 5 | * Copyright(C) 2009,...,2018 Filippo Rusconi | ||
| 6 | * | ||
| 7 | * http://www.msxpertsuite.org | ||
| 8 | * | ||
| 9 | * This file is part of the MsXpertSuite project. | ||
| 10 | * | ||
| 11 | * The MsXpertSuite project is the successor of the massXpert project. This | ||
| 12 | * project now includes various independent modules: | ||
| 13 | * | ||
| 14 | * - massXpert, model polymer chemistries and simulate mass spectrometric data; | ||
| 15 | * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner; | ||
| 16 | * | ||
| 17 | * This program is free software: you can redistribute it and/or modify | ||
| 18 | * it under the terms of the GNU General Public License as published by | ||
| 19 | * the Free Software Foundation, either version 3 of the License, or | ||
| 20 | * (at your option) any later version. | ||
| 21 | * | ||
| 22 | * This program is distributed in the hope that it will be useful, | ||
| 23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 25 | * GNU General Public License for more details. | ||
| 26 | * | ||
| 27 | * You should have received a copy of the GNU General Public License | ||
| 28 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 29 | * | ||
| 30 | * END software license | ||
| 31 | */ | ||
| 32 | |||
| 33 | |||
| 34 | /////////////////////// Qt includes | ||
| 35 | #include <QObject> | ||
| 36 | #include <QFile> | ||
| 37 | |||
| 38 | |||
| 39 | /////////////////////// Local includes | ||
| 40 | #include "MonomerDictionary.hpp" | ||
| 41 | #include "Sequence.hpp" | ||
| 42 | |||
| 43 | |||
| 44 | namespace MsXpS | ||
| 45 | { | ||
| 46 | |||
| 47 | namespace libXpertMass | ||
| 48 | { | ||
| 49 | |||
| 50 | |||
| 51 | ✗ | MonomerDictionary::MonomerDictionary(QString file_path, | |
| 52 | const QStringList &input_chain_string_list, | ||
| 53 | int input_code_length, | ||
| 54 | ✗ | int output_code_length) | |
| 55 | ✗ | : m_filePath(file_path), | |
| 56 | ✗ | m_inputChainStringList(input_chain_string_list), | |
| 57 | ✗ | m_inputCodeLength(input_code_length), | |
| 58 | ✗ | m_outputCodeLength(output_code_length) | |
| 59 | { | ||
| 60 | ✗ | } | |
| 61 | |||
| 62 | /*! | ||
| 63 | \brief Destructs this MonomerDictionary instance. | ||
| 64 | */ | ||
| 65 | ✗ | MonomerDictionary::~MonomerDictionary() | |
| 66 | { | ||
| 67 | ✗ | } | |
| 68 | |||
| 69 | void | ||
| 70 | ✗ | MonomerDictionary::setFilePath(QString &file_path) | |
| 71 | { | ||
| 72 | ✗ | m_filePath = file_path; | |
| 73 | ✗ | } | |
| 74 | |||
| 75 | void | ||
| 76 | ✗ | MonomerDictionary::setInputChainStringList( | |
| 77 | const QStringList &input_chain_string_list) | ||
| 78 | { | ||
| 79 | ✗ | m_inputChainStringList = input_chain_string_list; | |
| 80 | ✗ | } | |
| 81 | |||
| 82 | void | ||
| 83 | ✗ | MonomerDictionary::setInputCodeLength(int code_length) | |
| 84 | { | ||
| 85 | ✗ | m_inputCodeLength = code_length; | |
| 86 | ✗ | } | |
| 87 | |||
| 88 | void | ||
| 89 | ✗ | MonomerDictionary::setOutputCodeLength(int code_length) | |
| 90 | { | ||
| 91 | ✗ | m_outputCodeLength = code_length; | |
| 92 | ✗ | } | |
| 93 | |||
| 94 | bool | ||
| 95 | ✗ | MonomerDictionary::isLineProperSectionDivider(const QString &line) | |
| 96 | { | ||
| 97 | // Section dividers in the monomer dictionary file format are | ||
| 98 | // lines containing the following syntax: X>Y, that is for example | ||
| 99 | // 3>1. This means that the following conversion rules (like | ||
| 100 | // ILE>I) should convert 3-letter codes into 1-letter codes. | ||
| 101 | |||
| 102 | // However, this line should only be considered proper if X is | ||
| 103 | // actually the value of m_inputCodeLength and Y the value of | ||
| 104 | // m_outputCodeLength. | ||
| 105 | |||
| 106 | // qDebug() << __FILE__ << __LINE__ | ||
| 107 | // << "Checking if line is proper section divider :" << line; | ||
| 108 | |||
| 109 | ✗ | if(line.contains(QRegularExpression("[0-9]+>[0-9]+"))) | |
| 110 | { | ||
| 111 | // We are opening a new section, get the input/output code | ||
| 112 | // lengths and if they match what we expect, then set the | ||
| 113 | // current stream position and call the section parser. | ||
| 114 | |||
| 115 | ✗ | int greaterThanIndex = line.indexOf('>'); | |
| 116 | |||
| 117 | ✗ | QString codeLengthString = line.left(greaterThanIndex); | |
| 118 | |||
| 119 | // qDebug() << __FILE__ << __LINE__ | ||
| 120 | // << "Left codeLengthString:" << codeLengthString | ||
| 121 | // << "m_inputCodeLength:" << m_inputCodeLength; | ||
| 122 | |||
| 123 | ✗ | bool ok = false; | |
| 124 | ✗ | int codeLength = codeLengthString.toInt(&ok, 10); | |
| 125 | |||
| 126 | ✗ | if(!codeLength && !ok) | |
| 127 | { | ||
| 128 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary" | |
| 129 | ✗ | << "Failed to parse file " << m_filePath << "at line " | |
| 130 | ✗ | << line; | |
| 131 | |||
| 132 | ✗ | return false; | |
| 133 | } | ||
| 134 | |||
| 135 | ✗ | if(codeLength != m_inputCodeLength) | |
| 136 | { | ||
| 137 | ✗ | return false; | |
| 138 | } | ||
| 139 | |||
| 140 | ✗ | codeLengthString = line.mid(greaterThanIndex + 1, -1); | |
| 141 | |||
| 142 | // qDebug() << __FILE__ << __LINE__ | ||
| 143 | // << "Right codeLengthString:" << codeLengthString | ||
| 144 | // << "m_outputCodeLength:" << m_outputCodeLength; | ||
| 145 | |||
| 146 | ✗ | ok = false; | |
| 147 | ✗ | codeLength = codeLengthString.toInt(&ok, 10); | |
| 148 | |||
| 149 | ✗ | if(!codeLength && !ok) | |
| 150 | { | ||
| 151 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary" | |
| 152 | ✗ | << "Failed to parse file " << m_filePath << "at line " | |
| 153 | ✗ | << line; | |
| 154 | |||
| 155 | ✗ | return false; | |
| 156 | } | ||
| 157 | |||
| 158 | ✗ | if(codeLength != m_outputCodeLength) | |
| 159 | { | ||
| 160 | ✗ | return false; | |
| 161 | } | ||
| 162 | |||
| 163 | // At this point, it seems we are in the proper | ||
| 164 | // section. | ||
| 165 | |||
| 166 | ✗ | return true; | |
| 167 | ✗ | } | |
| 168 | |||
| 169 | // If we are here, that means that the section is not for us. | ||
| 170 | |||
| 171 | // qDebug() << __FILE__ << __LINE__ | ||
| 172 | // << "Line is no proper section divider."; | ||
| 173 | |||
| 174 | ✗ | return false; | |
| 175 | } | ||
| 176 | |||
| 177 | |||
| 178 | void | ||
| 179 | ✗ | MonomerDictionary::skipSection(QTextStream *stream) | |
| 180 | { | ||
| 181 | // We have entered a section, all we have to do is go through it | ||
| 182 | // and return when we have found either the end of the stream or | ||
| 183 | // the {END} marker. | ||
| 184 | |||
| 185 | ✗ | qint64 lineLength = 1024; | |
| 186 | ✗ | QString line; | |
| 187 | |||
| 188 | ✗ | while(!stream->atEnd()) | |
| 189 | { | ||
| 190 | ✗ | line = stream->readLine(lineLength); | |
| 191 | |||
| 192 | ✗ | if(!line.contains("{END}")) | |
| 193 | { | ||
| 194 | ✗ | line = stream->readLine(lineLength); | |
| 195 | } | ||
| 196 | else | ||
| 197 | ✗ | return; | |
| 198 | } | ||
| 199 | ✗ | } | |
| 200 | |||
| 201 | |||
| 202 | int | ||
| 203 | ✗ | MonomerDictionary::parseSection(QTextStream *stream) | |
| 204 | { | ||
| 205 | ✗ | Q_ASSERT(stream); | |
| 206 | |||
| 207 | ✗ | qint64 lineLength = 1024; | |
| 208 | ✗ | QString line; | |
| 209 | |||
| 210 | // Iterate in the file using the stream and for each line create | ||
| 211 | // an item to insert into the dictionary hash. | ||
| 212 | |||
| 213 | ✗ | while(!stream->atEnd()) | |
| 214 | { | ||
| 215 | ✗ | line = stream->readLine(lineLength); | |
| 216 | |||
| 217 | // We might encounter the end of the section, that is a line | ||
| 218 | // having {END} as its sole content. | ||
| 219 | |||
| 220 | ✗ | if(line.contains("{END}")) | |
| 221 | ✗ | break; | |
| 222 | |||
| 223 | ✗ | QStringList stringList = line.split('>'); | |
| 224 | |||
| 225 | ✗ | QString inputCode = stringList.first(); | |
| 226 | ✗ | QString outputCode = stringList.last(); | |
| 227 | |||
| 228 | // Check that the monomer codes have the proper length. | ||
| 229 | |||
| 230 | ✗ | if(inputCode.length() != m_inputCodeLength || | |
| 231 | ✗ | outputCode.length() != m_outputCodeLength) | |
| 232 | { | ||
| 233 | ✗ | qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:") | |
| 234 | ✗ | << QObject::tr("Failed to load dictionary.") | |
| 235 | ✗ | << QObject::tr("Monomer code lengths do not match:") | |
| 236 | ✗ | << QObject::tr("inputCode:") << inputCode | |
| 237 | ✗ | << QObject::tr("outputCode:") << outputCode; | |
| 238 | |||
| 239 | |||
| 240 | // We have to empty the hash | ||
| 241 | ✗ | m_dictionaryHash.clear(); | |
| 242 | |||
| 243 | ✗ | break; | |
| 244 | } | ||
| 245 | |||
| 246 | ✗ | m_dictionaryHash.insert(inputCode, outputCode); | |
| 247 | |||
| 248 | // qDebug() << __FILE__ << __LINE__ | ||
| 249 | // << stringList.first () << stringList.last (); | ||
| 250 | ✗ | } | |
| 251 | |||
| 252 | // At this point the parsing is finished, either because we | ||
| 253 | // encountered the {END} section-ending delimiter, or because we | ||
| 254 | // reached the end of file. | ||
| 255 | |||
| 256 | ✗ | int hashSize = m_dictionaryHash.size(); | |
| 257 | |||
| 258 | ✗ | if(hashSize) | |
| 259 | ✗ | m_dictionaryLoaded = true; | |
| 260 | else | ||
| 261 | { | ||
| 262 | ✗ | qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:") | |
| 263 | ✗ | << QObject::tr("Failed to load dictionary."); | |
| 264 | |||
| 265 | ✗ | m_dictionaryLoaded = false; | |
| 266 | } | ||
| 267 | |||
| 268 | ✗ | return hashSize; | |
| 269 | ✗ | } | |
| 270 | |||
| 271 | |||
| 272 | bool | ||
| 273 | ✗ | MonomerDictionary::loadDictionary() | |
| 274 | { | ||
| 275 | // Load the file and for each line deconstruct the item into two | ||
| 276 | // QString objects that are used to make a QHash entry in | ||
| 277 | // QHash<QString, QString> m_dictionaryHash. | ||
| 278 | ✗ | bool success = true; | |
| 279 | ✗ | qint64 lineLength = 1024; | |
| 280 | ✗ | QString line; | |
| 281 | |||
| 282 | ✗ | QFile file(m_filePath); | |
| 283 | |||
| 284 | ✗ | if(!file.open(QIODevice::ReadOnly)) | |
| 285 | { | ||
| 286 | |||
| 287 | ✗ | m_dictionaryLoaded = false; | |
| 288 | |||
| 289 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" | |
| 290 | ✗ | << "Failed to open file" << m_filePath << "for writing."; | |
| 291 | |||
| 292 | ✗ | return false; | |
| 293 | } | ||
| 294 | |||
| 295 | ✗ | if(m_inputCodeLength < 1 || m_outputCodeLength < 1) | |
| 296 | { | ||
| 297 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" | |
| 298 | ✗ | << "Failed to parse file " << m_filePath | |
| 299 | << "Please, set the m_inputCodeLength and " | ||
| 300 | ✗ | "m_ouputCodeLength variables first."; | |
| 301 | |||
| 302 | ✗ | return false; | |
| 303 | } | ||
| 304 | |||
| 305 | ✗ | QTextStream *stream = new QTextStream(&file); | |
| 306 | ✗ | stream->setEncoding(QStringConverter::Utf8); | |
| 307 | |||
| 308 | ✗ | while(!stream->atEnd()) | |
| 309 | { | ||
| 310 | ✗ | line = stream->readLine(lineLength); | |
| 311 | |||
| 312 | // qDebug() << __FILE__ << __LINE__ | ||
| 313 | // << "line: " << line; | ||
| 314 | |||
| 315 | // Remove spaces from start and end of line. | ||
| 316 | ✗ | line = line.simplified(); | |
| 317 | |||
| 318 | ✗ | if(line.startsWith('#') || line.isEmpty()) | |
| 319 | { | ||
| 320 | ✗ | line = stream->readLine(lineLength); | |
| 321 | ✗ | continue; | |
| 322 | } | ||
| 323 | |||
| 324 | // There might be any number of sections in the file, all | ||
| 325 | // delimited with a X>Y directive, indicating how many | ||
| 326 | // characters are allowed for the input code and for the | ||
| 327 | // output code. | ||
| 328 | |||
| 329 | ✗ | if(!isLineProperSectionDivider(line)) | |
| 330 | { | ||
| 331 | // qDebug() << __FILE__ << __LINE__ | ||
| 332 | // << "skipping line:" << line; | ||
| 333 | |||
| 334 | ✗ | line = stream->readLine(lineLength); | |
| 335 | ✗ | continue; | |
| 336 | } | ||
| 337 | else | ||
| 338 | { | ||
| 339 | // qDebug() << __FILE__ << __LINE__ | ||
| 340 | // << "parsing section: " << line; | ||
| 341 | |||
| 342 | ✗ | if(parseSection(stream) < 1) | |
| 343 | { | ||
| 344 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" | |
| 345 | ✗ | << "Failed to parse file " << m_filePath; | |
| 346 | |||
| 347 | ✗ | success = false; | |
| 348 | ✗ | break; | |
| 349 | } | ||
| 350 | else | ||
| 351 | { | ||
| 352 | // We successfully parsed the section. Our work is done. | ||
| 353 | |||
| 354 | ✗ | success = true; | |
| 355 | ✗ | break; | |
| 356 | } | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | ✗ | delete stream; | |
| 361 | |||
| 362 | ✗ | return success; | |
| 363 | ✗ | } | |
| 364 | |||
| 365 | |||
| 366 | QStringList * | ||
| 367 | ✗ | MonomerDictionary::translate(const QStringList &chain_string_list) | |
| 368 | { | ||
| 369 | // The string in sequence is a space-separated list of monomer | ||
| 370 | // codes in the original monomer code format. We have to translate | ||
| 371 | // that to the proper monomer code format using the hash in this | ||
| 372 | // dictionary. | ||
| 373 | |||
| 374 | ✗ | QStringList *outputChainStringList = new QStringList(); | |
| 375 | |||
| 376 | ✗ | if(!chain_string_list.isEmpty()) | |
| 377 | ✗ | m_inputChainStringList = chain_string_list; | |
| 378 | |||
| 379 | // If there is nothing to do return an empty string list so that | ||
| 380 | // caller knows nothing is actually wrong, only there is no | ||
| 381 | // sequence to translate. | ||
| 382 | ✗ | if(m_inputChainStringList.isEmpty()) | |
| 383 | ✗ | return outputChainStringList; | |
| 384 | |||
| 385 | // Iterate in each chain string of the list and perform the | ||
| 386 | // translation. | ||
| 387 | |||
| 388 | ✗ | for(int iter = 0; iter < m_inputChainStringList.size(); ++iter) | |
| 389 | { | ||
| 390 | ✗ | QString iterString = chain_string_list.at(iter); | |
| 391 | |||
| 392 | // qDebug() << __FILE__ << __LINE__ | ||
| 393 | // << "translating sequence:" << iterString; | ||
| 394 | |||
| 395 | QStringList codeList = | ||
| 396 | ✗ | iterString.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts); | |
| 397 | |||
| 398 | // qDebug() << __FILE__ << __LINE__ | ||
| 399 | // << "codeList:" << codeList; | ||
| 400 | |||
| 401 | // qDebug() << __FILE__ << __LINE__ | ||
| 402 | // << "hash:" | ||
| 403 | // << m_dictionaryHash; | ||
| 404 | |||
| 405 | ✗ | for(int jter = 0; jter < codeList.size(); ++jter) | |
| 406 | { | ||
| 407 | ✗ | QString code = codeList.at(jter); | |
| 408 | |||
| 409 | QHash<QString, QString>::const_iterator hashIter = | ||
| 410 | ✗ | m_dictionaryHash.find(code); | |
| 411 | |||
| 412 | ✗ | if(hashIter != m_dictionaryHash.end()) | |
| 413 | ✗ | codeList.replace(jter, hashIter.value()); | |
| 414 | else | ||
| 415 | { | ||
| 416 | // Delete the string list, set the pointer to 0 and | ||
| 417 | // return that pointer so that caller knows something | ||
| 418 | // has gone wrong. | ||
| 419 | |||
| 420 | ✗ | qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:" | |
| 421 | ✗ | << "Failed to convert monomer code " << code; | |
| 422 | |||
| 423 | ✗ | outputChainStringList->clear(); | |
| 424 | |||
| 425 | ✗ | delete outputChainStringList; | |
| 426 | ✗ | outputChainStringList = 0; | |
| 427 | |||
| 428 | ✗ | return outputChainStringList; | |
| 429 | } | ||
| 430 | ✗ | } | |
| 431 | |||
| 432 | // At this point the sequence codes have been translated. Join all | ||
| 433 | // the items of the codeList into one single string. | ||
| 434 | |||
| 435 | ✗ | outputChainStringList->append(codeList.join(QString(""))); | |
| 436 | ✗ | } | |
| 437 | |||
| 438 | // End of | ||
| 439 | // for (int iter = 0; iter < chainStringList.size(); ++iter) | ||
| 440 | |||
| 441 | // If no translation could be performed, return a null pointer. | ||
| 442 | |||
| 443 | ✗ | if(!outputChainStringList->size()) | |
| 444 | { | ||
| 445 | ✗ | outputChainStringList->clear(); | |
| 446 | |||
| 447 | ✗ | delete outputChainStringList; | |
| 448 | ✗ | outputChainStringList = 0; | |
| 449 | } | ||
| 450 | |||
| 451 | ✗ | return outputChainStringList; | |
| 452 | } | ||
| 453 | |||
| 454 | |||
| 455 | } // namespace libXpertMass | ||
| 456 | |||
| 457 | } // namespace MsXpS | ||
| 458 |