import React from 'react';
import * as helpers from '../../support/Helpers';
import * as validations from '../../support/Validations';

// Validations:
// If left blank (default) each validation will allow any value
// FileFormats, MolTypes and Count allow a single value or an array
// Length and Count arrays:
//   - index-0: minimum value; if undefined there is no minimum
//   - index-1: maximum value; if undefined there is no maximum
// Examples:
// validFileFormats = 'GenBank', ['GenBank', 'FASTA']
// validMolTypes = 'Protein', ['DNA', 'Protein']
// validLength = [1000, 100000]
// validCount = 1, [2, 6]

// TODO: add default placeholders
// const placeHolder = 'Upload a sequence file (e.g. GenBank, EMBL, FASTA) ...';

/**
 * Sequence-specific delegate for an uploaded (or accession-referenced) file.
 *
 * The delegate wraps a generic `file` object and fills in sequence metadata
 * (`fileFormat`, `molType`, `length`, `count`, `definition`, `master`) as
 * properties on itself. Problems found while processing are reported through
 * `file.fileError`; validation results are written to `file.validationErrors`.
 *
 * NOTE(review): the wrapped `file` is expected to provide `details()` and
 * `validationRules` (both are read in `validate`); their implementation is not
 * part of this module.
 */
class SequenceFile {

  /**
   * @param {Object} file - The generic file object this delegate describes.
   */
  constructor(file) {
    this._file = file;
  }

  /** The wrapped file object passed to the constructor. */
  get file() {
    return this._file;
  }

  //////////////////////////////////////////////////////////////////////////
  // Required Delegate Methods
  //////////////////////////////////////////////////////////////////////////

  /** Identifier for the kind of file this delegate handles. */
  get fileType() {
    return 'sequence';
  }

  /** Names of the metadata properties the processing methods set on this delegate. */
  get metaKeys() {
    return ['fileFormat', 'molType', 'length', 'count', 'definition', 'master'];
  }

  /** Names of the validation rules understood by `validate`. */
  get validationKeys() {
    return ['validFileFormats', 'validMolTypes', 'validLength', 'validCount'];
  }

  /**
   * Detects the sequence format from the first characters of `text` and
   * dispatches to the matching parser. Anything that is not GenBank (`LOCUS`),
   * EMBL (`ID`) or FASTA (`>`) is treated as raw sequence.
   *
   * @param {string} text - Full contents of the sequence file.
   */
  processText(text) {
    if (/^LOCUS\s+/.test(text)) {
      this.processGenbankText(text);
    } else if (/^ID\s+/.test(text)) {
      this.processEmblText(text);
    } else if (/^>/.test(text)) {
      this.processFastaText(text);
    } else {
      this.processRawText(text);
    }
  }

  /**
   * Looks up an accession with the NCBI ESummary service (nucleotide db) and
   * fills in the metadata from the returned summary. Never rejects: request,
   * HTTP and parsing failures are stored in `file.fileError`.
   *
   * @param {string} accession - Accession or id to look up.
   * @returns {Promise<Object>} The wrapped file object.
   */
  async processAccession(accession) {
    const file = this.file;
    file.accession = accession;
    // Escape the accession so characters such as '&' or spaces cannot alter the query string.
    const url = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=nucleotide&id=${encodeURIComponent(file.accession)}&retmode=json`;
    try {
      const response = await fetch(url);
      if (!response.ok) {
        // Report HTTP failures as such rather than as a missing accession.
        throw new Error(`NCBI request failed with status ${response.status}`);
      }
      const data = await response.json();
      const uid = data.result && data.result.uids && data.result.uids[0];
      const seq = uid && data.result[uid];
      if (seq) {
        this.definition = seq.title;
        this.length = seq.slen;
        this.molType = (seq.moltype === 'aa') ? 'Protein' : 'DNA';
        file.fileFormat = 'GenBank';
        this.fileFormat = 'GenBank';
        this.count = 1;
        // `properties` may be absent from a summary; only master records carry the flag.
        if (seq.properties && seq.properties.master) {
          // For master records `slen` is used as the sequence count and the length is zeroed.
          this.length = 0;
          this.count = seq.slen;
          this.master = true;
        }
      } else {
        file.fileError = `Accession "${file.accession}" not found`;
      }
    } catch (error) {
      file.fileError = error.message;
    }
    return file;
  }

  /**
   * Checks the file's metadata against the validation rules and records the
   * resulting messages on `file.validationErrors`.
   *
   * @param {Object} [rules=this.file.validationRules] - May contain
   *   `validFileFormats`, `validMolTypes`, `validLength` and `validCount`.
   *   Missing rules object is treated as "no rules".
   * @returns {{ok: boolean, errors: string[]}} `ok` is true when no errors were found.
   */
  validate(rules=this.file.validationRules) {
    const validationErrors = [];
    // A file without any rules must not crash the destructuring below.
    const { validFileFormats, validMolTypes, validLength, validCount } = rules || {};
    const { meta } = this.file.details();
    const v = validations;

    const fileFormat = meta.fileFormat;
    if (!v.validateString(fileFormat, validFileFormats)) {
      validationErrors.push(`File Format "${fileFormat}" is not valid. Must be ${helpers.toSentence([validFileFormats].flat(), {conjunction: 'or'})}`);
    }
    const molType = meta.molType;
    if (!v.validateString(molType, validMolTypes)) {
      validationErrors.push(`Molecule Type "${molType}" is not valid. Must be ${helpers.toSentence([validMolTypes].flat(), {conjunction: 'or'})}`);
    }
    const length = meta.length;
    if (!v.validateNumericRange(length, validLength)) {
      validationErrors.push(`Sequence Length (${helpers.commaNumber(length)}) is not valid. ${v.rangeValidationString(validLength)}`);
    }
    const count = meta.count;
    if (!v.validateNumericRange(count, validCount)) {
      validationErrors.push(`Sequence Count (${helpers.commaNumber(count)}) is not valid. ${v.rangeValidationString(validCount)}`);
    }

    if (meta.master) {
      validationErrors.push('This is a master record for a sequencing project and contains no sequence data');
    }

    if (this.file.fileError) {
      validationErrors.push('There was a problem with the sequence file');
    }

    this.file.validationErrors = validationErrors;
    return {ok: (validationErrors.length === 0), errors: validationErrors};
  }

  /**
   * Formats the sequence length with its unit: `aa` for protein, `bp` otherwise.
   *
   * @param {Object} meta - Metadata with `length` and optional `molType`.
   * @returns {string} For example "1,000 bp" (number formatting is done by `helpers.commaNumber`).
   */
  static formatLength(meta) {
    const length = helpers.commaNumber(meta.length);
    const unit = (meta.molType && meta.molType.toLowerCase() === 'protein') ? 'aa' : 'bp';
    return `${length} ${unit}`;
  }

  /**
   * Builds the React element summarising a sequence file: one line with
   * format, length, molecule type and sequence count, followed by the
   * definition when there is one.
   *
   * Written with `React.createElement` rather than JSX so that this module is
   * plain JavaScript and its parsing/validation logic can be loaded without a
   * JSX transform. The element tree is the same as the JSX equivalent.
   *
   * @param {Object} [details={}] - Object whose `meta` holds the metadata.
   * @param {Object} [rules={}] - Accepted for interface compatibility; not used here.
   * @returns {Object} A React element.
   */
  static detailsRenderer(details={}, rules={}) {
    const meta = details.meta || {};
    const el = React.createElement;
    const lengthText = SequenceFile.formatLength(meta);
    // Number() keeps string counts such as "1" matching, as the previous loose comparison did.
    const countText = (Number(meta.count) === 1) ? '1 sequence' : `${meta.count} sequences`;
    const definition = (meta.definition && meta.definition.length > 0) ? el('div', null, meta.definition) : '';
    return el('div', null,
      el('div', null, meta.fileFormat, ' (', lengthText, '; ', meta.molType, '; ', countText, ')'),
      el('div', null, definition)
    );
  }

  //////////////////////////////////////////////////////////////////////////
  // Processing Sequencing Files
  //////////////////////////////////////////////////////////////////////////

  /**
   * Counts the matches of a global regular expression in `text`.
   * Returns 0 instead of failing when there is no match.
   *
   * @param {string} text - Text to search.
   * @param {RegExp} pattern - Regular expression with the `g` flag.
   * @returns {number} Number of matches.
   */
  _countMatches(text, pattern) {
    const found = text.match(pattern);
    return found ? found.length : 0;
  }

  /**
   * Extracts metadata from GenBank text. The length is the number of sequence
   * characters between each `ORIGIN` and the following `//`, summed over all
   * records; the count is the number of `//` terminator lines (0 for a
   * truncated file with no terminator). The definition is only kept for
   * single-record files.
   *
   * @param {string} text - GenBank formatted text.
   */
  processGenbankText(text) {
    this.fileFormat = 'GenBank';

    const molType = text.match(/\s+\d+\s+(bp|aa)\s+/);
    if (molType) {
      this.molType = (molType[1] === 'aa') ? 'Protein' : 'DNA';
    }

    const originBlocks = text.match(/ORIGIN([\s\S]*?)\/\//mg);
    if (originBlocks) {
      // Strip the keyword, position numbers, whitespace and the terminator; what is left is sequence.
      this.length = originBlocks.reduce(
        (total, block) => total + block.replace(/(ORIGIN|\s|\d|\/)/g, '').length,
        0
      );
    } else {
      this.length = 'length unknown';
    }

    this.count = this._countMatches(text, /^\/\//mg);
    const definition = text.match(/^DEFINITION\s+(.+)/m);
    this.definition = (definition && this.count === 1) ? definition[1] : '';
  }

  /**
   * Extracts metadata from EMBL text. The length is the number of sequence
   * characters between each `SQ` line and the following `//`, summed over all
   * records; the count is the number of `//` terminator lines (0 for a
   * truncated file with no terminator). The definition is only kept for
   * single-record files.
   *
   * @param {string} text - EMBL formatted text.
   */
  processEmblText(text) {
    this.fileFormat = 'EMBL';

    const molType = text.match(/\s+\d+\s+(BP|AA)/);
    if (molType) {
      this.molType = (molType[1] === 'AA') ? 'Protein' : 'DNA';
    }

    const sequenceBlocks = text.match(/^SQ.*?\n[\s\S]*?\/\//mg);
    if (sequenceBlocks) {
      this.length = sequenceBlocks.reduce((total, block) => {
        const withoutHeader = block.replace(/.*?\n/, ''); // Remove the SQ summary line
        return total + withoutHeader.replace(/[\s\d\/]/g, '').length;
      }, 0);
    } else {
      this.length = 'length unknown';
    }

    this.count = this._countMatches(text, /^\/\//mg);
    const definition = text.match(/^DE\s+(.+)/m);
    this.definition = (definition && this.count === 1) ? definition[1] : '';
  }

  /**
   * Extracts metadata from FASTA text. The count is the number of `>` header
   * lines, the length is the total number of non-whitespace characters outside
   * header lines, and the molecule type is guessed from the sequence content.
   * The definition is only kept for single-record files.
   *
   * @param {string} text - FASTA formatted text.
   */
  processFastaText(text) {
    this.fileFormat = 'FASTA';
    this.count = this._countMatches(text, /^>/mg);
    const definition = text.match(/^>(.*)/m);
    this.definition = (definition && this.count === 1) ? definition[1] : '';
    const sequence = text.replace(/^>.*/gm, '').replace(/\s+/g, '');
    this.length = sequence.length;
    this.molType = this.seqMolType(sequence);
    this.checkIUPAC(sequence);
  }

  /**
   * Treats `text` as a single unannotated sequence. Text containing characters
   * outside the ASCII range is rejected through `file.fileError` and no
   * metadata is set.
   *
   * @param {string} text - Raw sequence text.
   */
  processRawText(text) {
    if (!this.isASCII(text)) {
      this.file.fileError = "The file contains non-text characters. Is it a binary file?";
      return;
    }
    this.fileFormat = 'Raw';
    this.count = 1;
    this.definition = '';
    const sequence = text.replace(/\s+/g, '');
    this.length = sequence.length;
    this.molType = this.seqMolType(sequence);
    this.checkIUPAC(sequence);
  }

  /**
   * @param {string} text - Text to test.
   * @returns {boolean} True when every character is in the range 0x00-0x7F.
   */
  isASCII(text) {
    return /^[\x00-\x7F]*$/.test(text);
  }

  /**
   * Guesses the molecule type from the sequence composition: 'Protein' when
   * more than 25% of the characters are something other than A, G, C, T, N or
   * '-', otherwise 'DNA'. An empty sequence yields 'DNA'.
   *
   * @param {string} seq - Sequence without whitespace.
   * @returns {string} 'Protein' or 'DNA'.
   */
  seqMolType(seq) {
    const nonDNASeq = seq.replace(/[AGCTN\-]/gi, '');
    return ( (nonDNASeq.length / seq.length) > 0.25 ) ? 'Protein' : 'DNA';
  }

  /**
   * Reports characters that are not part of the alphabet accepted for the
   * current `molType` by setting `file.fileError`. Does nothing when `molType`
   * is neither 'DNA' nor 'Protein'.
   *
   * @param {string} seq - Sequence without whitespace.
   */
  checkIUPAC(seq) {
    let allowed;
    if (this.molType === 'DNA') {
      allowed = /[AGCTURYSWKMBDHVN\-\.]/gi;
    } else if (this.molType === 'Protein') {
      allowed = /[ARNDCQEGHILKMFPOSUTWYVBZXJ\-\.\*]/gi;
    } else {
      return;
    }
    const nonIUPAC = seq.replace(allowed, '');
    if (nonIUPAC.length > 0) {
      // List each offending character once, in order of first appearance.
      const chars = Array.from(new Set([...nonIUPAC])).join(',');
      this.file.fileError = `Non IUPAC characters found in this sequence: ${chars}`;
    }
  }

}

export default SequenceFile;
