/* eslint-disable no-console */
import { convertFromRaw } from 'draft-js';
import _ from 'lodash';
import mammoth from 'mammoth';

import SectionType, { CANDIDATE_TYPE } from '../enums/SectionType';
import { escapeTags, parseSection } from '../models/Content';
import DealNumberFormat, { extractNumber } from '../models/DealNumberFormat';
import { VariableType } from '../models/Variable';
import { MERGE_TYPE, hasDiffs } from '../models/Version';
import Classic from '../themes/Classic';
import stateFromHTML from '../utils/StateFromHTML';

import Fire from '@root/Fire';

import { APPENDIX_NAMES, SIGNATURE_INDICATORS } from './Legalese';

/*
  Count every node reachable below `children`: the direct entries plus,
  recursively, the entries of each nested `children` array.
  A missing (falsy) list counts as 0.
*/
const getTotalChildrenCount = (children) => {
  if (!children) return 0;

  // Seed with the direct children, then add each subtree's total on top.
  return children.reduce(
    (total, node) => (node.children ? total + getTotalChildrenCount(node.children) : total),
    children.length
  );
};

// Share of the document's total node count above which a table is treated as "too big".
const TABLE_THRESHOLD = 0.7;

/*
  True when the nodes nested under `table` make up more than TABLE_THRESHOLD
  of all nodes nested under `documentChildren`.
*/
const isTableExceedingThreshold = (documentChildren, table) => {
  const tableSize = getTotalChildrenCount(table.children);
  const documentSize = getTotalChildrenCount(documentChildren);

  return tableSize / documentSize > TABLE_THRESHOLD;
};

/*
  Turn a mammoth `run` into an HTML fragment.
  Children are concatenated ('break' => <br />, 'text'/'tab' => their value, anything else
  is logged and contributes nothing), then the result is wrapped according to the run's
  style flags. A run without children yields an empty string.
*/
const getStyledText = (run) => {
  const parts = run.children;
  if (!parts || !parts.length) {
    return '';
  }

  const toMarkup = (part) => {
    switch (part.type) {
      case 'break':
        return '<br />';
      case 'text':
      case 'tab':
        return part.value || '';
      default:
        console.warn('getStyledText() : Passed an unhandled run.children[0].type', run);
        return '';
    }
  };

  // Applied innermost-first, so the last enabled flag ends up as the outermost tag.
  const wrappers = [
    [run.isBold, 'strong'],
    [run.isUnderlined, 'u'],
    [run.isItalic, 'em'],
    [run.isStrikethrough, 'del'],
  ];

  return wrappers.reduce(
    (html, [enabled, tag]) => (enabled ? `<${tag}>${html}</${tag}>` : html),
    parts.map((part) => toMarkup(part)).join('')
  );
};

// Patterns matched against bookmark names to recover Outlaw roundtrip metadata.
// SECTION captures everything after "OUTLAW-" provided it starts with another dash.
const META_REGEXP = {
  SECTION: /OUTLAW-(-[\w-_]+)/,
  TITLE: /OUTLAW-TITLE/,
  IGNORE: /OUTLAW-IGNORE-[0-9]+/,
};

/*
  Inspect the `bookmarkStart` entries among `children` for Outlaw roundtrip markers.
  Returns:
    isTitle: bool           -- some bookmark name matches META_REGEXP.TITLE
    isIgnored: bool         -- some bookmark name matches META_REGEXP.IGNORE
    sectionId: string|null  -- capture of the last bookmark name matching META_REGEXP.SECTION
*/
const getOutlawMeta = (children = []) => {
  const meta = {
    isTitle: false,
    isIgnored: false,
    sectionId: null,
  };

  const bookmarkNames = _.filter(children, (node) => node.type === 'bookmarkStart').map((node) => node.name);

  for (const name of bookmarkNames) {
    meta.isTitle = meta.isTitle || META_REGEXP.TITLE.test(name);
    meta.isIgnored = meta.isIgnored || META_REGEXP.IGNORE.test(name);

    const sectionMatch = META_REGEXP.SECTION.exec(name);
    if (sectionMatch) {
      meta.sectionId = sectionMatch[1];
    }
  }

  return meta;
};

/*
  Keep only the children that carry something other than plain text.
  A 'run' is kept when at least one of its own children is not text/tab/break/bookmarkStart;
  any other node is kept when its own type is not one of those.
*/
const getNonTextChildren = (children = []) => {
  const TEXTUAL_TYPES = ['text', 'tab', 'break', 'bookmarkStart'];
  const isNonTextual = (node) => !_.includes(TEXTUAL_TYPES, node.type);

  return children.filter((child) =>
    child.type === 'run' ? Boolean(_.find(child.children, (inner) => isNonTextual(inner))) : isNonTextual(child)
  );
};

/**
 * Flatten a list of mammoth document nodes into "semi-raw" section blocks.
 *
 * Paragraphs become one block each, small tables are collapsed into a single block,
 * and tables exceeding TABLE_THRESHOLD are walked row by row / cell by cell instead.
 * Blocks whose text is empty after trimming are not emitted.
 *
 * @param {Array<Object>} rootChildren - nodes to walk; also used as the reference size
 *   when deciding whether a table exceeds the threshold.
 * @param {Array<Object>} images - extracted images; `images[i].altText` is read for the
 *   i-th image paragraph encountered.
 * @returns {{flat: Array<Object>, isRoundtrip: boolean, meta: Object, title: (string|null)}}
 *   `flat` holds `{ text, depth, hideOrder, type, sectionId, isTitle }` entries,
 *   `isRoundtrip` is true once any node carries an Outlaw section bookmark,
 *   `meta` may carry `hasTables` / `hasUnsupported` flags,
 *   `title` is the styled text of the last node flagged as the Outlaw title (or null).
 */
function parseObjects(rootChildren, images) {
  const flat = [],
    meta = {};

  let imgIndex = 0,
    isRoundtrip = false,
    title = null;

  const pushSection = ({ text, depth = 0, type, numbered, childMeta = {} }) => {
    flat.push({
      // Source documents will sometimes contain intended fields in angle brackets, e.g., <Insert Name>
      // We need to escape these so they're not interpreted as (invalid) html
      text: escapeTags(text),
      depth,
      hideOrder: !numbered,
      type,
      sectionId: childMeta.sectionId,
      isTitle: childMeta.isTitle,
    });
  };

  // Concatenate the styled HTML of every run in a paragraph.
  const joinRuns = (nodes) => nodes.map((node) => getStyledText(node)).join('');

  // A paragraph is treated as an image paragraph when its first non-text run starts with an image.
  const isImageParagraph = (nonTextChildren) => _.get(nonTextChildren, '[0].children[0].type') === 'image';

  const objectLooper = (loopedChildren) => {
    loopedChildren.forEach((child) => {
      let text = '',
        childDepth = 0,
        numbered = false,
        type = null;

      const { children = [] } = child,
        nonTextChildren = getNonTextChildren(children),
        childMeta = getOutlawMeta(children);

      switch (child.type) {
        // type=paragraph is the main type, it'll contain children.type=run of image | text | bookmarkStart
        case 'paragraph':
          if (child.numbering) {
            // List item.
            // NOTE(review): numbering.level may arrive as a string (presumably copied from the
            // docx XML attribute -- TODO confirm against the mammoth version in use). Coerce it so
            // that depth arithmetic done by callers stays numeric.
            childDepth = Number(child.numbering.level) || 0;
            numbered = true;
            text = joinRuns(children);
          } else if (childMeta.isIgnored) {
            // Outlaw-ignored paragraphs are not processed, but an image in there still has to
            // advance the image cursor so later images keep lining up with `images`.
            if (isImageParagraph(nonTextChildren)) {
              imgIndex++;
            }
          } else if (isImageParagraph(nonTextChildren)) {
            /*
              Since we're not importing images, we will create variable candidate for it to keep
              the document structure and guide user to add it if it's needed.
              (Images aren't imported because we're restricted to 1MB and would need a variable for each.)
            */
            // Guard the lookup: there may be fewer extracted images than image paragraphs.
            const altText = _.get(images, [imgIndex, 'altText'], '');
            text = `<em>[image ${imgIndex + 1}]<br>${altText}</em>`;
            imgIndex++;
          } else {
            text = joinRuns(children);
          }
          break;
        case 'table': {
          /*
            Before getting the table's content, verify if its size is bigger than our defined threshold.
            If so, keep looping the table's children which will create a section for each one.
          */
          if (isTableExceedingThreshold(rootChildren, child)) {
            console.log('FOUND: overexceeding table threshold!');
            // Restart the process, starting w/ the table.
            objectLooper(children);
            break;
          }

          /*
            If we're not exceeding threshold, grab all the content of the table and spit
            it into one section. This will stop the recursion of the table's children.
            (tableRow, tableCell won't be accessed through the objectLooper switch())
          */
          meta.hasTables = true;
          type = 'table';

          const tableText = [];
          children.forEach((tableRow) => {
            _.forEach(tableRow.children, (tableCell) => {
              _.forEach(tableCell.children, (paragraph) => {
                tableText.push(_.map(paragraph.children, (run) => getStyledText(run)).join('\r\n'));
              });
            });
          });

          text = tableText.join('<br>');
          break;
        }
        case 'tableRow':
        case 'tableCell':
          objectLooper(children);
          break;
        default:
          meta.hasUnsupported = true;
          console.log(`[${child.type}] not yet handled`);
          break;
      }

      // Detect that we're in a roundtrip
      if (childMeta.sectionId) {
        isRoundtrip = true;
      }

      // In case of a roundtrip, try to get the title
      if (childMeta.isTitle) {
        title = joinRuns(children);
      }

      // Only emit blocks that still have content once whitespace is stripped.
      if (`${text}`.trim()) {
        pushSection({
          text,
          depth: childDepth,
          type,
          numbered,
          childMeta,
        });
      }
    });
  };

  objectLooper(rootChildren);

  return {
    flat,
    isRoundtrip,
    meta,
    title,
  };
}

/*
  The collected numbering array can have gaps (e.g. nothing at level 0, first entry at level 1),
  which produces arrays like [empty, {…}] and crashes later processing.
  Drop the empty slots and renumber `level` from the resulting positions.
*/
const cleanupNumbering = (numbering = []) =>
  _.compact(numbering).map((format, level) => ({ ...format, level }));

/**
 * Parse a docx file into a nested list of candidate sections.
 *
 * The file is run through mammoth; its document tree is captured via `transformDocument`,
 * flattened by parseObjects(), and then interpreted block by block on a best-effort basis
 * (title, headers, manual numbering, signature blocks, appendices, title/body merging).
 *
 * @param {ArrayBuffer} arrayBuffer - docx contents, handed to mammoth as `{ arrayBuffer }`.
 * @param {Object} [deal] - existing deal when re-importing ("roundtrip"). When given, blocks are
 *   matched against `deal.sections` by bookmark id and diff statistics are computed.
 * @param {*} [mergeType] - when equal to MERGE_TYPE.OVERWRITE, sections are parsed as markup.
 * @returns {Promise<{numbering: (Object|null), outlaw: boolean, sections: Array<Object>, stats: Object, title: (string|null)}>}
 *   `numbering` is the style's numbering merged with the detected formats (null when none found),
 *   `outlaw` tells whether Outlaw section bookmarks were found,
 *   `sections` is the top-level section list (children nested under `children`),
 *   `stats` counts changed / unchanged / added / removed sections.
 */
// eslint-disable-next-line import/prefer-default-export
export async function parseDocx(arrayBuffer, deal, mergeType) {
  const docxSections = [],
    docxNumbering = [],
    docxImages = [];

  // Remember the first numbering format seen at each indentation level.
  const getNumberingLevels = (element) => {
    const existingFormat = docxNumbering[element.numbering.level];

    // Capture numbering format for this level of indentation
    if (!existingFormat) {
      docxNumbering[element.numbering.level] = element.numbering;
      return;
    }

    // If we've already found a numbering format for this level, others at the same level should be the same
    // (Except in the case of a subsequent appendix with a different numbering system, TBD how to handle that)
    if (!_.isEqual(existingFormat, element.numbering)) {
      console.log('Inconsistent number formatting:', existingFormat, element.numbering);
    }
  };

  /*
    mammoth `transformDocument` hook. The tree is returned untouched; we only walk it to
    collect numbering formats and the root-level elements (direct children of the document),
    which are later turned into sections.
  */
  const buildOutput = (element, parent = null) => {
    // Descendants are visited before the element itself.
    if (element.children) {
      _.forEach(element.children, (child) => {
        buildOutput(child, element);
      });
    }

    // Grab numbering levels
    if (element.numbering) {
      getNumberingLevels(element);
    }

    // Roundtrip detection is not done here; it relies on the bookmark names seen in parseObjects().
    if (parent && parent.type === 'document') {
      docxSections.push(element);
    }

    return element;
  };

  /*
    Handling images
    Mammoth doesn't provide the image data in the object format so we need to grab it through the converter.
    Images will theoretically be accessed in order, so we'll use this to match them to their proper sections.
  */
  const getDocxImages = ({ altText = '', contentType, read }) =>
    read('base64').then((imageBuffer) => {
      const img = {
        altText,
        contentType,
        src: `data:${contentType};base64,${imageBuffer}`,
      };

      docxImages.push(img);
      return img;
    });

  const options = {
    convertImage: mammoth.images.imgElement(getDocxImages),
    transformDocument: buildOutput,
  };

  // We're using Mammoth (https://www.npmjs.com/package/mammoth) to get html fragments from the docx
  // The generated HTML itself is discarded; we only need the side effects of the hooks above.
  await mammoth.convertToHtml({ arrayBuffer }, options);

  // Once we've gathered our values from the buildOutput, make sure they're all valid.
  const cleanDocxNumbering = cleanupNumbering(docxNumbering);

  const { flat: raw, isRoundtrip, title: dealTitle } = parseObjects(docxSections, docxImages);

  // We may be uploading a new version of an existing Deal ("roundtrip") so see if we have one supplied
  const refs = deal ? _.filter(deal.variables, { type: VariableType.REF }) : [];
  const sections = [],
    flat = [],
    numbers = [],
    lastDepths = [];

  const stats = {
    changed: 0,
    unchanged: 0,
    added: 0,
    removed: 0,
    outlaw: false,
  };

  let inSigning = false,
    inAppendix = false, // whether we identify as we parse that we're currently in a signature/appendix
    prior = null,
    title = dealTitle || null,
    formats = [];

  // track whether we've found a signature section yet
  let hasSignature = false;

  // Loop-invariant values, built once instead of once per block.
  const REF_FINDER = /<a href="#OUTLAW-([\w-_]+)">([\w\s\(\).:-]+)<\/a>/gi;
  const regSig = new RegExp(`(${SIGNATURE_INDICATORS.START.join(')|(')})`, 'i');
  const regPre = new RegExp(`(${SIGNATURE_INDICATORS.PRE.join(')|(')})`, 'i');
  const regApp = new RegExp(`^(${APPENDIX_NAMES.join(')|(')})`, 'i');
  // If we're in OVERWRITE mode (e.g., ad hoc ingestion) parse as markup (instead of default raw ContentState)
  const useMarkup = mergeType === MERGE_TYPE.OVERWRITE;

  // Now iterate through the semi-raw blocks and take a best-efforts approach
  // to parsing out title, numbering and structure
  // Also attempt to merge multiple blocks into sections/titles where it makes sense
  _.forEach(raw, (block) => {
    if (!block.text.trim()) return;

    let depth = block.depth;
    let markup = block.text;
    const type = block.type;
    let canCombine = true; // whether we can attempt to combine sequential title-only and body-only sections into one section

    // We get ignore IDs on paragraphs that were exported from Outlaw and should not be considered part of the contract
    // This includes branding/header and signature blocks
    if (block.sectionId != null && block.sectionId.indexOf('IGNORE') > -1) return;

    const existing = block.sectionId != null && deal != null ? deal.sections[block.sectionId] : null;

    // Refs are VariableType.REF variables that get rendered as actual internal doc links in the docx, which is cool
    // Problem is, their underlying IDs are lost in the re-import so this would show up as a change
    // Workaround is, if we find one of these, we can compute the potential raw text possibilities based on the deal's existing refs
    // If we find one in the existing section's text, assume that's what was intended and use that
    // This way it does not show up as a change
    const existingText = existing ? existing.currentVersion.body.getPlainText() : '';

    // The regex is global and shared across blocks: always start scanning from the beginning.
    REF_FINDER.lastIndex = 0;
    let ref;
    while ((ref = REF_FINDER.exec(block.text)) !== null) {
      const [anchor, , rendered] = ref;
      // Candidate is how a reference variable would show up in source text.
      const toCandidate = (r) => `[${VariableType.REF}${r.name}]`;
      // If we find a potential reference variable that DOES appear in the source section,
      // and renders to the text we see in the inbound upload,
      // we can safely assume that this was the source reference var and execute a reverse replacement
      // (loose equality kept as-is: the type of `val` is not visible from here)
      const sourceRef = _.find(refs, (r) => existingText.indexOf(toCandidate(r)) > -1 && r.val == rendered);

      markup = markup.replace(anchor, sourceRef ? toCandidate(sourceRef) : rendered);
    }

    // Next, if the raw text has <angle brackets>, mammoth ignores it (leaves intact),
    // and DraftJS will interpret it as invalid HTML tags and simply not display
    // So we need to explicitly escape in the text before creating ContentState
    markup = escapeTags(markup);

    // At this point we've somewhat cleaned the markup by identifying and stripping IDs,
    // And converting Outlaw-rendered hyperlinks back to the references they came from
    // Next we need to parse out title and body, and also attempt to extract numbering if found
    // Ideally we keep body styles intact
    const cs = stateFromHTML(markup);

    // TODO: parse differently in order to maintain styles and line breaks
    const text = cs.getPlainText();

    const sec = parseSection(cs, useMarkup) || { content: '' };
    // TODO: capture hideOrder during parseSection...
    // ... but also need to cover if there's proper numbering in a header section
    // e.g., the Shadowbox example
    sec.hideOrder = block.hideOrder;
    const num = extractNumber(text);

    // We have several checks to try to discover headers
    let isHeader;
    if (type === 'header') {
      // If it's a header from style map, treat as such (though this is rare)
      isHeader = true;
    } else if (existing) {
      // If there's an existing section (i.e., roundtrip), use that to determine
      isHeader = existing.sectiontype === SectionType.HEADER;
    } else {
      // Otherwise try to infer based on structural position and content
      isHeader = Boolean(depth == 0 && sec.hideOrder && sec.displayname && !sec.content);
    }

    if (isHeader) {
      // Interpret the first header we find as deal title -- don't add to section list!
      if (!title) {
        title = text.trim();
        return;
      }

      // Interpret subsequent headers as HEADER type sections
      sec.sectiontype = SectionType.HEADER;
      sec.displayname = text;
      sec.content = null;
    }

    // if we found a reference then use that -- this is an updated section
    if (existing) {
      sec.id = existing.id;
      // existing section; do diff compare here and add to stats
      let newBody;
      try {
        if (sec.content && typeof sec.content === 'object') newBody = convertFromRaw(sec.content);
      } catch (e) {
        // sec.content is the raw object that failed to convert; log it as-is
        // (it has no getPlainText(), calling it here would throw from inside the handler).
        console.log(e, sec.content);
      }
      if (newBody && hasDiffs(existing.currentVersion.body, newBody)) {
        stats.changed += 1;
      } else {
        stats.unchanged += 1;
      }
    } else {
      // new section -- counted exactly once
      stats.added += 1;
      if (deal) {
        sec.id = Fire.getNewSectionID(deal.dealID);
      }
    }

    // Properly structured docs will have numbered items in lists,
    // but sometimes they are written manually, e.g., "1. Bla bla bla"
    // so we want to cover both and keep the section numbered even if it's manually written in
    if (num) {
      numbers.push(num);
      sec.scrapedNumber = num;
      sec.hideOrder = null;
    }

    // NOTE(review): this unconditionally replaces the HEADER type assigned above, so header
    // sections end up as SOURCE (unless turned into APPENDIX below). Left untouched because the
    // title/body combining further down may depend on it -- confirm the intent before changing.
    sec.sectiontype = SectionType.SOURCE;

    // Tricky but fairly common scenario. if numbering is written manually, everything will appear to be at 0 depth
    // but we can attempt to infer structure if the numbering system changes as we parse the blocks
    // if order computed from scraped text is not 1 incrementally higher than prior one,
    // it means we're at a different level of structure

    const priorNum = numbers.length > 1 ? numbers[numbers.length - 2] : null;
    if (num && prior && priorNum) {
      // ensure that we keep computed depth of sequentially numbered sections in the same number style (1. 2. 3.)
      if (num.type.key === priorNum.type.key && num.order === priorNum.order + 1) depth = prior.depth;
      // if the numbering restarted, it means we have a child list, i.e., increment depth
      else if (num.order === 0) depth = prior.depth + 1;
      // otherwise assume outdent
      else depth = Math.max(prior.depth - 1, 0);
    }
    /*
    // for unnumbered content sections with body and no titles (i.e., a normal paragraph of text)
    // ensure that they stay at same depth level as prior section
    else if (prior && prior.depth > 0 && sec.hideOrder && !sec.displayname && sec.content && sec.sectiontype == SectionType.SOURCE) {
      console.log('stay at prior depth', sec.depth, sec.content);
      depth = prior.depth;
    }
    */

    const searchText = `${sec.displayname || ''}|${sec.content || ''}`;
    const priorText = prior ? `${prior.displayname || ''}|${prior.content || ''}` : '';
    if (!inSigning && (searchText.match(regSig) || priorText.match(regPre))) {
      inSigning = true;
      hasSignature = true;
      canCombine = false;
      // Store the candidate value on the section itself,
      // to be used in Draft to prompt user to convert to proper SIGNATURE section
      sec.candidate = CANDIDATE_TYPE.SIGNATURE;
    }

    // Make a best-effort guess at finding appendices
    // if it's top-level unnumbered, and starts with one of the appendix keywords, make it an APPENDIX section
    if (depth === 0 && sec.hideOrder && sec.displayname && sec.displayname.match(regApp) !== null) {
      sec.sectiontype = SectionType.APPENDIX;
      inAppendix = true;
      // seeing an appendix title gets us out of signature page mode
      inSigning = false;
    }

    // Alternatively, if we've already found a SIGNATURE block but we're no longer in it
    // ensure that we're in an APPENDIX section so that no content is orphaned
    if (hasSignature && !inSigning && !inAppendix && canCombine) {
      sec.sectiontype = SectionType.APPENDIX;
      inAppendix = true;
    }

    // Force nesting on subsequent content after an identified or forced APPENDIX section,
    // again to ensure no orphaned child content
    if (inAppendix && sec.sectiontype !== SectionType.APPENDIX && depth <= 1) {
      depth += 1;
    }

    // TBD if this is in the right spot (should it be before or after the appendix/signature checks?)
    // A section can only be nested when a section exists one level up; otherwise it goes to the root.
    const parent = depth > 0 ? lastDepths[depth - 1] : null;
    if (!parent) depth = 0;
    sec.depth = depth;

    // Before we proceed with pushing section into list, see if we can combine it with a prior section
    // This is frequently the case for ingested docx files because they will come in as alternating title and body sections
    if (
      prior &&
      prior.depth === depth &&
      prior.displayname &&
      !prior.content &&
      sec.content &&
      !sec.displayname &&
      sec.hideOrder &&
      canCombine
    ) {
      prior.content = sec.content;
      return;
    }

    // If we find a table while in signing mode (cued by prior legalese) we've just found the whole
    // signature block: fold it into the prior section and leave signing mode.
    // TODO: also detect SIGNATURE_INDICATORS.PARTY / END wording to merge or stop merging non-table blocks.
    // TBD: how long do we keep tacking things on until we're confident that we're back out of signing?
    // maybe not a big deal though because the signature is usually either:
    // 1) at end of doc anyway
    // 2) in a table
    // 3) followed by an appendix (which will be named as such)
    if (inSigning && type === 'table') {
      inSigning = false;

      // Without a prior section there is nothing to merge into; fall through and keep the
      // table as its own section instead of dereferencing null.
      if (prior) {
        if (!prior.content) prior.content = '';
        else prior.content += '<br /><br />';

        if (sec.displayname) prior.content += `${sec.displayname}<br />`;
        if (sec.content) prior.content += `${sec.content}`;
        return;
      }
    }

    // Otherwise continue to build the structured and flat trees/arrays of sections
    if (depth) {
      if (!parent.children) parent.children = [];
      parent.children.push(sec);
    } else {
      sections.push(sec);
    }

    // Finally, push into flattened list and keep track of this section as the last processed section
    flat.push(sec);
    lastDepths[depth] = sec;
    prior = sec;
  });

  // Now we've compiled all section content in a nested list
  // we have 2 methods of discovering numbering style
  // If the docx is properly numbered, we'll have collected styles during html conversion in which case we simply translate the model
  // If not, we can try to use scraped number formats from text
  if (cleanDocxNumbering.length) {
    formats = _.map(cleanDocxNumbering, DealNumberFormat.FromDocx);
  } else {
    lastDepths.forEach((d, i) => {
      if (!formats[i]) {
        const firstAtDepth = _.find(
          flat,
          (s) => !s.hideOrder && s.sectiontype === SectionType.SOURCE && s.depth === i && s.scrapedNumber != null
        );
        if (firstAtDepth) {
          formats[i] = DealNumberFormat.FromText(firstAtDepth.scrapedNumber.raw);
        }
      }
    });
  }

  const style = deal ? deal.style : Classic;
  const numbering = formats.length ? _.merge({}, style.numbering, formats) : null;

  // now compute how many sections are in the current deal but are NOT found in the candidate import
  if (deal) {
    const source = deal.applyConditions(deal.buildSource(true));
    const removed = _.filter(source, (s) => !_.find(flat, { id: s.id }));
    stats.removed = removed.length;
  }

  return {
    numbering,
    outlaw: isRoundtrip,
    sections,
    stats,
    title,
  };
}
