import { diffWords } from 'diff';
import { parseSection } from '../models/Content';
import { extractNumber } from '../models/DealNumberFormat';
import _ from 'lodash';
import TextLine from './TextLine';
import TextPage from './TextPage';
import TextParagraph, { rxParagraphEnder } from './TextParagraph';

// Minimum horizontal spacing (margin, in pts) that indicates columns.
// Note this should be fairly small because the second column's numbering may sit outside of the column,
// so this value is the space between the right edge of the first column's text and the left edge of
// the second column's numbering.
const PT_COL_SPACING = 10;

// Margin of error (pts) within which text is considered to be on the same line
const PT_VERTICAL_ALIGN = 4;

// Height (pts, at 72 pts per inch) of the candidate footer area at the bottom of the page (1.25 inches)
const PT_BOTTOM_MARGIN = 1.25 * 72;
// Height (pts) of the candidate header area at the top of the page (1 inch)
const PT_TOP_MARGIN = 1.0 * 72;

// Margin of error (pts) within which two line spacings (or line heights) are considered equivalent
const PT_LS_FLEX = .5;

// Identify footers and remove from content.
// Matches page-number text such as "Page 3" or "page 3 of 10" (case-insensitive).
// TODO: "page" should be optional,
// but then we have no way to differentiate between a number that's part of the prose and a number that's a page number...
// NOTE: because of the `g` flag, exec() and test() on this shared instance are stateful (they resume
// from `lastIndex`); String.prototype.replace resets `lastIndex` and is safe to call repeatedly.
const rxPageFinder = /Page[\s]+[\d]+([\s]+of[\s]+[\d]+)?/ig;


/**
 * Convert raw PDF page data into pages, lines and paragraphs.
 *
 * Steps, in order:
 *  1. Wrap each raw page in a TextPage (page index, fragments, width, height).
 *  2. Sort each page's fragments top-to-bottom / left-to-right, re-sort every range reported by
 *     findColumnRanges with column-aware ordering, then group the fragments into lines.
 *  3. Strip the document title, then repeated headers, then repeated footers.
 *  4. Assemble the remaining lines into paragraphs.
 *
 * @param {Array} rawPages Raw pages; `items`, `width` and `height` are read from each entry.
 * @returns {{pages: Array, paragraphs: Array, omitted: Array, titleLines: Array}}
 *   `omitted` lists header lines, then footer lines, then title lines. `titleLines` is an
 *   empty array when no title was found (including when there are no pages).
 */
export function parsePDF(rawPages) {
  const pages = _.map(rawPages, (rawPage, p) => new TextPage({
    page: p,
    fragments: rawPage.items,
    width: rawPage.width,
    height: rawPage.height,
  }));

  // First pass is to go through ALL text fragments and group into lines of text
  // TODO: this should (all?) be moved to TextPage constructor...
  _.forEach(pages, (page, p) => {
    // Initially sort fragments top to bottom, left to right
    page.fragments = sortFragments(page.fragments);

    // Find special index ranges which represent contiguous rectangles of fragments which should
    // be sorted differently; this covers columns, tables and forms
    const columnRanges = findColumnRanges(page, p);
    _.forEach(columnRanges, ({ start, end }) => {
      page.fragments = sortFragments(page.fragments, start, end, true);
    });

    page.lines = linify(page, p);
  });

  // stripTitle returns null when there is no first page; normalise to an empty list so the
  // spread below cannot throw on an empty document.
  const titleLines = stripTitle(pages) || [];

  // Title lines are detected first (so headers/footers filtering removes them from the pages)
  // but reported last, matching the original ordering of `omitted`.
  const omitted = [
    ...stripHeaders(pages),
    ...stripFooters(pages),
    ...titleLines,
  ];

  const paragraphs = pagesToParagraphs(pages);

  return {
    pages,
    paragraphs,
    omitted,
    titleLines,
  };
}

/**
 * Detect the document title on the first page and mark its lines for omission.
 *
 * The title starts at the first body line that is either centered on the page or taller than
 * the page's probable body line height. It then continues through immediately following body
 * lines for as long as they are centered. Every title line gets `omit = true` and
 * `omitReason = 'docTitle'`.
 *
 * @param {Array} pages Parsed pages; only the first page is inspected (TODO... expand this?).
 * @returns {Array|null} The title lines (possibly empty), or null when there is no first page.
 */
export function stripTitle(pages) {
  const firstPage = _.get(pages, '[0]', null);
  if (!firstPage) return null;

  const bodyLineHeight = firstPage.probableBodyLineHeight;
  const titleLines = [];

  // Index (within bodyLines) of the last line accepted into the title. Tracked directly rather
  // than re-located with _.findIndex(bodyLines, line): an object predicate there is a deep
  // partial match, not an identity check, so it could resolve to an earlier look-alike line.
  let lastTitleIdx = -1;

  _.forEach(firstPage.bodyLines, (line, idx) => {
    const isCentered = line.isCenteredOnPage(firstPage);

    if (!titleLines.length) {
      // If we haven't found a title yet, either the alignment or the size signal starts one
      // TODO: play with weighted/points based system
      const isLargerThanBody = bodyLineHeight > 0 && line.height > bodyLineHeight;
      if (isCentered || isLargerThanBody) {
        titleLines.push(line);
        lastTitleIdx = idx;
      }
    } else if (lastTitleIdx + 1 === idx && isCentered) {
      // Only continue titles if consecutive and centered
      titleLines.push(line);
      lastTitleIdx = idx;
    }
  });

  _.forEach(titleLines, (line) => {
    line.omit = true;
    line.omitReason = 'docTitle';
  });

  return titleLines;
}

/**
 * Strip page numbers and repeated footer lines from every page.
 *
 * Lines whose `y` is within PT_BOTTOM_MARGIN of the bottom of the page are footer candidates.
 * Page-number text (rxPageFinder) is removed from each candidate; a candidate left empty is
 * marked for omission. A candidate is then treated as a footer when the document has more than
 * one page and every page has a candidate line that either has identical text, or is smaller
 * than that page's body text and differs from the candidate only by one number being swapped
 * for another. Finally all omitted and blank lines are removed from `page.lines`.
 *
 * @param {Array} pages Parsed pages; `lines` and `probableBodyLineHeight` are read, and
 *   `lines` (plus `value`/`omit`/`omitReason` on individual lines) are mutated.
 * @returns {Array} The candidate lines that were identified as repeated footers.
 */
export function stripFooters(pages) {
  const candidateFooters = [];
  const omitted = [];

  _.forEach(pages, (page) => {
    // TODO: another strategy, start from bottom (y=0) up, see if there's consistently smaller text than rest of page
    _.forEach(page.lines, (line) => {
      // TBD what determines the "bottom" of the page... using a 1.25 inch margin for now
      if (line.y <= PT_BOTTOM_MARGIN) {
        // Compare the replaced text instead of calling exec() on the shared global regex,
        // which would depend on its mutable lastIndex.
        const withoutPageNumber = line.value.replace(rxPageFinder, '');
        if (withoutPageNumber !== line.value) {
          line.value = withoutPageNumber.trim();
          if (!line.value) {
            line.omit = true;
            line.omitReason = 'footer';
          }
        }
        // Whether or not we found (and stripped) the page number, we're now at the bottom of
        // the page: collect candidate footers, which we'll then loop through again
        candidateFooters.push(line);
      }
    });
  });

  // Constant-time membership checks for the nested loops below
  const candidateSet = new Set(candidateFooters);

  const isNumeric = (text) => !Number.isNaN(Number.parseInt(text, 10));

  // True when the two texts differ by exactly one removed token and one added token, both numeric
  const differsOnlyByNumber = (candidateText, lineText) => {
    const ranges = diffWords(candidateText, lineText);
    const added = _.filter(ranges, (range) => range.added);
    const removed = _.filter(ranges, (range) => range.removed);
    if (added.length !== 1 || removed.length !== 1) return false;
    return isNumeric(added[0].value) && isNumeric(removed[0].value);
  };

  _.forEach(candidateFooters, (candidate) => {
    // Note we can only safely do this if the doc is more than 1 page
    // Otherwise there's no way to confirm that something like "Confidential and proprietary"
    // is repeated and not part of actual contract text
    if (!(pages.length > 1)) return;

    const isOnEveryPage = _.every(pages, (page) => {
      const bodyLineHeight = page.probableBodyLineHeight;
      return _.some(page.lines, (line) => {
        if (!candidateSet.has(line)) return false;
        // If the line of text is identical on every page, consider it a footer
        if (line.value === candidate.value) return true;

        // If it's not *quite* identical, relax the equality constraint *slightly* when:
        // 1. The line height (font size) is smaller than body
        // 2. The text only differs from page to page by a number (ie, the page number)
        return bodyLineHeight > 0 &&
          line.height < bodyLineHeight &&
          differsOnlyByNumber(candidate.value, line.value);
      });
    });

    // TODO: capture footer text (from first page?) and apply it appropriately in structure (once we have a Footer Section type!)
    if (isOnEveryPage) {
      candidate.omit = true;
      candidate.omitReason = 'footer';
      omitted.push(candidate);
    }
  });

  // Finally, go through pages a last time and remove all lines marked for omission!
  _.forEach(pages, (page) => {
    page.lines = _.filter(page.lines, (line) => !line.omit && !!line.value.trim());
  });

  return omitted;
}

/**
 * Remove repeated header lines from every page.
 *
 * A line is a header candidate when the gap between its top edge (`y + height`) and the top of
 * the page is at most PT_TOP_MARGIN. A candidate is treated as a header when the document has
 * more than one page and every page has a candidate line with exactly the same text. Headers
 * get `omit = true` / `omitReason = 'header'`, and all omitted or blank lines are then removed
 * from `page.lines`.
 *
 * @param {Array} pages Parsed pages; `height` and `lines` are read and `lines` is replaced.
 * @returns {Array} The lines that were omitted as headers.
 */
export function stripHeaders(pages) {
  // TBD what determines the "top" of the page... using a 1 inch margin for now
  const headerCandidates = [];
  _.forEach(pages, (page) => {
    _.forEach(page.lines, (line) => {
      const gapFromTop = page.height - (line.y + line.height);
      if (gapFromTop <= PT_TOP_MARGIN) {
        headerCandidates.push(line);
      }
    });
  });

  // Does this page carry a header candidate with exactly this text?
  const pageHasCandidateText = (page, text) => _.some(
    page.lines,
    (line) => headerCandidates.includes(line) && line.value === text,
  );

  const removed = [];
  _.forEach(headerCandidates, (candidate) => {
    const repeatedOnEveryPage = _.every(pages, (page) => pageHasCandidateText(page, candidate.value));

    // Only safe for multi-page documents: on a single page there is no way to confirm that
    // something like "Confidential and proprietary" is repeated rather than contract text.
    // TODO: capture header text (from first page?) and apply it appropriately in structure
    if (!repeatedOnEveryPage || !(pages.length > 1)) return;

    candidate.omit = true;
    candidate.omitReason = 'header';
    removed.push(candidate);
  });

  // Last pass: drop everything marked for omission, plus blank lines
  _.forEach(pages, (page) => {
    page.lines = _.filter(page.lines, (line) => !line.omit && !!line.value.trim());
  });

  return removed;
}

/**
 * Return a copy of `fragments` with the index range [start, end) sorted into reading order.
 *
 * Within the range, fragments whose `y` values are within PT_VERTICAL_ALIGN of each other are
 * treated as one line and ordered by ascending `x`; otherwise higher `y` comes first. When
 * `special` is true, fragments flagged `isInLeftColumn` are placed before those that are not.
 * Elements outside the range keep their positions. The input array is not modified.
 *
 * @param {Array} fragments Fragments with `x`, `y` and (optionally) `isInLeftColumn`.
 * @param {number} [start=0] First index of the range to sort.
 * @param {number} [end] Index one past the range; defaults to `fragments.length`.
 * @param {boolean} [special=false] Enable column-aware ordering.
 * @returns {Array} New array containing the same fragments.
 */
export function sortFragments(fragments, start = 0, end, special = false) {
  const stop = end === undefined ? fragments.length : end;

  // Three-way comparison yielding -1 / 1 / 0
  const ascending = (p, q) => (p < q ? -1 : p > q ? 1 : 0);

  const byReadingOrder = (a, b) => {
    // If one is in the left column and the other is not, that decides the order
    const splitAcrossColumns = special &&
      (a.isInLeftColumn || b.isInLeftColumn) &&
      a.isInLeftColumn !== b.isInLeftColumn;
    if (splitAcrossColumns) {
      return a.isInLeftColumn ? -1 : 1;
    }

    // Text within the vertical margin of error is on the same line: go left to right
    const sameLine = Math.abs(a.y - b.y) <= PT_VERTICAL_ALIGN;
    if (sameLine) {
      return ascending(a.x, b.x);
    }

    // Otherwise top of page (larger y) first
    return ascending(b.y, a.y);
  };

  const before = start > 0 ? fragments.slice(0, start) : [];
  const sorted = fragments.slice(start, stop);
  sorted.sort(byReadingOrder);
  const after = stop < fragments.length ? fragments.slice(stop) : [];

  return [...before, ...sorted, ...after];
}

/**
 * Group a page's fragments, in their current order, into TextLine objects.
 *
 * A new line is opened at the position of its first fragment. Fragments keep being added to
 * the open line while the next fragment's `y` is within PT_VERTICAL_ALIGN of the current
 * fragment's `y`; otherwise the line is closed.
 *
 * @param {Object} page Page whose `fragments` are grouped.
 * @param {number} p Page index stored on each created TextLine.
 * @returns {Array} The lines, in fragment order.
 */
export function linify(page, p) {
  const grouped = [];
  let openLine = null;

  _.forEach(page.fragments, (fragment, idx) => {
    // Open a line at this fragment's position if none is in progress
    openLine = openLine || new TextLine({
      x: fragment.x,
      y: fragment.y,
      page: p,
    });
    openLine.push(fragment);

    // Keep the line open while the following fragment sits at (nearly) the same height
    const following = page.fragments[idx + 1];
    const continuesOnSameLine = Boolean(following) &&
      Math.abs(fragment.y - following.y) <= PT_VERTICAL_ALIGN;
    if (continuesOnSameLine) return;

    grouped.push(openLine);
    openLine = null;
  });

  return grouped;
}

/**
 * Group the lines of every page into paragraphs.
 *
 * Lines are appended to an open TextParagraph, which is closed ("flushed") when:
 *  - the last line of a page matches rxParagraphEnder (otherwise the paragraph carries over to
 *    the next page);
 *  - the spacing before and after a non-initial line differs by more than PT_LS_FLEX;
 *  - the next line starts with a number and this line ends a paragraph or is itself numbered;
 *  - the line height changes by more than PT_LS_FLEX on the next line;
 *  - parseSection reports a displayname for the line;
 *  - a run of lines with spacers (treated as a table) ends.
 * Any paragraph still open after the last page is flushed too, so trailing text is not lost.
 * A final pass merges paragraphs that look like accidental breaks.
 *
 * @param {Array} pages Parsed pages; `lines` is read from each.
 * @returns {Array} The merged paragraphs, in document order.
 */
export function pagesToParagraphs(pages) {
  const paragraphs = [];
  let paragraphBuffer = null;
  let inTable = false;

  // Close the open paragraph and reset per-paragraph state
  const flushBuffer = () => {
    // NOTE(review): self-assignment kept from the original in case TextParagraph defines a
    // `value` accessor with side effects -- confirm, and remove if it is a plain property.
    paragraphBuffer.value = paragraphBuffer.value;
    paragraphs.push(paragraphBuffer);
    paragraphBuffer = null;
    inTable = false;
  };

  _.forEach(pages, (page, p) => {
    _.forEach(page.lines, (line, i) => {
      const newParagraph = !paragraphBuffer;
      if (newParagraph) {
        paragraphBuffer = new TextParagraph({
          page: p,
          x: line.x,
          y: line.y, // TODO: right-aligned para will mess this up
          value: '',
        });
      }

      // Push line content into current buffer
      paragraphBuffer.push(line);

      const prevLine = i === 0 ? null : page.lines[i - 1];
      const nextLine = i + 1 >= page.lines.length ? null : page.lines[i + 1];
      let flush = false;

      if (!prevLine) {
        // First line on page: nothing to compare spacing against
      } else if (!nextLine) {
        // Last line on page; only flush paragraph if there's an ending delimiter,
        // otherwise continue this paragraph with starting text of next page!
        if (rxParagraphEnder.test(line.value)) {
          flush = true;
        }
      } else if (!newParagraph) {
        // Lines before and after: uneven spacing around this line marks a paragraph boundary
        const spaceBefore = Math.abs(prevLine.y - line.y);
        const spaceAfter = Math.abs(nextLine.y - line.y);
        if (Math.abs(spaceBefore - spaceAfter) > PT_LS_FLEX) {
          flush = true;
        }
      }

      if (nextLine) {
        const currentNumber = extractNumber(line.value);
        const nextNumber = extractNumber(nextLine.value);
        // If the next line starts with a number and this line either ends a paragraph or has
        // its own number, force a new paragraph
        if (nextNumber && (rxParagraphEnder.test(line.value) || currentNumber)) {
          flush = true;
        }

        // If the line height changes on the next line (e.g., font change), that's a new paragraph
        if (Math.abs(line.height - nextLine.height) > PT_LS_FLEX) {
          flush = true;
        }

        // NOTE(review): the original comment said "if the next line is a section title", but
        // the code has always parsed the CURRENT line; behaviour is kept as-is -- confirm intent.
        if (parseSection(line.value).displayname) {
          flush = true;
        }

        // Table ID start/continue
        const spacerChange = compareSpacers(line, nextLine);
        if (
          !currentNumber &&
          !nextNumber &&
          [SPACERS.START, SPACERS.CONTINUE].includes(spacerChange)
        ) {
          inTable = true;
          flush = false;
          if (!paragraphBuffer.subType && spacerChange === SPACERS.CONTINUE) {
            paragraphBuffer.subType = 'table';
          }
        }

        // Table ID stop
        if (inTable && spacerChange !== SPACERS.CONTINUE) {
          inTable = false;
          flush = true;
        }
      }

      // TODO: there are definitely some more edge cases to handle here

      // TODO: scan full doc lines to identify standard line height
      // and add a special case to add new paragraphs (e.g. for title-only double-spacing)

      if (flush) {
        flushBuffer();
      }
    });
  });

  // A paragraph still open at the end of the document (no ending delimiter on its last line)
  // must be emitted as well, otherwise its text would be dropped.
  if (paragraphBuffer) {
    flushBuffer();
  }

  // Merge paragraphs where we see accidental breaks (e.g., from columns, or from weird line spacing)
  const merged = [];
  let lastParagraph;
  for (let i = 0; i < paragraphs.length; i++) {
    const current = paragraphs[i];
    if (
      lastParagraph &&
      !lastParagraph.hasExplicitEnd &&
      [SPACERS.NONE, SPACERS.CONTINUE].includes(compareSpacers(lastParagraph.lastLine, current.firstLine)) &&
      (!current.hasExplicitStart || (lastParagraph.hasNumber && !current.hasNumber))
    ) {
      lastParagraph.combine(current);
    } else {
      merged.push(current);
      lastParagraph = current;
    }
  }

  return merged;
}

/**
 * Find index ranges of `page.fragments` that need column-aware sorting.
 *
 * The page's fragments are grouped into lines (via linify). A range opens at the first
 * fragment of a line that does not cross the middle of the page, and closes at the first
 * fragment of the next line that does cross the middle -- or of the final line, whichever
 * comes first. A range is only emitted when its end index is not before its start index.
 *
 * NOTE(review): when a range is closed by the final line, `end` is that line's first fragment,
 * so the final line's own fragments fall outside the range -- confirm this is intended.
 *
 * @param {Object} page Page whose `fragments` are inspected.
 * @param {number} p Page index (currently unused).
 * @returns {Array<{start: number, end: number}>} Ranges as indices into `page.fragments`.
 */
export function findColumnRanges(page, p) {
  const ranges = [];
  // Index of the fragment that opened the range in progress, or -1 when none is open
  let openStart = -1;

  // First, group all page fragments into lines
  const lines = linify(page);

  _.forEach(lines, (line, lineIdx) => {
    const anchor = line.firstFragment;

    if (openStart < 0) {
      // No range in progress: a line confined to one side of the page opens one
      if (!line.crossesMiddle) {
        openStart = page.fragments.indexOf(anchor);
      }
      return;
    }

    // A range is in progress: it closes on a full-width line or on the final line
    const isFinalLine = lineIdx + 1 >= lines.length;
    if (!line.crossesMiddle && !isFinalLine) return;

    const closeAt = page.fragments.indexOf(anchor);
    if (closeAt >= openStart) {
      ranges.push({
        start: openStart,
        end: closeAt,
      });
      openStart = -1;
    }
  });

  return ranges;
}

/**
 * Turn parsed paragraphs into section records.
 *
 * This is similar to DOCX.js starting on line 393, but this is for *documents* not
 * *templates*, so a lot of the text processing is simpler and there is no potential
 * Word-defined numbering; it's only parsed from the plain text content in paragraphs.
 *
 * Each non-empty paragraph is parsed with parseSection and tagged with its line JSON, the
 * 'external' section type, its subType (tables keep their raw text as content and no
 * displayname) and any number scraped from its first line. A paragraph that has content but no
 * displayname and no number is folded into the immediately preceding section when that section
 * has a displayname, no content yet, and the same subType.
 *
 * @param {Array} paragraphs Paragraphs exposing `lines`, `value` and optionally `subType`.
 * @returns {{sections: Array}} The section records, in document order.
 */
export function parseParagraphs(paragraphs) {
  const sections = [];
  // Most recently emitted section; cleared once a body paragraph has been folded into it
  let previous = null;

  // Take a best-efforts approach to parsing out title, numbering and structure,
  // merging a title-only block with the body block that follows it where it makes sense
  _.forEach(paragraphs, (para) => {
    if (!para.lines.length) return;

    const section = parseSection(para.value, false);
    const number = extractNumber(para.lines[0].value);

    section.lines = _.map(para.lines, 'json');
    // TODO: use core enum once we figure out dependency process
    section.sectiontype = 'external';

    const { subType } = para;
    if (subType) {
      section.subType = subType;
    }
    if (subType === 'table') {
      section.displayname = null;
      section.content = para.value;
    }

    // TBD whether the nested structure/ordering matters
    // since the source is a pdf and we have a way to route back to exact page/position
    if (!number) {
      section.hideOrder = true;
    } else {
      section.scrapedNumber = number.raw;
      section.hideOrder = null;
    }

    const isBodyOfPreviousTitle = Boolean(
      previous &&
      previous.displayname &&
      !previous.content &&
      !number &&
      !section.displayname &&
      section.content &&
      section.subType === previous.subType
    );

    if (isBodyOfPreviousTitle) {
      previous.content = section.content;
      previous.lines.push(..._.map(para.lines, 'json'));
      previous = null;
      return;
    }

    // ... do we actually just keep combining section content until we hit the next named section?
    sections.push(section);
    previous = section;
  });

  // TODO: "disposed of or used other than in accordance with" -- accidental section break

  return { sections };
}

// How the `spacers` of two consecutive lines relate, as reported by compareSpacers
const SPACERS = {
  NONE: 'none', // neither line has spacers
  START: 'start', // only the second line has spacers
  CONTINUE: 'continue', // both lines have spacers
  CHANGE: 'change', // not currently returned by compareSpacers
  END: 'end', // only the first line has spacers
};

/**
 * Classify how the spacers of two consecutive lines relate.
 *
 * @param {Object} line1 Earlier line; its `spacers` list is read.
 * @param {Object} line2 Later line; its `spacers` list is read.
 * @returns {string} SPACERS.NONE when neither line has spacers, SPACERS.START when only
 *   `line2` does, SPACERS.END when only `line1` does, and SPACERS.CONTINUE when both do.
 */
export function compareSpacers(line1, line2) {
  const earlier = line1.spacers;
  const later = line2.spacers;
  const earlierHasSpacers = Boolean(earlier.length);
  const laterHasSpacers = Boolean(later.length);

  if (earlierHasSpacers && laterHasSpacers) {
    // TODO: both lines have spacers, compare actual values
    return SPACERS.CONTINUE;
  }
  if (earlierHasSpacers) return SPACERS.END;
  return laterHasSpacers ? SPACERS.START : SPACERS.NONE;
}

/**
 * Match the vital sections of a template against the named clauses found in a PDF deal.
 *
 * The matching pass is currently disabled (kept below as a comment), so this always returns
 * empty `found` and `missing` lists.
 *
 * @param {Object} pdfDeal Deal parsed from a PDF; `sections` is read.
 * @param {Object} relatedTemplate Template to compare against; `sections` is read.
 * @returns {{found: Array, missing: Array}}
 */
export function findVitals(pdfDeal, relatedTemplate) {
  const found = [];
  const missing = [];

  // Inputs for the disabled matching pass below; currently computed but not otherwise used
  const sectionsInPDF = _.filter(pdfDeal.sections, (sec) => !!sec.namedClause);
  const vitalSections = _.filter(relatedTemplate.sections, (sec) => !!sec.vitals);

  // Create index of pdf sections by named clause
  /*
  const pdfIndex = new FlexSearch.Document({
    preset: 'default',
    tokenize: 'full',
    minLength: 3,
    language: 'en',
    document: {
      store: true,
      id: 'id',
      // tag: 'tag',
      index: ['id', 'namedClause'],
    },
  });

  _.forEach(sectionsInPDF, (section) => {
    pdfIndex.add({
      id: section.id,
      namedClause: section.namedClause,
    });
    // console.log(section);
  });

  _.forEach(vitalSections, (sec) => {
    const queries = sec.vitals.split('\n');
    // TODO: new getter and UI reqs for vitalTitle
    const vitalTitle = sec.displayName || queries[0];
    // console.log(queries);
    let results = null, query = null, topHit = null;
    for (let i = 0; i < queries.length; i++) {
      query = queries[i];
      results = pdfIndex.search(query, { index: 'namedClause', enrich: true, limit: 10 });
      // console.log(query, results);
      if (results.length) break;
    }

    topHit = _.get(results, '[0].result[0].doc.namedClause', null);
    console.log(topHit);

    if (topHit) {
      found.push({
        query, 
        vital: sec,
        match: topHit,
        vitalTitle,
      });
    }
    else {
      missing.push({
        vital: sec,
        query: sec.vitals,
        vitalTitle,
      })
    }
  });

  */

  return { found, missing };
}