import * as sentenceSplitter from 'sentence-splitter';
import { Item, ItemType, FreeText } from '@nebula/common';
import { Datapoint } from 'src/types';

/**
 * Strategies for breaking input text into chunks. Each member keys an entry
 * in the `splitters` table in this file.
 */
export enum SplitMode {
  /** Split on runs of newline / carriage-return characters. */
  LINE = 'LINE',
  /** Split into sentences using the `sentence-splitter` package. */
  SENTENCE = 'SENTENCE',
}

/** A function that breaks a text into a list of string chunks. */
type Splitter = (text: string) => string[];

/**
 * Splits text into its non-blank lines.
 *
 * Lines are delimited by any run of `\n` / `\r` characters, so `\n`, `\r\n`
 * and consecutive line breaks all act as a single separator. Chunks that are
 * empty or contain only whitespace are dropped; the content of the remaining
 * lines (including leading/trailing spaces) is returned unchanged.
 *
 * @param text - The text to split.
 * @returns The non-blank lines in their original order; `[]` for empty or
 *   whitespace-only input.
 */
const splitByLines = (text: string): string[] => {
  return text
    .split(/[\n\r]+/)
    // `split` yields '' for empty input and for leading/trailing separators,
    // and whitespace-only lines survive the regex; none of those are content.
    .filter((line) => line.trim() !== '');
};

/**
 * Splits text into sentences using the `sentence-splitter` package.
 *
 * Only entries whose `type` is `'Sentence'` are kept; every other entry in
 * the parse result is skipped. The `raw` text of each kept entry is returned.
 *
 * @param text - The text to split.
 * @returns The `raw` text of every sentence entry, in parse order.
 */
const splitBySentences = (text: string): string[] => {
  const collected: string[] = [];
  sentenceSplitter.split(text).forEach((node) => {
    if (node.type !== 'Sentence') {
      return;
    }
    // Diagnostic only: console.assert logs on a falsy `raw` but never throws.
    console.assert(node.raw);
    collected.push(node.raw);
  });
  return collected;
};

/**
 * Lookup table from each `SplitMode` to the function that implements it.
 * The `Record<SplitMode, …>` type makes the compiler require an entry for
 * every enum member.
 */
const splitters: Record<SplitMode, Splitter> = {
  LINE: splitByLines,
  SENTENCE: splitBySentences,
};

/**
 * Splits free-form text into chunks and wraps each chunk in a `Datapoint`
 * whose item is a `FreeText` with the chunk as its `body`.
 *
 * @param text - Raw input; surrounding whitespace is trimmed before splitting.
 * @param parseMode - Selects which splitter from `splitters` is applied.
 * @returns One `Datapoint` per chunk, in the order the splitter produced
 *   them; an empty array when `text` is empty or whitespace-only.
 * @throws TypeError if `parseMode` has no entry in `splitters`.
 */
export const parseAsDatapoints = (
  text: string,
  parseMode: SplitMode
): Datapoint[] => {
  // Trim BEFORE the emptiness check: whitespace-only input must not reach the
  // splitter as '' (which would otherwise become an empty-bodied datapoint).
  const trimmed = text ? text.trim() : '';
  if (!trimmed) return [];

  // console.assert only logs, so an unknown mode used to surface as an opaque
  // "is not a function" TypeError; fail with an explicit message instead.
  if (!Object.prototype.hasOwnProperty.call(splitters, parseMode)) {
    throw new TypeError(`Unsupported split mode: ${String(parseMode)}`);
  }

  return splitters[parseMode](trimmed).map(
    (elem) =>
      new Datapoint({
        item: { type: ItemType.FreeText, body: elem } as FreeText,
      })
  );
};
