/**
 * Values used for the `type` field of every token the tokenizer yields.
 */
export const types = {
  // First token of every tokenize run.
  START: 'start',
  // `<name` — carries `name`.
  OPENING_TAG: 'topen',
  // `>` or `/>` closing an opening tag — carries `name` and `token`.
  OPENING_TAG_END: 'topen-end',
  // `</name>` — carries `name`.
  CLOSING_TAG: 'tclose',
  // Attribute inside an opening tag — carries `name` and `value`.
  ATTRIBUTE: 'attr',
  // `<!-- ... -->` met in text — carries `text`.
  COMMENT: 'comment',
  // `<!-- ... -->` met inside an opening tag — carries `text`.
  COMMENT_IN_TAG: 'comment_in_tag',
  // Character data — carries `text`.
  TEXT: 'text',
  // Last token of every tokenize run.
  DONE: 'done',
}

// Internal parsing modes of the tokenizer. Each mode is a unique,
// description-less symbol, so modes can only be compared by identity.
const states = {}
for (const mode of ['TAG', 'TEXT', 'SCRIPT', 'COMMENT'])
{
  states[mode] = Symbol()
}

/**
 * Tries to match `regex` against `text` starting exactly at index `i`.
 *
 * The search is started by assigning `regex.lastIndex`, so `regex` needs the
 * global (or sticky) flag for the start position to be honoured. A match
 * found further along in the text is rejected.
 *
 * @param {string} text  input being scanned
 * @param {number} i     index the match has to start at
 * @param {RegExp} regex pattern whose first capture group spans the consumed text
 * @returns {{length: number, match: RegExpExecArray}|undefined}
 *   the length of capture group 1 together with the raw match, or
 *   `undefined` when nothing matches at `i`
 */
function apply_regex (text, i, regex)
{
  regex.lastIndex = i
  const found = regex.exec(text)
  const starts_at_i = Boolean(found) && found.index === i

  return starts_at_i ? {length: found[1].length, match: found} : undefined
}

// Builds a matcher `(text, i) => apply_regex(text, i, regex)` around a single
// regex instance, so the pattern is created once instead of on every call.
// Sharing the instance is safe because apply_regex assigns `lastIndex` before
// each `exec`.
const make_matcher = (regex) => (text, i) => apply_regex(text, i, regex)

// `<name` or `<prefix:name`; match[2] is the tag name.
const get_opening_tag = make_matcher(/(<(([a-z0-9-]+:)?[a-z0-9-]+))/ig)
// `</name>`, optionally with whitespace before `>`; match[2] is the tag name.
const get_closing_tag = make_matcher(/(<\/(([a-z0-9-]+:)?[a-z0-9-]+)\s*>)/ig)
// End of an opening tag; match[2] is `>` or `/>`.
const get_tag_end = make_matcher(/(\s*(\/?>))/g)
// A run of characters up to the next `<`.
const get_text = make_matcher(/([^<]+)/g)
// Whitespace-led attribute name; match[2] is the name and match[4] the
// `=` separator (with surrounding whitespace) when the attribute has a value.
const get_attribute = make_matcher(/(\s+(([a-z0-9\-_]+:)?[a-z0-9\-_]+)(\s*=\s*)?)/ig)
// Everything up to and including the script end tag (any letter case,
// optional whitespace before `>`); match[2] is the script content.
const get_script = make_matcher(/(([\s\S]*?)<\/script\s*>)/ig)
// Comment opener in text.
const get_comment_open = make_matcher(/(<!--)/g)
// Comment opener inside a tag, with optional leading whitespace.
const get_comment_open_in_tag = make_matcher(/(\s*<!--)/g)
// Everything up to and including `-->`; match[2] is the comment content.
const get_comment = make_matcher(/(([\s\S]*?)-->)/g)


/**
 * One-shot convenience wrapper: tokenizes `text` with a freshly created
 * tokenizer and forwards every token it produces.
 *
 * @param {string} text
 * @param {number} [start=0]
 * @param {number} [end=text.length]
 */
function* tokenize (text, start = 0, end = text.length)
{
  const tokenizer = new_tokenizer()
  const tokens = tokenizer.tokenize(text, start, end)

  yield* tokens
}

/**
 * Creates a stateful tokenizer for HTML-like markup.
 *
 * The parsing mode and the name of the tag currently being read are kept in
 * this closure, so they carry over between successive `tokenize` calls made
 * on the same tokenizer (for example a script or comment that one call
 * leaves unterminated is continued by the next call).
 *
 * @returns {{tokenize: Function}} object exposing the `tokenize` generator
 */
export function new_tokenizer ()
{
  let cur_tag = undefined
  let state = states.TEXT

  // Sticky, so a match can only start at `lastIndex`. `*` lets it succeed
  // with an empty value (e.g. `href=>`), so `exec` never returns null.
  const unquoted_value = /[^\s"'>]*/y

  /**
   * Reads the attribute value that starts exactly at `pos`.
   *
   * A value opened by `"` or `'` runs to the next identical quote (newlines
   * included) or, when that quote is missing, to the end of `str`. Any other
   * value runs up to the next whitespace, quote or `>` and may be empty.
   *
   * @param {string} str
   * @param {number} pos index of the first character of the value
   * @returns {{length: number, value: string}} `length` is the number of
   *   characters consumed (quotes included), `value` the unquoted text
   */
  function read_attr (str, pos)
  {
    const quote = str.charAt(pos)

    if (quote === '"' || quote === '\'')
    {
      const closing = str.indexOf(quote, pos + 1)
      if (closing === -1)
      {
        return {length: str.length - pos, value: str.substring(pos + 1)}
      }
      return {length: closing - pos + 1, value: str.substring(pos + 1, closing)}
    }

    unquoted_value.lastIndex = pos
    const value = unquoted_value.exec(str)[0]
    return {length: value.length, value}
  }

  /**
   * Tokenizes `text`, yielding objects whose `type` is one of `types`.
   *
   * The stream always starts with a START token and ends with a DONE token.
   * Adjacent pieces of text are merged into a single TEXT token and empty
   * text is not emitted.
   *
   * @param {string} text
   * @param {number} [start=0] index of the first character to read
   * @param {number} [end=text.length] no token is started at or after this
   *   index; a token that starts before it may still extend past it
   */
  function* tokenize (text, start = 0, end = text.length)
  {
    // Produces the unmerged token stream; TEXT tokens are coalesced below.
    function* raw_tokens ()
    {
      let i = start

      yield {type: types.START}

      // SCRIPT mode: everything up to the script end tag is plain text.
      // Returns true when tokenizing has to stop.
      function* handle_script ()
      {
        const script = get_script(text, i)
        if (script)
        {
          i += script.length
          yield {type: types.TEXT, text: script.match[2]}
          yield {type: types.CLOSING_TAG, name: 'script'}
          state = states.TEXT
          return false
        }

        // No end tag in this input: emit the rest and stay in SCRIPT mode.
        yield {type: types.TEXT, text: text.substring(i)}
        return true
      }

      // COMMENT mode: read up to `-->`. Returns true when tokenizing has to stop.
      function* handle_comment ()
      {
        const comment = get_comment(text, i)
        if (comment)
        {
          i += comment.length
          yield {type: types.COMMENT, text: comment.match[2]}
          state = states.TEXT
          return false
        }

        // No `-->` in this input. Emit the rest and stop, staying in COMMENT
        // mode like an unterminated script does; looping again without
        // advancing `i` would never terminate.
        yield {type: types.COMMENT, text: text.substring(i)}
        return true
      }

      // TEXT mode: comment opener, opening tag, closing tag or character data.
      function* handle_text ()
      {
        const at_bracket = text.charAt(i) === '<'
        let found

        if (at_bracket && (found = get_comment_open(text, i)))
        {
          i += found.length
          state = states.COMMENT
        }
        else if (at_bracket && (found = get_opening_tag(text, i)))
        {
          i += found.length
          cur_tag = found.match[2]
          yield {type: types.OPENING_TAG, name: cur_tag}
          state = states.TAG
        }
        else if (at_bracket && (found = get_closing_tag(text, i)))
        {
          i += found.length
          yield {type: types.CLOSING_TAG, name: found.match[2]}
        }
        else if ((found = get_text(text, i)))
        {
          i += found.length
          yield {type: types.TEXT, text: found.match[1]}
        }
        else
        {
          // A `<` that starts no known construct is ordinary text.
          const ch = text.substring(i, i + 1)
          i += 1
          yield {type: types.TEXT, text: ch}
        }
      }

      // TAG mode: attribute, comment inside the tag, or end of the tag.
      function* handle_tag ()
      {
        let found

        if ((found = get_attribute(text, i)))
        {
          i += found.length
          const name = found.match[2]
          let value = ''
          if (found.match[4])
          { // attribute has a value
            const attr = read_attr(text, i)
            i += attr.length
            value = attr.value
          }
          yield {type: types.ATTRIBUTE, name, value}
        }
        else if ((found = get_comment_open_in_tag(text, i)))
        {
          i += found.length
          // When the comment is never closed only its opener is skipped.
          const comment = get_comment(text, i)
          if (comment)
          {
            i += comment.length
            yield {type: types.COMMENT_IN_TAG, text: comment.match[2]}
          }
        }
        else if ((found = get_tag_end(text, i)))
        {
          i += found.length
          yield {type: types.OPENING_TAG_END, name: cur_tag, token: found.match[2]}
          state = cur_tag === 'script' ? states.SCRIPT : states.TEXT
        }
        else
        {
          // Nothing tag-like here: let TEXT mode consume the input.
          state = states.TEXT
        }
      }

      try
      {
        while (i < end)
        {
          switch (state)
          {
            case states.TEXT:
              yield* handle_text()
              break
            case states.COMMENT:
              if (yield* handle_comment()) return
              break
            case states.SCRIPT:
              if (yield* handle_script()) return
              break
            case states.TAG:
              yield* handle_tag()
              break
            default:
              return
          }
        }
      }
      finally
      {
        yield {type: types.DONE}
      }
    }

    // Merge runs of TEXT tokens; flush them before any other token.
    let cur_text = undefined
    for (const tk of raw_tokens())
    {
      if (tk.type === types.TEXT)
      {
        cur_text = cur_text === undefined ? tk.text : cur_text + tk.text
        continue
      }

      if (cur_text)
      {
        yield {type: types.TEXT, text: cur_text}
        cur_text = undefined
      }

      yield tk
    }
  }

  return {tokenize}
}

export {tokenize}

