/* SPDX-License-Identifier: GPL-3.0-or-later */
/*
 * Copyright (c) 2020,2022,2023 Andreas K. Foerster <akf@akfoerster.de>
 *
 * This file is part of AKFText.
 *
 * AKFText is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * AKFText is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "akftext.h"

// Count the bytes that belong to the character starting at s, looking
// at no more than max bytes.  The lead byte s[0] is taken on trust and
// always counted; every following byte is counted only while it has
// the continuation-byte form 10xxxxxx.
static size_t
check_char_length (const unsigned char *s, size_t max)
{
  enum { CONT_MASK = 0xC0, CONT_TAG = 0x80 };
  size_t len;

  for (len = 1; len < max; ++len)
    {
      // anything that is not 10xxxxxx ends the sequence
      if ((s[len] & CONT_MASK) != CONT_TAG)
        break;
    }

  return len;
}


/*
 * Decode the NUL-terminated UTF-8 string t and hand every decoded
 * code point to Text_Character, in order.
 *
 * A NULL pointer is ignored.  Anything that is not well-formed UTF-8
 * is reported as a single U+FFFD (REPLACEMENT CHARACTER):
 *  - stray continuation bytes and the bytes 0xFE / 0xFF,
 *  - sequences that are cut short,
 *  - the obsolete 5- and 6-byte forms,
 *  - overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and
 *    values above U+10FFFF.
 */
extern void
Text_u8 (const char *t)
{
  const unsigned char *s;

  if (!t)
    return;

  s = (const unsigned char *) t;

  while (*s)
    {
      // assume an invalid byte until proven otherwise
      char32_t c = 0xFFFD;
      size_t bytes = 1;

      if (*s <= 0x7F)
        {
          c = *s;               // plain ASCII
        }
      else if (*s <= 0xBF)
        {
          bytes = 1;            // runaway continuation byte
        }
      else if (*s <= 0xDF)
        {
          bytes = check_char_length (s, 2);
          if (bytes == 2)
            {
              char32_t v = ((char32_t) (s[0] & 0x1F) << 6)
                | (char32_t) (s[1] & 0x3F);

              // below U+0080 it is an overlong form (lead 0xC0 / 0xC1)
              if (v >= 0x80)
                c = v;
            }
        }
      else if (*s <= 0xEF)
        {
          bytes = check_char_length (s, 3);
          if (bytes == 3)
            {
              char32_t v = ((char32_t) (s[0] & 0x0F) << (2 * 6))
                | ((char32_t) (s[1] & 0x3F) << 6)
                | (char32_t) (s[2] & 0x3F);

              // reject overlong forms and UTF-16 surrogates
              if (v >= 0x800 && !(v >= 0xD800 && v <= 0xDFFF))
                c = v;
            }
        }
      else if (*s <= 0xF4)
        {
          bytes = check_char_length (s, 4);
          if (bytes == 4)
            {
              char32_t v = ((char32_t) (s[0] & 0x07) << (3 * 6))
                | ((char32_t) (s[1] & 0x3F) << (2 * 6))
                | ((char32_t) (s[2] & 0x3F) << 6)
                | (char32_t) (s[3] & 0x3F);

              // reject overlong forms and anything beyond U+10FFFF
              if (v >= 0x10000 && v <= 0x10FFFF)
                c = v;
            }
        }
      else if (*s <= 0xFB)      // no valid Unicode
        {
          bytes = check_char_length (s, 5);
        }
      else if (*s <= 0xFD)      // no valid Unicode
        {
          bytes = check_char_length (s, 6);
        }
      // 0xFE and 0xFF keep the defaults: one byte, U+FFFD

      Text_Character (c);
      s += bytes;
    }
}
