/* Reading binary .mo files.
   Copyright (C) 1995-2024 Free Software Foundation, Inc.
   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

/* Specification.  */
#include "read-mo.h"

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* These two include files describe the binary .mo format.  */
#include "gmo.h"
#include "hash-string.h"

#include <error.h>
#include "xalloc.h"
#include "binary-io.h"
#include "message.h"
#include "format.h"
#include "gettext.h"
#include "xsize.h"

#define _(str) gettext (str)


enum mo_endianness
{
  MO_LITTLE_ENDIAN,
  MO_BIG_ENDIAN
};

/* We read the file completely into memory.  This is more efficient than
   lots of lseek().  This struct represents the .mo file in memory.  */
struct binary_mo_file
{
  const char *filename;
  char *data;
  size_t size;
  enum mo_endianness endian;
};


/* Read the contents of the given input stream.  */
static void
read_binary_mo_file (struct binary_mo_file *bfp,
                     FILE *fp, const char *filename)
{
  char *buf = NULL;
  size_t alloc = 0;
  size_t size = 0;
  size_t count;

  while (!feof (fp))
    {
      const size_t increment = 4096;
      if (size + increment > alloc)
        {
          alloc = alloc + alloc / 2;
          if (alloc < size + increment)
            alloc = size + increment;
          buf = (char *) xrealloc (buf, alloc);
        }
      count = fread (buf + size, 1, increment, fp);
      if (count == 0)
        {
          if (ferror (fp))
            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                   filename);
        }
      else
        size += count;
    }
  buf = (char *) xrealloc (buf, size);
  bfp->filename = filename;
  bfp->data = buf;
  bfp->size = size;
}

/* Get a 32-bit number from the file, at the given file position.  */
static nls_uint32
get_uint32 (const struct binary_mo_file *bfp, size_t offset)
{
  nls_uint32 b0, b1, b2, b3;
  size_t end = xsum (offset, 4);

  if (size_overflow_p (end) || end > bfp->size)
    error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);

  b0 = *(unsigned char *) (bfp->data + offset + 0);
  b1 = *(unsigned char *) (bfp->data + offset + 1);
  b2 = *(unsigned char *) (bfp->data + offset + 2);
  b3 = *(unsigned char *) (bfp->data + offset + 3);
  if (bfp->endian == MO_LITTLE_ENDIAN)
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
  else
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}

/* Get a static string from the file, at the given file position.  */
static char *
get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp)
{
  /* See 'struct string_desc'.  */
  nls_uint32 s_length = get_uint32 (bfp, offset);
  nls_uint32 s_offset = get_uint32 (bfp, offset + 4);
  size_t s_end = xsum3 (s_offset, s_length, 1);

  if (size_overflow_p (s_end) || s_end > bfp->size)
    error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
  if (bfp->data[s_offset + s_length] != '\0')
    error (EXIT_FAILURE, 0,
           _("file \"%s\" contains a not NUL terminated string"),
           bfp->filename);

  *lengthp = s_length + 1;
  return bfp->data + s_offset;
}

/* Get a system dependent string from the file, at the given file position.  */
static char *
get_sysdep_string (const struct binary_mo_file *bfp, size_t offset,
                   const struct mo_file_header *header, size_t *lengthp)
{
  /* See 'struct sysdep_string'.  */
  size_t length;
  char *string;
  size_t i;
  char *p;
  nls_uint32 s_offset;

  /* Compute the length.  */
  s_offset = get_uint32 (bfp, offset);
  length = 0;
  for (i = 4; ; i += 8)
    {
      nls_uint32 segsize = get_uint32 (bfp, offset + i);
      nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
      nls_uint32 sysdep_segment_offset;
      nls_uint32 ss_length;
      nls_uint32 ss_offset;
      size_t ss_end;
      size_t s_end;
      size_t n;

      s_end = xsum (s_offset, segsize);
      if (size_overflow_p (s_end) || s_end > bfp->size)
        error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
      length += segsize;
      s_offset += segsize;

      if (sysdepref == SEGMENTS_END)
        {
          /* The last static segment must end in a NUL.  */
          if (!(segsize > 0 && bfp->data[s_offset - 1] == '\0'))
            /* Invalid.  */
            error (EXIT_FAILURE, 0,
                   _("file \"%s\" contains a not NUL terminated system dependent string"),
                   bfp->filename);
          break;
        }
      if (sysdepref >= header->n_sysdep_segments)
        /* Invalid.  */
        error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
               bfp->filename);
      /* See 'struct sysdep_segment'.  */
      sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
      ss_length = get_uint32 (bfp, sysdep_segment_offset);
      ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
      ss_end = xsum (ss_offset, ss_length);
      if (size_overflow_p (ss_end) || ss_end > bfp->size)
        error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
      if (!(ss_length > 0 && bfp->data[ss_end - 1] == '\0'))
        {
          char location[30];
          sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref);
          error (EXIT_FAILURE, 0,
                 _("file \"%s\" contains a not NUL terminated string, at %s"),
                 bfp->filename, location);
        }
      n = strlen (bfp->data + ss_offset);
      length += (n > 1 ? 1 + n + 1 : n);
    }

  /* Allocate and fill the string.  */
  string = XNMALLOC (length, char);
  p = string;
  s_offset = get_uint32 (bfp, offset);
  for (i = 4; ; i += 8)
    {
      nls_uint32 segsize = get_uint32 (bfp, offset + i);
      nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
      nls_uint32 sysdep_segment_offset;
      nls_uint32 ss_length;
      nls_uint32 ss_offset;
      size_t n;

      memcpy (p, bfp->data + s_offset, segsize);
      p += segsize;
      s_offset += segsize;

      if (sysdepref == SEGMENTS_END)
        break;
      if (sysdepref >= header->n_sysdep_segments)
        abort ();
      /* See 'struct sysdep_segment'.  */
      sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
      ss_length = get_uint32 (bfp, sysdep_segment_offset);
      ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
      if (ss_offset + ss_length > bfp->size)
        abort ();
      if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
        abort ();
      n = strlen (bfp->data + ss_offset);
      if (n > 1)
        *p++ = '<';
      memcpy (p, bfp->data + ss_offset, n);
      p += n;
      if (n > 1)
        *p++ = '>';
    }

  if (p != string + length)
    abort ();

  *lengthp = length;
  return string;
}

/* Reads an existing .mo file and adds the messages to mlp.  */
void
read_mo_file (message_list_ty *mlp, const char *filename)
{
  FILE *fp;
  struct binary_mo_file bf;
  struct mo_file_header header;
  unsigned int i;
  static lex_pos_ty pos = { __FILE__, __LINE__ };

  if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
    {
      fp = stdin;
      SET_BINARY (fileno (fp));
    }
  else
    {
      fp = fopen (filename, "rb");
      if (fp == NULL)
        error (EXIT_FAILURE, errno,
               _("error while opening \"%s\" for reading"), filename);
    }

  /* Read the file contents into memory.  */
  read_binary_mo_file (&bf, fp, filename);

  /* Get a 32-bit number from the file header.  */
# define GET_HEADER_FIELD(field) \
    get_uint32 (&bf, offsetof (struct mo_file_header, field))

  /* We must grope the file to determine which endian it is.
     Perversity of the universe tends towards maximum, so it will
     probably not match the currently executing architecture.  */
  bf.endian = MO_BIG_ENDIAN;
  header.magic = GET_HEADER_FIELD (magic);
  if (header.magic != _MAGIC)
    {
      bf.endian = MO_LITTLE_ENDIAN;
      header.magic = GET_HEADER_FIELD (magic);
      if (header.magic != _MAGIC)
        {
        unrecognised:
          error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
                 filename);
        }
    }

  header.revision = GET_HEADER_FIELD (revision);

  /* We support only the major revisions 0 and 1.  */
  switch (header.revision >> 16)
    {
    case 0:
    case 1:
      /* Fill the header parts that apply to major revisions 0 and 1.  */
      header.nstrings = GET_HEADER_FIELD (nstrings);
      header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
      header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
      header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
      header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);

      /* The following verifications attempt to ensure that 'msgunfmt' complains
         about a .mo file that may make libintl crash at run time.  */

      /* Verify that the array of messages is sorted.  */
      {
        char *prev_msgid = NULL;

        for (i = 0; i < header.nstrings; i++)
          {
            char *msgid;
            size_t msgid_len;

            msgid = get_string (&bf, header.orig_tab_offset + i * 8,
                                &msgid_len);
            if (i == 0)
              prev_msgid = msgid;
            else
              {
                if (!(strcmp (prev_msgid, msgid) < 0))
                  error (EXIT_FAILURE, 0,
                         _("file \"%s\" is not in GNU .mo format: The array of messages is not sorted."),
                         filename);
              }
          }
      }

      /* Verify the hash table.  */
      if (header.hash_tab_size > 0)
        {
          char *seen;
          unsigned int j;

          /* Verify the hash table's size.  */
          if (!(header.hash_tab_size > 2))
            error (EXIT_FAILURE, 0,
                   _("file \"%s\" is not in GNU .mo format: The hash table size is invalid."),
                   filename);

          /* Verify that the non-empty hash table entries contain the values
             1, ..., nstrings, each exactly once.  */
          seen = (char *) xcalloc (header.nstrings, 1);
          for (j = 0; j < header.hash_tab_size; j++)
            {
              nls_uint32 entry =
                get_uint32 (&bf, header.hash_tab_offset + j * 4);

              if (entry != 0)
                {
                  i = entry - 1;
                  if (!(i < header.nstrings && seen[i] == 0))
                    error (EXIT_FAILURE, 0,
                           _("file \"%s\" is not in GNU .mo format: The hash table contains invalid entries."),
                           filename);
                  seen[i] = 1;
                }
            }
          for (i = 0; i < header.nstrings; i++)
            if (seen[i] == 0)
              error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format: Some messages are not present in the hash table."),
                     filename);
          free (seen);

          /* Verify that the hash table lookup algorithm finds the entry for
             each message.  */
          for (i = 0; i < header.nstrings; i++)
            {
              size_t msgid_len;
              char *msgid = get_string (&bf, header.orig_tab_offset + i * 8,
                                        &msgid_len);
              nls_uint32 hash_val = hash_string (msgid);
              nls_uint32 idx = hash_val % header.hash_tab_size;
              nls_uint32 incr = 1 + (hash_val % (header.hash_tab_size - 2));
              for (;;)
                {
                  nls_uint32 entry =
                    get_uint32 (&bf, header.hash_tab_offset + idx * 4);

                  if (entry == 0)
                    error (EXIT_FAILURE, 0,
                           _("file \"%s\" is not in GNU .mo format: Some messages are at a wrong index in the hash table."),
                           filename);
                  if (entry == i + 1)
                    break;

                  if (idx >= header.hash_tab_size - incr)
                    idx -= header.hash_tab_size - incr;
                  else
                    idx += incr;
                }
            }
        }

      for (i = 0; i < header.nstrings; i++)
        {
          message_ty *mp;
          char *msgctxt;
          char *msgid;
          size_t msgid_len;
          char *separator;
          char *msgstr;
          size_t msgstr_len;

          /* Read the msgctxt and msgid.  */
          msgid = get_string (&bf, header.orig_tab_offset + i * 8,
                              &msgid_len);
          /* Split into msgctxt and msgid.  */
          separator = strchr (msgid, MSGCTXT_SEPARATOR);
          if (separator != NULL)
            {
              /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
              *separator = '\0';
              msgctxt = msgid;
              msgid = separator + 1;
              msgid_len -= msgid - msgctxt;
            }
          else
            msgctxt = NULL;

          /* Read the msgstr.  */
          msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
                               &msgstr_len);

          mp = message_alloc (msgctxt,
                              msgid,
                              (strlen (msgid) + 1 < msgid_len
                               ? msgid + strlen (msgid) + 1
                               : NULL),
                              msgstr, msgstr_len,
                              &pos);
          message_list_append (mlp, mp);
        }

      switch (header.revision & 0xffff)
        {
        case 0:
          break;
        case 1:
        default:
          /* Fill the header parts that apply to minor revision >= 1.  */
          header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
          header.sysdep_segments_offset =
            GET_HEADER_FIELD (sysdep_segments_offset);
          header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
          header.orig_sysdep_tab_offset =
            GET_HEADER_FIELD (orig_sysdep_tab_offset);
          header.trans_sysdep_tab_offset =
            GET_HEADER_FIELD (trans_sysdep_tab_offset);

          for (i = 0; i < header.n_sysdep_strings; i++)
            {
              message_ty *mp;
              char *msgctxt;
              char *msgid;
              size_t msgid_len;
              char *separator;
              char *msgstr;
              size_t msgstr_len;
              nls_uint32 offset;
              size_t f;

              /* Read the msgctxt and msgid.  */
              offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
              msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
              /* Split into msgctxt and msgid.  */
              separator = strchr (msgid, MSGCTXT_SEPARATOR);
              if (separator != NULL)
                {
                  /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
                  *separator = '\0';
                  msgctxt = msgid;
                  msgid = separator + 1;
                  msgid_len -= msgid - msgctxt;
                }
              else
                msgctxt = NULL;

              /* Read the msgstr.  */
              offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
              msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);

              mp = message_alloc (msgctxt,
                                  msgid,
                                  (strlen (msgid) + 1 < msgid_len
                                   ? msgid + strlen (msgid) + 1
                                   : NULL),
                                  msgstr, msgstr_len,
                                  &pos);

              /* Only messages with c-format or objc-format annotation are
                 recognized as having system-dependent strings by msgfmt.
                 Which one of the two, we don't know.  We have to guess,
                 assuming that c-format is more probable than objc-format and
                 that the .mo was likely produced by "msgfmt -c".  */
              for (f = format_c; ; f = format_objc)
                {
                  bool valid = true;
                  struct formatstring_parser *parser = formatstring_parsers[f];
                  const char *str_end;
                  const char *str;

                  str_end = msgid + msgid_len;
                  for (str = msgid; str < str_end; str += strlen (str) + 1)
                    {
                      char *invalid_reason = NULL;
                      void *descr =
                        parser->parse (str, false, NULL, &invalid_reason);

                      if (descr != NULL)
                        parser->free (descr);
                      else
                        {
                          free (invalid_reason);
                          valid = false;
                          break;
                        }
                    }
                  if (valid)
                    {
                      str_end = msgstr + msgstr_len;
                      for (str = msgstr; str < str_end; str += strlen (str) + 1)
                        {
                          char *invalid_reason = NULL;
                          void *descr =
                            parser->parse (str, true, NULL, &invalid_reason);

                          if (descr != NULL)
                            parser->free (descr);
                          else
                            {
                              free (invalid_reason);
                              valid = false;
                              break;
                            }
                        }
                    }

                  if (valid)
                    {
                      /* Found the most likely among c-format, objc-format.  */
                      mp->is_format[f] = yes;
                      break;
                    }

                  /* Try next f.  */
                  if (f == format_objc)
                    break;
                }

              message_list_append (mlp, mp);
            }
          break;
        }
      break;

    default:
      goto unrecognised;
    }

  if (fp != stdin)
    fclose (fp);
}
