// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
#define THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_

#include <stddef.h>

#include <string>
#include <vector>

#include "base/macros.h"
#include "google/bdict.h"

namespace hunspell {

// Forward declarations. BDictReader is named as a friend by the iterator
// classes below; NodeReader is only used by reference in this header, so its
// full definition is not needed here.
class BDictReader;
class NodeReader;

// Iterators -------------------------------------------------------------------

// Iterates through all words in the dictionary. Each call to Advance() fills
// the next word into a caller-specified buffer.
//
// Instances can be copied and assigned, but new ones can only be created by
// BDictReader: the constructor taking a NodeReader is private and BDictReader
// is a friend.
class WordIterator {
 public:
  // Declared here and defined out of line. NOTE(review): presumably for the
  // same reason as operator= below (NodeInfo is incomplete in this header).
  WordIterator(const WordIterator& other);
  ~WordIterator();

  // This must be explicitly declared and implemented in the .cc file so it will
  // compile without knowing the size of NodeInfo.
  WordIterator& operator=(const WordIterator&);

  // Fills the buffer with the next word and the affixes for it into the given
  // array. Returns the number of affixes. A return value of 0 means there are
  // no more words.
  //
  // |output_buffer| / |output_len| describe the caller's buffer for the word;
  // |affix_ids| must have room for BDict::MAX_AFFIXES_PER_WORD entries.
  // NOTE(review): behavior when the word does not fit in |output_len| is not
  // visible from this header; check the .cc file.
  int Advance(char* output_buffer, size_t output_len,
              int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);

 private:
  friend class BDictReader;

  // Only declared here; the definition is not visible in this header, which is
  // why the special member functions above cannot be compiler-generated inline.
  struct NodeInfo;

  // Private: used by the friend class BDictReader to create iterators.
  // NOTE(review): single-argument constructor is not marked explicit; consider
  // adding it if no caller relies on the implicit conversion.
  WordIterator(const NodeReader& reader);

  // Called by Advance when a leaf is found to generate the word, affix list,
  // and return value.
  int FoundLeaf(const NodeReader& node, char cur_char,
                char* output_buffer, size_t output_len,
                int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);

  // Iteration state. NOTE(review): presumably one entry per node on the
  // current path through the dictionary; confirm against the .cc file.
  std::vector<NodeInfo> stack_;
};

// Will iterate over a list of lines separated by NULL characters. The list is
// terminated by a double-NULL (see IsDone()).
class LineIterator {
 public:
  // Returns the next word in the sequence or NULL if there are no more.
  const char* Advance();

  // Advances to the next word in the sequence and copies it into the given
  // buffer, of the given length. If it doesn't fit, it will be truncated.
  // Returns true on success.
  bool AdvanceAndCopy(char* buf, size_t buf_len);

  // Returns true when all data has been read. We're done when we reach a
  // double-NULL or the end of the input (shouldn't happen).
  bool IsDone() const;

 protected:
  // Construction is restricted to BDictReader (friend) and to derived classes
  // such as ReplacementIterator, which is why this section is protected rather
  // than private.
  friend class BDictReader;

  // |first_offset| is the offset within |bdict_data| at which iteration
  // starts. NOTE(review): presumably |bdict_data| is not copied and must
  // outlive the iterator; confirm in the .cc file.
  LineIterator(const unsigned char* bdict_data, size_t bdict_length,
               size_t first_offset);

  // The data being iterated over and its length.
  const unsigned char* bdict_data_;
  size_t bdict_length_;

  // Current offset within bdict_data of the next string to read.
  size_t cur_offset_;
};

// Created by BDictReader::GetReplacementIterator to iterate over all
// replacement pairs. Built on top of LineIterator, from which it inherits the
// underlying data pointer, length and current offset.
class ReplacementIterator : public LineIterator {
 public:
  // Fills pointers to NULL terminated strings into the given output params.
  // Returns false if there are no more pairs and nothing was filled in.
  bool GetNext(const char** first, const char** second);

 private:
  // Only BDictReader is allowed to construct instances.
  friend class BDictReader;

  // Forwards all arguments unchanged to the LineIterator constructor; this
  // class adds no state of its own.
  ReplacementIterator(const unsigned char* bdict_data, size_t bdict_length,
                      size_t first_offset)
      : LineIterator(bdict_data, bdict_length, first_offset) {
  }
};

// Provides read access to a BDict file that has been mapped into memory.
class BDictReader {
 public:
  // Init() has to be called, and has to succeed, before any other function of
  // this class is used.
  BDictReader();

  // Points the reader at the given data. Ownership is not transferred: the
  // caller is responsible for keeping the data valid until the reader is
  // destroyed. Returns true on success.
  bool Init(const unsigned char* bdic_data, size_t bdic_length);

  // Reports whether Init() succeeded, i.e. whether the other functions may be
  // called. The data pointer is used as the success flag.
  bool IsValid() const { return bdict_data_ != NULL; }

  // Looks up |word| in the dictionary. A word can match more than once when
  // the dictionary lists it several times with different affix rules.
  //
  // Returns the number of matches and fills that many affix group IDs into
  // |*affix_indices|. An ID of 0 indicates that the particular match has no
  // affix. A return value of 0 means that there are no matches.
  int FindWord(const char* word,
               int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

  // Iterator over all AF lines ("affix groups").
  LineIterator GetAfLineIterator() const;

  // Iterator over all SFX/PFX lines ("affix rules").
  LineIterator GetAffixLineIterator() const;

  // Iterator over all "other" lines.
  LineIterator GetOtherLineIterator() const;

  // Iterator over all replacements.
  ReplacementIterator GetReplacementIterator() const;

  // Testing helper: iterator over every word in the dictionary.
  WordIterator GetAllWordIterator() const;

 private:
  // Stays NULL until Init has succeeded; IsValid() relies on this.
  const unsigned char* bdict_data_;
  // NOTE(review): presumably the length passed to Init(); confirm in the .cc.
  size_t bdict_length_;

  // Not owned by this class: points into the data. NULL when the data is
  // invalid.
  const BDict::Header* header_;

  // NOTE(review): presumably also points into the data, like |header_|;
  // confirm in the .cc file.
  const BDict::AffHeader* aff_header_;

  DISALLOW_COPY_AND_ASSIGN(BDictReader);
};

}  // namespace hunspell

#endif  // THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
