// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_TESTING_STRING_GENERATOR_H_
#define RE2_TESTING_STRING_GENERATOR_H_

// String generator: generates all possible strings of up to
// maxlen letters using the set of letters in alpha.
// Fetch strings using a Java-like Next()/HasNext() interface.

#include <stdint.h>
#include <random>
#include <string>
#include <vector>

#include "util/util.h"
#include "re2/stringpiece.h"

namespace re2 {

class StringGenerator {
 public:
  StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
  ~StringGenerator() {}

  const StringPiece& Next();
  bool HasNext() { return hasnext_; }

  // Resets generator to start sequence over.
  void Reset();

  // Causes generator to emit random strings for next n calls to Next().
  void Random(int32_t seed, int n);

  // Causes generator to emit a NULL as the next call.
  void GenerateNULL();

 private:
  bool IncrementDigits();
  bool RandomDigits();

  // Global state.
  int maxlen_;                         // Maximum length string to generate.
  std::vector<std::string> alphabet_;  // Alphabet, one string per letter.

  // Iteration state.
  StringPiece sp_;           // Last StringPiece returned by Next().
  std::string s_;            // String data in last StringPiece returned by Next().
  bool hasnext_;             // Whether Next() can be called again.
  std::vector<int> digits_;  // Alphabet indices for next string.
  bool generate_null_;       // Whether to generate a NULL StringPiece next.
  bool random_;              // Whether generated strings are random.
  int nrandom_;              // Number of random strings left to generate.
  std::minstd_rand0 rng_;    // Random number generator.

  StringGenerator(const StringGenerator&) = delete;
  StringGenerator& operator=(const StringGenerator&) = delete;
};

// Generates and returns a string over binary alphabet {0,1} that contains
// all possible binary sequences of length n as subsequences.  The obvious
// brute force method would generate a string of length n * 2^n, but this
// generates a string of length n-1 + 2^n called a De Bruijn cycle.
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
//
// Such a string is useful for testing a DFA.  If you have a DFA
// where distinct last n bytes implies distinct states, then running on a
// DeBruijn string causes the DFA to need to create a new state at every
// position in the input, never reusing any states until it gets to the
// end of the string.  This is the worst possible case for DFA execution.
std::string DeBruijnString(int n);

}  // namespace re2

#endif  // RE2_TESTING_STRING_GENERATOR_H_