//
// Fast-appending ropes.

// I was thinking about the problem of fast appending to a vector, and
// also about Boehm’s “ropes”. And it occurred to me that there might
// be a solution that preserves both the immutable cheap snapshottable
// semantics of ropes and something like the high speed of appending
// to a stralloc or an STL vector.

// It was only after I went through implementing B-tree concatenation
// in C++ (untested!!) that I realized that my idea wouldn’t work.

// Idea: The rope leaf type has a little buffer that has a fill
// pointer and a capacity, say 32 bytes by default, and knows what
// part of the buffer it’s using. If it’s using the whole buffer up to
// the buffer’s fill pointer, then it can bump the fill pointer and
// write a new byte in there without disturbing any other references
// to the buffer. So it’s nice and efficient: it doesn’t have to
// allocate a new leaf node with just a single byte in it, and it
// still preserves the immutable semantics of rope leaf nodes.

// Problem: while this does ultimately result in a lovely little
// balanced tree with 32-byte leaves and therefore a reasonably
// manageable amount of memory, it still has to allocate and
// initialize nodes all the way up to the roof on every append
// operation. If the internal nodes have 3.5 pointers on average and
// contain 3 other words that need to be initialized, then when you
// have a mebibyte of text, you'll have 32768 leaf nodes and 8 or 9
// tree levels up to the root, so you'll have to allocate 8 or 9 new
// tree nodes, so you’ll have to write about 54 words to memory for
// the treenodes, plus whatever the allocator requires, plus the
// character itself and its pointer, plus function activation records
// and stuff.

// This is not going to be competitive with a bounds-check followed by
// *x++ = c. And in fact another 3 levels in the tree due to not
// having this clever little “optimization” would hardly be
// overwhelming, except that it would increase the number of tree
// nodes from about 46 thousand to about 1500 thousand. A simpler
// optimization that would have the same effect: when constructing a
// concatenation of fewer than some number of bytes (say, 32),
// just construct a new leaf node and copy the bytes!

// So this is basically kind of a dumb idea on its own. To be
// efficient for this, you would need some kind of splaying mechanism
// so that the node being written to moves up close to the root, so
// you don’t incur 10 or 20 allocations per byte appended.

See comments above for why this is a dumb idea.

// For clarity, this code is written assuming garbage collection.

class Concat;

class RopeException { };
class IndexError: public RopeException { };
class InvalidTree: public RopeException { };
class PreconditionFailed: public RopeException { };

class Rope {
public:
  virtual char operator[](int index) const = 0;
  virtual int len() const = 0;
  virtual const Rope *operator+(const Rope& other) const;
  virtual const Rope *substring(int index, int end_index) const;

  // virtual void copy_range(int index, int end_index, char *dest) const;

  // Apparently I don't understand "protected"... but these are for
  // internal use.
  virtual int depth() const = 0;
  virtual const Rope *prepend_to_concat(const Concat &prefix) const = 0;
  int index_after(int index) const {
    if (index < 0) throw IndexError();
    return index - len();
  }

  virtual void validate() const = 0;
};

// Our ropes are B-trees with B=4, so their internal nodes have 2, 3, or 4
// children.
const int max_kids = 4;
class Concat: public Rope {
public:
  Concat(const Rope * const *kids, int n);

  virtual char operator[](int index) const;
  virtual int len() const;
  virtual const Rope *operator+(const Rope& other) const;
  virtual const Rope *substring(int index, int end_index) const;

  virtual void validate() const;
  virtual int depth() const;
  virtual const Rope *prepend_to_concat(const Concat &prefix) const;

private:
  void index_lookup(int index, int &which_kid, int &index_in_kid) const {
    for (int jj = 0; jj < m_nkids; jj++) {
      int next_index = m_kids[jj]->index_after(index);
      // XXX index signedness problems?
      if (next_index < 0) {
        which_kid = jj;
        index_in_kid = index;
        return;
      }
      index = next_index;
    }
  }
  const Rope * const *m_kids;
  int m_len, m_depth, m_nkids;
};

class RopeLeaf: public Rope {
public:
  RopeLeaf(const char *s);    // nul-terminated
  RopeLeaf();
  virtual char operator[](int index) const;
  virtual int len() const;
  virtual const Rope *operator+(const Rope& other) const;
  virtual const Rope *substring(int index, int end_index) const;

protected:
  virtual void validate() const = 0;
  virtual int depth() const = 0;
  virtual const Rope *prepend_to_concat(const Rope &prefix) const {
    throw PreconditionFailed();
  }

private:
  class CharBuffer {
  public:
    const char &operator[](int index) const;
    int len() const { return m_len; };
    int allocated_len() const { return m_allocated_len; };
    void append(char cc);

  private:
    char *m_ss;
    int m_len;
    int m_allocated_len;
  };

  CharBuffer &m_contents;
  int m_contents_len;
};


// --- Concat --- 

Concat::Concat(const Rope * const *kids, int n):
  m_kids(kids),
  m_depth(kids[0]->depth() + 1) {
  int len = 0;
  for (int ii = 0; ii < n; ++ii) len += m_kids[ii]->len();
  m_len = len;
  validate();
}

char Concat::operator[](int index) const {
  int which_kid, index_in_kid;
  index_lookup(index, which_kid, index_in_kid);
  return (*m_kids[which_kid])[index_in_kid];
}

int Concat::len() const { return m_len; }

const Rope *Concat::operator+(const Rope &other) const {
  if (other.depth() >= this->depth()) { return other.prepend_to_concat(*this); }

  const Rope *new_last_child = *m_kids[m_nkids - 1] + other;

  // Did that concatenation bubble up a node? If so, we may have to
  // bubble up further.
  if (new_last_child->depth() == this->depth()) {
    // We would like to create a node with one less subnode than we
    // ourselves have, and then create a 2-node. However, that might
    // leave too few subnodes (e.g. one).  In that case, we prepend
    // the one subnode to the other tree.
    if (m_nkids == 2) return *m_kids[0] + *new_last_child;
    
    // XXX should this delegate to prepend_to_concat too?
    // Because everything is immutable and we are assuming GC, we can
    // share the list of subnodes for the new left child in this case.
    const Rope *new_left_child = new Concat(m_kids, m_nkids-1);
    const Rope **kids = new const Rope*[2];
    kids[0] = this;
    kids[1] = new_last_child;
    return new Concat(kids, 2);
  }

  // But if it didn’t bubble up, we can just return a modified version
  // of ourselves.
  const Rope **kids = new const Rope*[m_nkids];
  for (int ii = 0; ii < m_nkids-1; ++ii) kids[ii] = m_kids[ii];
  kids[m_nkids - 1] = new_last_child;
  return new Concat(kids, m_nkids);
}

// Called only from Concat::operator+.
const Rope *Concat::prepend_to_concat(const Concat &prefix) const {
  if (prefix.depth() > this->depth()) throw PreconditionFailed();

  if (prefix.depth() == this->depth()) {
    // Normally this entails creating a new parent node, but there’s a
    // possibility that we’re both 2-nodes, in which case we should
    // create a 4-node instead.
    int total_nkids = prefix.m_nkids + m_nkids;
    if (total_nkids <= max_kids) {
      const Rope **kids = new const Rope*[total_nkids];
      for (int ii = 0; ii < prefix.m_nkids; ++ii) {
        kids[ii] = prefix.m_kids[ii];
      }
      for (int ii = 0; ii < m_nkids; ++ii) {
        kids[prefix.m_nkids + ii] = m_kids[ii];
      }
      return new Concat(kids, total_nkids);
    }

    // So here’s the normal case for equal-depth trees.
    const Rope **kids = new const Rope*[2]; // XXX why are there two of these?
    kids[0] = &prefix;
    kids[1] = this;
    return new Concat(kids, 2);
  }

  // Given that the prefix is less deep, we recurse down the left
  // subtree. This is symmetric with the code in operator+.

  const Rope *new_first_child = prefix + *m_kids[0];

  if (new_first_child->depth() == this->depth()) {
    const Rope *new_right_child = new Concat(m_kids + 1, m_nkids - 1);
    const Rope **kids = new const Rope*[2];
    kids[0] = new_first_child;
    kids[1] = new_right_child;
    return new Concat(kids, 2); // XXX another fucking binary case!
  }

  const Rope **kids = new const Rope*[m_nkids];
  kids[0] = new_first_child;
  for (int ii = 1; ii < m_nkids; ++ii) kids[ii] = m_kids[ii];
  return new Concat(kids, m_nkids);
}

const Rope *Concat::substring(int index, int end_index) const {
  if (end_index < index) throw PreconditionFailed();
  if (index < 0) throw IndexError();

  int start_kid, start_kid_index, end_kid, end_kid_index;
  index_lookup(index, start_kid, start_kid_index);
  index_lookup(index, end_kid, end_kid_index);

  if (start_kid == end_kid) {
    return m_kids[start_kid]->substring(start_kid_index, end_kid_index);
  }

  int nkids = end_kid - start_kid + 1;
  const Rope **kids = new const Rope*[end_kid - start_kid + 1];
  for (int ii = start_kid; ii <= end_kid; ++ii) {
    kids[ii - start_kid] = m_kids[ii];
  }
  kids[0] = kids[0]->substring(start_kid_index, kids[0]->len());
  // XXX This may produce a zero-length substring.
  kids[nkids - 1] = kids[nkids - 1]->substring(0, end_kid_index);
  return new Concat(kids, nkids);
}

void Concat::validate() const {
  if (!m_kids) throw InvalidTree();
  if (m_len > max_kids) throw InvalidTree();
  if (m_len < 2) throw InvalidTree();

  int len = 0;
  for (int ii = 0; ii < m_len; ++ii) {
    len += m_kids[ii]->len();
    if (m_kids[ii]->depth() != this->depth() - 1) throw InvalidTree();
  }

  if (len != m_len) throw InvalidTree();
  
  for (int ii = 0; ii < m_len; ++ii) m_kids[ii]->validate();
}

int Concat::depth() const { return m_depth; }


// --- RopeLeaf --- 

RopeLeaf::RopeLeaf(const char *s) {
  

}

