0
\$\begingroup\$

I want to serialize a class Mango recursively.

Mango Class

class Mango
{
public:
    const MangoType &getMangoType() const { return typeMan; }
    MangoType &getMangoType() { return typeMan; }

private:
    // There are many members of different types : I just mention one.
    MangoType typeMan;
};

Data type classes

//MangoType Class
class MangoType
{
    /// It only has one member, i.e. Content.
public:
    /// Mutable getter of the content vector.
    std::vector<FuntionMango> &getContent() noexcept { return Content; }

    /// Read-only getter of the content vector. Needed so the content can be
    /// reached through a `const MangoType &`, which is what
    /// `Mango::getMangoType() const` hands out.
    const std::vector<FuntionMango> &getContent() const noexcept { return Content; }

private:
    /// \name Data of MangoType.

    std::vector<FuntionMango> Content;

};


/// FuntionMango class.
class FuntionMango
{
public:
    /// Getter of param types.
    const std::vector<ValType> &getParamTypes() const noexcept
    {
        return ParamTypes;
    }
    std::vector<ValType> &getParamTypes() noexcept { return ParamTypes; }

    /// Getter of return types.
    const std::vector<ValType> &getReturnTypes() const noexcept
    {
        return ReturnTypes;
    }
    std::vector<ValType> &getReturnTypes() noexcept { return ReturnTypes; }

    

private:
    /// \name Data of FuntionMango.
   
    std::vector<ValType> ParamTypes;
    std::vector<ValType> ReturnTypes;

};

//ValType Class
  
// Scoped enumeration with uint8_t as its underlying type.
// NOTE(review): as written there are no Line(...) entries between the
// #define and the #undef below, so this snippet declares no enumerators;
// presumably the entry list was elided from the post -- confirm.
enum class ValType : uint8_t
  {
     // X-macro scaffolding: Line(NAME, VALUE, STRING) expands to `NAME = VALUE`.
     #define UseValType
     #define Line(NAME, VALUE, STRING) NAME = VALUE
     // Both helper macros are removed again right away.
     #undef Line
     #undef UseValType
  };

Note: the serialize and deserialize functions act as an API in my project, so I want them to have the signatures below. It is fine if the data is serialized directly to memory; calling deserialize() on the result should then return the original data.

// Requested signature for serialization: takes a Mango by const reference and
// returns a byte vector. The body is left empty in this sketch.
std::vector<uint8_t> serialize(Mango const& Man) { }

// Requested signature for deserialization: takes a read-only byte span and
// returns a Mango by value. The body is left empty in this sketch.
Mango deserialize(std::span<uint8_t const> data) { }

I have two implementations: one without Boost and one that uses Boost.Endian:

Code: Without using boost

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iomanip> // debug output
#include <iostream>
#include <iterator>
#include <span>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

namespace MangoLib {
    // Forward declaration so the API can be declared ahead of the class.
    class Mango;

    // Serialization API: stream-based and in-memory variants.
    void serialize_to_stream(std::ostream& os, Mango const& Man);
    void deserialize(std::istream& is, Mango& Man);
    std::vector<uint8_t> serialize(Mango const& Man);
    Mango                deserialize(std::span<uint8_t const> data);

    // Demo value types. Each Line(NAME, VALUE, STRING) entry expands to
    // `NAME = VALUE`; the STRING argument is dropped by this expansion.
    enum class ValType : uint8_t {
#define UseValType
#define Line(NAME, VALUE, STRING) NAME = VALUE
        Line(void_,   0, "void"),
        Line(int_,    1, "int"),
        Line(bool_,   2, "bool"),
        Line(string_, 3, "string"),
#undef Line
#undef UseValType
    };

    using ValTypes = std::vector<ValType>;

    // Holds two ValType lists: parameter types and return types.
    class FuntionMango {
      private:
        ValTypes ParamTypes;
        ValTypes ReturnTypes;

      public:
        auto getParamTypes() const noexcept -> const ValTypes& { return ParamTypes; }
        auto getParamTypes() noexcept -> ValTypes& { return ParamTypes; }

        auto getReturnTypes() const noexcept -> const ValTypes& { return ReturnTypes; }
        auto getReturnTypes() noexcept -> ValTypes& { return ReturnTypes; }
    };

    using FuntionMangos = std::vector<FuntionMango>;

    // Wraps a list of FuntionMango entries.
    class MangoType {
      private:
        FuntionMangos Content;

      public:
        auto getContent() noexcept -> FuntionMangos& { return Content; }
        auto getContent() const noexcept -> const FuntionMangos& { return Content; }
    };

    // Top-level object handled by the serialization API.
    class Mango {
      private:
        MangoType typeMan;
        // many other members

      public:
        auto getMangoType() const -> const MangoType& { return typeMan; }
        auto getMangoType() -> MangoType& { return typeMan; }
    };
} // namespace MangoLib

namespace my_serialization_helpers {

    ////////////////////////////////////////////////////////////////////////////
    // This namespace serves as an extension point for your serialization; in
    // particular we choose endianness and representation of strings.
    // Integers are copied byte-for-byte from their in-memory object
    // representation, i.e. in the byte order of the host.
    //
    // Overloads for primitive types are declared BEFORE the container
    // templates on purpose: fundamental types have no associated namespace,
    // so a container template can only call primitive overloads that were
    // already declared at its own point of definition. With the primitives
    // declared later, std::vector<uint16_t> etc. would not compile.
    //
    // TODO add overloads as needed (signed integer types, binary floats,
    // containers of... etc)
    ////////////////////////////////////////////////////////////////////////////

    // decide on the max supported container capacity:
    using container_size_type = std::uint32_t;

    // Largest number of elements (or bytes, for strings) that is allocated up
    // front on the strength of a length prefix alone. The prefix comes from
    // the input and may be corrupt; anything beyond this bound is only
    // allocated while actual data keeps arriving.
    inline constexpr std::size_t max_preallocation = 4096;

    ////////////////////////////////////////////////////////////////////////////
    // generators

    /// Converts a container size to the serialized length type.
    /// @throws std::runtime_error if the size does not fit, instead of
    ///         silently writing a truncated length.
    inline container_size_type checked_length(std::size_t size) {
        auto const len = static_cast<container_size_type>(size);
        if (len != size)
            throw std::runtime_error("serialize: container too large");
        return len;
    }

    /// Writes the object representation of a trivially copyable value.
    /// @return the advanced output iterator.
    template <typename Out, typename T>
    Out generate_raw(Out out, T const& value) {
        static_assert(std::is_trivially_copyable_v<T>);
        return std::copy_n(reinterpret_cast<char const*>(&value), sizeof(value), out);
    }

    /// Writes a single byte.
    template <typename Out> Out do_generate(Out out, uint8_t const& data) {
        return std::copy_n(&data, sizeof(data), out);
    }

    /// Writes a 16-bit unsigned integer.
    template <typename Out>
    Out do_generate(Out out, uint16_t const& data) {
        return generate_raw(out, data);
    }

    /// Writes a 32-bit unsigned integer.
    template <typename Out>
    Out do_generate(Out out, uint32_t const& data) {
        return generate_raw(out, data);
    }

    /// Writes a string as a length prefix followed by its characters.
    /// @throws std::runtime_error if the length exceeds container_size_type.
    template <typename Out>
    Out do_generate(Out out, std::string const& data) {
        out = generate_raw(out, checked_length(data.length()));
        return std::copy(data.begin(), data.end(), out);
    }

    /// Writes a vector as a length prefix followed by each element.
    /// @throws std::runtime_error if the size exceeds container_size_type.
    template <typename Out, typename T>
    Out do_generate(Out out, std::vector<T> const& data) {
        out = generate_raw(out, checked_length(data.size()));
        for (auto const& el : data)
            out = do_generate(out, el);
        return out;
    }

    ////////////////////////////////////////////////////////////////////////////
    // parsers

    /// Length guarded copy_n: reads exactly n bytes from [in, last).
    /// @return false if the input ran out first; `in` is left where it stopped.
    template <typename It>
    bool parse_raw(It& in, It last, char* raw_into, size_t n) {
        while (in != last && n) {
            *raw_into++ = *in++;
            --n;
        }
        return n == 0;
    }

    /// Reads the object representation of a trivially copyable value.
    template <typename It, typename T>
    bool parse_raw(It& in, It last, T& into) {
        static_assert(std::is_trivially_copyable_v<T>);
        return parse_raw(in, last, reinterpret_cast<char*>(&into), sizeof(into));
    }

    /// Reads a single byte.
    template <typename It>
    bool do_parse(It& in, It last, uint8_t& data) {
        return parse_raw(in, last, data);
    }

    /// Reads a 16-bit unsigned integer.
    template <typename It>
    bool do_parse(It& in, It last, uint16_t& data) {
        return parse_raw(in, last, data);
    }

    /// Reads a 32-bit unsigned integer.
    template <typename It>
    bool do_parse(It& in, It last, uint32_t& data) {
        return parse_raw(in, last, data);
    }

    /// Reads a length-prefixed string.
    /// The string grows in chunks of at most max_preallocation bytes, so a
    /// bogus length prefix cannot force a huge allocation before the input
    /// turns out to be too short.
    /// @return false on truncated input (data is then partially filled).
    template <typename It>
    bool do_parse(It& in, It last, std::string& data) {
        container_size_type len = 0;
        if (!parse_raw(in, last, len))
            return false;
        data.clear();
        std::size_t done = 0;
        while (done < len) {
            auto const chunk = std::min<std::size_t>(len - done, max_preallocation);
            data.resize(done + chunk);
            if (!parse_raw(in, last, data.data() + done, chunk))
                return false;
            done += chunk;
        }
        return true;
    }

    /// Reads a length-prefixed vector, element by element.
    /// Only up to max_preallocation elements are reserved ahead of time; the
    /// vector grows normally beyond that while elements keep parsing.
    /// @return false on truncated input (data then holds the elements read
    ///         so far, the last one possibly incomplete).
    template <typename It, typename T>
    bool do_parse(It& in, It last, std::vector<T>& data) {
        container_size_type len = 0;
        if (!parse_raw(in, last, len))
            return false;
        data.clear();
        data.reserve(std::min<std::size_t>(len, max_preallocation));
        for (; len != 0; --len) {
            data.emplace_back();
            if (!do_parse(in, last, data.back()))
                return false;
        }
        return true;
    }
}

namespace MangoLib {

    // ValType is written as a value of its underlying integer type.
    template <typename Out> Out do_generate(Out out, ValType const& x) {
        using my_serialization_helpers::do_generate;
        auto const raw = static_cast<std::underlying_type_t<ValType>>(x);
        return do_generate(out, raw);
    }
    // Reads the underlying integer; x is only assigned on success.
    template <typename It> bool do_parse(It& in, It last, ValType& x) {
        using my_serialization_helpers::do_parse;
        std::underlying_type_t<ValType> raw;
        if (!do_parse(in, last, raw))
            return false;
        x = static_cast<ValType>(raw);
        return true;
    }

    // FuntionMango: parameter types first, then return types.
    template <typename Out> Out do_generate(Out out, FuntionMango const& x) {
        using my_serialization_helpers::do_generate;
        auto after_params = do_generate(out, x.getParamTypes());
        return do_generate(after_params, x.getReturnTypes());
    }
    template <typename It> bool do_parse(It& in, It last, FuntionMango& x) {
        using my_serialization_helpers::do_parse;
        if (!do_parse(in, last, x.getParamTypes()))
            return false;
        return do_parse(in, last, x.getReturnTypes());
    }

    // MangoType: just its content vector.
    template <typename Out> Out do_generate(Out out, MangoType const& x) {
        using my_serialization_helpers::do_generate;
        return do_generate(out, x.getContent());
    }
    template <typename It> bool do_parse(It& in, It last, MangoType& x) {
        using my_serialization_helpers::do_parse;
        auto& content = x.getContent();
        return do_parse(in, last, content);
    }

    // Mango: delegates to its MangoType member.
    template <typename Out> Out do_generate(Out out, Mango const& x) {
        return do_generate(out, x.getMangoType());
    }
    template <typename It> bool do_parse(It& in, It last, Mango& x) {
        auto& type = x.getMangoType();
        return do_parse(in, last, type);
    }
}

#include <cassert>

// Builds the demo object: a Mango whose content holds two FuntionMango entries.
MangoLib::Mango makeMango() {
    using MangoLib::FuntionMango;
    using MangoLib::ValType;

    // (bool, string) -> void
    FuntionMango first;
    first.getParamTypes().assign({ValType::bool_, ValType::string_});
    first.getReturnTypes().assign({ValType::void_});

    // (string) -> int
    FuntionMango second;
    second.getParamTypes().assign({ValType::string_});
    second.getReturnTypes().assign({ValType::int_});

    MangoLib::Mango result;
    result.getMangoType().getContent().assign({first, second});
    return result;
}

#include <fstream>

int main() {
    auto const mango = makeMango();

    auto const bytes = serialize(mango);
    auto const roundtrip = serialize(MangoLib::deserialize(bytes));
    assert(roundtrip == bytes);

    // alternatively with file IO:
    {
        std::ofstream ofs("output.bin", std::ios::binary);
        serialize_to_stream(ofs, mango);
    }
    // read back:
    {
        std::ifstream ifs("output.bin", std::ios::binary);
        MangoLib::Mango from_file;
        deserialize(ifs, from_file);

        assert(serialize(from_file) == bytes);
    }

    std::cout << "\nDebug dump " << std::dec << bytes.size() << " bytes:\n";
    for (auto ch : bytes)
        std::cout << "0x" << std::hex << std::setw(2) << std::setfill('0')
                  << static_cast<int>((uint8_t)ch) << " " << std::dec;
    std::cout << "\nDone\n";
}

// suggested implementations:
namespace MangoLib {
    /// Serializes Man into a freshly allocated byte vector.
    std::vector<uint8_t> serialize(Mango const& Man) {
        std::vector<uint8_t> bytes;
        do_generate(back_inserter(bytes), Man);
        return bytes;
    }

    /// Rebuilds a Mango from serialized bytes.
    /// @throws std::runtime_error if the data cannot be parsed.
    Mango deserialize(std::span<uint8_t const> data) {
        Mango result;
        auto  f = begin(data), l = end(data);
        if (!do_parse(f, l, result))
            throw std::runtime_error("deserialize");
        return result;
    }

    /// Serializes Man to the stream buffer of os.
    /// A failed write is reported by setting badbit on os; previously the
    /// failure was dropped and the caller had no way to notice it.
    void serialize_to_stream(std::ostream& os, Mango const& Man)  {
        auto const written = do_generate(std::ostreambuf_iterator<char>(os), Man);
        if (written.failed())
            os.setstate(std::ios_base::badbit);
    }

    /// Replaces the contents of Man with what is read from is.
    /// @throws std::runtime_error if the data cannot be parsed.
    void deserialize(std::istream& is, Mango& Man) {
        Man = {}; // clear it!
        std::istreambuf_iterator<char> f(is), l{};
        if (!do_parse(f, l, Man))
            throw std::runtime_error("deserialize");
    }
}

with boost Boost Endian

#include <boost/endian/arithmetic.hpp>
#include <algorithm>
#include <iomanip> // debug output
#include <iostream>
#include <string>
#include <vector>
#include <span>

namespace MangoLib {
    // Forward declaration so the API can be declared ahead of the class.
    class Mango;

    // Serialization API: stream-based and in-memory variants.
    void serialize_to_stream(std::ostream& os, Mango const& Man);
    void deserialize(std::istream& is, Mango& Man);
    std::vector<uint8_t> serialize(Mango const& Man);
    Mango                deserialize(std::span<uint8_t const> data);

    // Demo value types. Each Line(NAME, VALUE, STRING) entry expands to
    // `NAME = VALUE`; the STRING argument is dropped by this expansion.
    enum class ValType : uint8_t {
#define UseValType
#define Line(NAME, VALUE, STRING) NAME = VALUE
        Line(void_,   0, "void"),
        Line(int_,    1, "int"),
        Line(bool_,   2, "bool"),
        Line(string_, 3, "string"),
#undef Line
#undef UseValType
    };

    using ValTypes = std::vector<ValType>;

    // Holds two ValType lists: parameter types and return types.
    class FuntionMango {
      private:
        ValTypes ParamTypes;
        ValTypes ReturnTypes;

      public:
        auto getParamTypes() const noexcept -> const ValTypes& { return ParamTypes; }
        auto getParamTypes() noexcept -> ValTypes& { return ParamTypes; }

        auto getReturnTypes() const noexcept -> const ValTypes& { return ReturnTypes; }
        auto getReturnTypes() noexcept -> ValTypes& { return ReturnTypes; }
    };

    using FuntionMangos = std::vector<FuntionMango>;

    // Wraps a list of FuntionMango entries.
    class MangoType {
      private:
        FuntionMangos Content;

      public:
        auto getContent() noexcept -> FuntionMangos& { return Content; }
        auto getContent() const noexcept -> const FuntionMangos& { return Content; }
    };

    // Top-level object handled by the serialization API.
    class Mango {
      private:
        MangoType typeMan;
        // many other members

      public:
        auto getMangoType() const -> const MangoType& { return typeMan; }
        auto getMangoType() -> MangoType& { return typeMan; }
    };
} // namespace MangoLib

namespace my_serialization_helpers {
    ////////////////////////////////////////////////////////////////////////////
    // This namespace serves as an extension point for your serialization; in
    // particular we choose endianness and representation of strings.
    // Multi-byte integers go through boost::endian::big_uintN_t buffers.
    //
    // TODO add overloads as needed (signed integer types, binary floats,
    // containers of... etc)
    ////////////////////////////////////////////////////////////////////////////

    // Largest number of elements (or bytes, for strings) that is allocated up
    // front on the strength of a length prefix alone. The prefix comes from
    // the input and may be corrupt; anything beyond this bound is only
    // allocated while actual data keeps arriving.
    constexpr size_t max_preallocation = 4096;

    ////////////////////////////////////////////////////////////////////////////
    // generators

    /// Writes a single byte.
    template <typename Out> Out do_generate(Out out, uint8_t const& data) {
        return std::copy_n(&data, sizeof(data), out);
    }

    /// Writes a 16-bit unsigned integer via a big_uint16_t buffer.
    template <typename Out>
    Out do_generate(Out out, uint16_t const& data) {
        boost::endian::big_uint16_t tmp = data;
        return std::copy_n(reinterpret_cast<char const*>(&tmp), sizeof(tmp), out);
    }

    /// Writes a 32-bit unsigned integer via a big_uint32_t buffer.
    template <typename Out>
    Out do_generate(Out out, uint32_t const& data) {
        boost::endian::big_uint32_t tmp = data;
        return std::copy_n(reinterpret_cast<char const*>(&tmp), sizeof(tmp), out);
    }

    /// Writes a string as a 32-bit length prefix followed by its characters.
    /// @throws std::runtime_error if the length does not fit in 32 bits,
    ///         instead of silently writing a truncated length.
    template <typename Out>
    Out do_generate(Out out, std::string const& data) {
        auto const len = static_cast<uint32_t>(data.length());
        if (len != data.length())
            throw std::runtime_error("serialize: string too large");
        out = do_generate(out, len);
        return std::copy(data.begin(), data.end(), out);
    }

    /// Writes a vector as a 32-bit length prefix followed by each element.
    /// @throws std::runtime_error if the size does not fit in 32 bits,
    ///         instead of silently writing a truncated length.
    template <typename Out, typename T>
    Out do_generate(Out out, std::vector<T> const& data) {
        auto const len = static_cast<uint32_t>(data.size());
        if (len != data.size())
            throw std::runtime_error("serialize: vector too large");
        out = do_generate(out, len);
        for (auto const& el : data)
            out = do_generate(out, el);
        return out;
    }

    ////////////////////////////////////////////////////////////////////////////
    // parsers

    /// Length guarded copy_n: reads exactly n bytes from [in, last).
    /// @return false if the input ran out first; `in` is left where it stopped.
    template <typename It>
    bool parse_raw(It& in, It last, char* raw_into, size_t n) {
        while (in != last && n) {
            *raw_into++ = *in++;
            --n;
        }
        return n == 0;
    }

    /// Reads the object representation of a trivially copyable value.
    template <typename It, typename T>
    bool parse_raw(It& in, It last, T& into) {
        static_assert(std::is_trivially_copyable_v<T>);
        return parse_raw(in, last, reinterpret_cast<char*>(&into), sizeof(into));
    }

    /// Reads a single byte.
    template <typename It>
    bool do_parse(It& in, It last, uint8_t& data) {
        return parse_raw(in, last, data);
    }

    /// Reads a 16-bit unsigned integer; data is only assigned on success.
    template <typename It>
    bool do_parse(It& in, It last, uint16_t& data) {
        boost::endian::big_uint16_t tmp;
        bool ok = parse_raw(in, last, tmp);
        if (ok)
            data = tmp;
        return ok;
    }

    /// Reads a 32-bit unsigned integer; data is only assigned on success.
    template <typename It>
    bool do_parse(It& in, It last, uint32_t& data) {
        boost::endian::big_uint32_t tmp;
        bool ok = parse_raw(in, last, tmp);
        if (ok)
            data = tmp;
        return ok;
    }

    /// Reads a length-prefixed string.
    /// The string grows in chunks of at most max_preallocation bytes, so a
    /// bogus length prefix cannot force a huge allocation before the input
    /// turns out to be too short.
    /// @return false on truncated input (data is then partially filled).
    template <typename It>
    bool do_parse(It& in, It last, std::string& data) {
        uint32_t len = 0;
        if (!do_parse(in, last, len))
            return false;
        data.clear();
        size_t done = 0;
        while (done < len) {
            auto const chunk = std::min<size_t>(len - done, max_preallocation);
            data.resize(done + chunk);
            if (!parse_raw(in, last, data.data() + done, chunk))
                return false;
            done += chunk;
        }
        return true;
    }

    /// Reads a length-prefixed vector, element by element.
    /// Only up to max_preallocation elements are reserved ahead of time; the
    /// vector grows normally beyond that while elements keep parsing.
    /// @return false on truncated input (data then holds the elements read
    ///         so far, the last one possibly incomplete).
    template <typename It, typename T>
    bool do_parse(It& in, It last, std::vector<T>& data) {
        uint32_t len = 0;
        if (!do_parse(in, last, len))
            return false;
        data.clear();
        data.reserve(std::min<size_t>(len, max_preallocation));
        for (; len != 0; --len) {
            data.emplace_back();
            if (!do_parse(in, last, data.back()))
                return false;
        }
        return true;
    }
}

namespace MangoLib {
    // ValType is written as a value of its underlying integer type.
    template <typename Out> Out do_generate(Out out, ValType const& x) {
        using my_serialization_helpers::do_generate;
        auto const raw = static_cast<std::underlying_type_t<ValType>>(x);
        return do_generate(out, raw);
    }
    // Reads the underlying integer; x is only assigned on success.
    template <typename It> bool do_parse(It& in, It last, ValType& x) {
        using my_serialization_helpers::do_parse;
        std::underlying_type_t<ValType> raw;
        if (!do_parse(in, last, raw))
            return false;
        x = static_cast<ValType>(raw);
        return true;
    }

    // FuntionMango: parameter types first, then return types.
    template <typename Out> Out do_generate(Out out, FuntionMango const& x) {
        using my_serialization_helpers::do_generate;
        auto after_params = do_generate(out, x.getParamTypes());
        return do_generate(after_params, x.getReturnTypes());
    }
    template <typename It> bool do_parse(It& in, It last, FuntionMango& x) {
        using my_serialization_helpers::do_parse;
        if (!do_parse(in, last, x.getParamTypes()))
            return false;
        return do_parse(in, last, x.getReturnTypes());
    }

    // MangoType: just its content vector.
    template <typename Out> Out do_generate(Out out, MangoType const& x) {
        using my_serialization_helpers::do_generate;
        return do_generate(out, x.getContent());
    }
    template <typename It> bool do_parse(It& in, It last, MangoType& x) {
        using my_serialization_helpers::do_parse;
        auto& content = x.getContent();
        return do_parse(in, last, content);
    }

    // Mango: delegates to its MangoType member.
    template <typename Out> Out do_generate(Out out, Mango const& x) {
        return do_generate(out, x.getMangoType());
    }
    template <typename It> bool do_parse(It& in, It last, Mango& x) {
        auto& type = x.getMangoType();
        return do_parse(in, last, type);
    }
}

#include <cassert>

// Builds the demo object: a Mango whose content holds two FuntionMango entries.
MangoLib::Mango makeMango() {
    using MangoLib::FuntionMango;
    using MangoLib::ValType;

    // (bool, string) -> void
    FuntionMango first;
    first.getParamTypes().assign({ValType::bool_, ValType::string_});
    first.getReturnTypes().assign({ValType::void_});

    // (string) -> int
    FuntionMango second;
    second.getParamTypes().assign({ValType::string_});
    second.getReturnTypes().assign({ValType::int_});

    MangoLib::Mango result;
    result.getMangoType().getContent().assign({first, second});
    return result;
}

#include <fstream>

int main() {
    auto const mango = makeMango();

    auto const bytes = serialize(mango);
    auto const roundtrip = serialize(MangoLib::deserialize(bytes));

    assert(roundtrip == bytes);

    // alternatively with file IO:
    {
        std::ofstream ofs("output.bin", std::ios::binary);
        serialize_to_stream(ofs, mango);
    }
    // read back:
    {
        std::ifstream ifs("output.bin", std::ios::binary);
        MangoLib::Mango from_file;
        deserialize(ifs, from_file);

        assert(serialize(from_file) == bytes);
    }

    std::cout << "\nDebug dump " << std::dec << bytes.size() << " bytes:\n";
    for (auto ch : bytes)
        std::cout << "0x" << std::hex << std::setw(2) << std::setfill('0')
                  << static_cast<int>((uint8_t)ch) << " " << std::dec;
    std::cout << "\nDone\n";
}

// suggested implementations:
namespace MangoLib {
    /// Serializes Man into a freshly allocated byte vector.
    std::vector<uint8_t> serialize(Mango const& Man) {
        std::vector<uint8_t> bytes;
        do_generate(back_inserter(bytes), Man);
        return bytes;
    }

    /// Rebuilds a Mango from serialized bytes.
    /// @throws std::runtime_error if the data cannot be parsed.
    Mango deserialize(std::span<uint8_t const> data) {
        Mango result;
        auto  f = begin(data), l = end(data);
        if (!do_parse(f, l, result))
            throw std::runtime_error("deserialize");
        return result;
    }

    /// Serializes Man to the stream buffer of os.
    /// A failed write is reported by setting badbit on os; previously the
    /// failure was dropped and the caller had no way to notice it.
    void serialize_to_stream(std::ostream& os, Mango const& Man)  {
        auto const written = do_generate(std::ostreambuf_iterator<char>(os), Man);
        if (written.failed())
            os.setstate(std::ios_base::badbit);
    }

    /// Replaces the contents of Man with what is read from is.
    /// @throws std::runtime_error if the data cannot be parsed.
    void deserialize(std::istream& is, Mango& Man) {
        Man = {}; // clear it!
        std::istreambuf_iterator<char> f(is), l;
        if (!do_parse(f, l, Man))
            throw std::runtime_error("deserialize");
    }
}

Live on Coliru ( without using boost Boost Endian) Live on Coliru ( with boost Boost Endian)

I would like to improve the performance and safety of the serialize and deserialize implementations, in whichever of the two approaches above is the better one.

Note :

  1. I do not want to transfer the data over the network. My use case is that loading the data into the Mango class is very time-consuming every time (it is the result of a computation), so I want to serialize it once; the next time I need it, I can just deserialize the previously serialized data.
  2. I do not want to use a library that requires linking, such as Boost.Serialization. Is there any way to use it as a header-only library?
\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

Don't use big-endian format

Virtually all CPUs made nowadays are little-endian or can switch between big-endian and little-endian, with most operating systems favoring the little-endian format. Therefore, if you can choose the serialization format, use little-endian.

In fact, I would suggest you forget about the existence of big-endian CPUs, just like almost everyone assumes bytes have 8 bits and floating point formats are IEEE 754. To rationalize this, you can just mandate that your serialization format uses the same format as Intel/AMD/ARM/RISC-V processors, and apply the YAGNI principle to avoid implementing code that handles those rare big-endian processors.

Make your templates even more generic

Your do_generate() and do_parse() functions are templates, but only parts of the arguments are templated, and you still are writing a lot of overloads yourself. You can make even more use of templates and avoid similar overloads. For example, for serializing integers, you would ideally only write one overload:

// Copies the object representation of `data`, byte by byte, to `out` and
// returns the advanced iterator. Unconstrained: matches any type.
template <typename Out, typename Integer>
Out do_generate(Out out, Integer const& data) {
    auto const* const first = reinterpret_cast<char const*>(&data);
    return std::copy(first, first + sizeof data, out);
}

However, the above would catch all types, which you don't want of course. Since C++20, you can use concepts:

template <std::contiguous_iterator Out, std::integral Integer>
Out do_generate(Out out, Integer const& data) {
    return std::copy_n(reinterpret_cast<char const*>(&data), sizeof data, out);
}

In C++11 you can restrict a template as well, however you need the more cumbersome SFINAE technique, although there are some library functions like std::enable_if that make it less painful:

// Copies the object representation of an integer to `out` and returns the
// advanced iterator. The defaulted non-type template parameter removes this
// overload from consideration unless `Integer` is an integral type.
template <class Out, class Integer,
          typename std::enable_if<std::is_integral<Integer>::value, bool>::type = true>
Out do_generate(Out out, Integer const& data) {
    const char* const first = reinterpret_cast<const char*>(&data);
    return std::copy(first, first + sizeof(data), out);
}

But since you are using std::span you should be able to use concepts as well.

Consider using or emulating Boost::Serialization

Boost has a Serialization library that you could either use yourself or try to emulate. It works quite differently from your library. The main benefit is that you just have to add one function called serialize() to any class you want to make serializable, and that function basically just has to list all the members that need serialization. This allows for recursive serialization as well. Since it's just one function, some of the duplication you have with your method is avoided.

Boost::Serialization is unfortunately not a header-only library, but you can probably make something similar yourself that is header-only.

\$\endgroup\$
6
  • \$\begingroup\$ Thank you for the answer. Unfortunately I don't know much about boost serialization. Can you show me how this probably make something similar yourself that is header-only to be done? Can you show that with my class Mango ? \$\endgroup\$ Commented Oct 9, 2022 at 5:25
  • 1
    \$\begingroup\$ I suggest you read the documentation of Boost::Serialization, and since it's open source, you can also look at how it is implemented. \$\endgroup\$
    – G. Sliepen
    Commented Oct 9, 2022 at 10:02
  • \$\begingroup\$ Ok But can please show me any example of it? \$\endgroup\$ Commented Oct 9, 2022 at 10:44
  • \$\begingroup\$ github.com/boostorg/serialization This look very complicated. How should I make something similar in my case \$\endgroup\$ Commented Oct 9, 2022 at 10:51
  • 3
    \$\begingroup\$ @MartinYork "Network protocols always use big endian". This is false, but I get why you think that: IETF RFCs were written in a time when network equipment was largely using big-endian processors (like Sun SPARC and Motorola 68000), so the RFCs that describe low-level protocols like IP, TCP and so on mandate that big-endian is used for those protocols. But you are free to send whatever you like over TCP and UDP, there is absolutely no reason that your own stuff has to be big-endian. \$\endgroup\$
    – G. Sliepen
    Commented Oct 9, 2022 at 16:02

Not the answer you're looking for? Browse other questions tagged or ask your own question.