4
\$\begingroup\$

I'm creating a simple yet fast OrderBook, that only adds orders and matches them (no cancelling or modifications, etc.). I'm using partial template specialization to reduce branching in the hotpath, and I'm preallocating the price ladder to avoid expensive allocation operations (that and this being a lot more cache friendly are the reasons I chose this approach over something like a balanced BST or a heap). The printing of the orderbook after each order insertion is a requirement, and of course that dominates the runtime, but the order insertion and matching itself should be as low latency as possible.

This was tested on Clang 14 (on Ubuntu 22.04 LTS), and doesn't compile with GCC due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282.

Here's the first rough implementation (just wanted something implemented quickly, and I'm going to make improvements like using a conditional for processOrder instead of doing specialisation), and I thought I'd get some feedback on it and see what I can improve in terms of performance (and maybe some code quality issues too). I've included everything all in one file to make it easier to quickly run. Also error checking isn't needed, the orders will be formatted correctly with valid values.

#include <iostream>
#include <deque>
#include <vector>
#include <iomanip>

using namespace std; // remove this

struct Order
{
    int id;
    short price;
    int quantity;
};

enum class Side
{
    Buy,
    Sell
};

class OrderBook
{

    using book_t = vector<deque<Order>>;

private:
    book_t buys;
    book_t sells;
    int topOfBuys = -1;
    int bottomOfBuys = 65537;
    int topOfSells = 65537;
    int bottomOfSells = -1;

public:
    OrderBook() : buys(65536), sells(65536) {}

    void displayOrderBook()
    {
        cout << "+" << string(65, '-') << "+\n";
        cout << "| BUY                            | SELL                           |\n";
        cout << "| Id       | Volume      | Price | Price | Volume      | Id       |\n";
        cout << "+----------+-------------+-------+-------+-------------+----------+\n";

        vector<pair<Order, Order>> orders; // buy, sell
        int sellIndex = 0, buyIndex = 0;

        for (int i = topOfBuys; i >= bottomOfBuys; --i)
        {
            if (not buys[i].empty())
            {
                for (auto &order : buys[i])
                {
                    if (buyIndex < orders.size())
                        orders[buyIndex++].first = order;
                    else
                        orders.push_back({order, Order{}});
                }
            }
        }

        for (int i = topOfSells; i <= bottomOfSells; ++i)
        {
            if (not sells[i].empty())
            {
                for (auto &order : sells[i])
                {
                    if (sellIndex < orders.size())
                        orders[sellIndex++].second = order;
                    else
                        orders.push_back({Order{}, order});
                }
            }
        }

        auto displayField = [](const int value)
        {
            return value != 0 ? to_string(value) : "";
        };

        for (const auto &[buyOrder, sellOrder] : orders)
        {
            cout << "| " << setw(8) << displayField(buyOrder.id) << " | "
                 << setw(11) << displayField(buyOrder.quantity) << " | "
                 << setw(5) << displayField(buyOrder.price) << " | "
                 << setw(5) << displayField(sellOrder.price) << " | "
                 << setw(11) << displayField(sellOrder.quantity) << " | "
                 << setw(8) << displayField(sellOrder.id) << " |\n";
        }

        cout << "+" << string(65, '-') << "+\n";
    }

    string formatWithCommas(int value)
    {
        stringstream ss;
        ss.imbue(locale(""));
        ss << fixed << value;
        return ss.str();
    }

    template <Side side>
    void processOrder(Order &&order);

    template <>
    void processOrder<Side::Sell>(Order &&order)
    {
        if (order.price <= topOfBuys)
            matchOrders<Side::Sell, Side::Buy>(move(order), buys, topOfBuys, bottomOfBuys);
        else
            addOrder<Side::Sell>(move(order));

        displayOrderBook();
    }

    template <>
    void processOrder<Side::Buy>(Order &&order)
    {
        if (order.price >= topOfSells)
            matchOrders<Side::Buy, Side::Sell>(move(order), sells, topOfSells, bottomOfSells);
        else
            addOrder<Side::Buy>(move(order));

        displayOrderBook();
    }

    template <Side orderSide, Side otherSide>
    void matchOrders(Order &&order, book_t &otherBook, int &topOfOther, int &bottomOfOther)
    {

        while (compare<otherSide>() and order.quantity > 0)
        {
            auto &other = otherBook[topOfOther].front();
            if (order.quantity < other.quantity)
            {
                cout << order.id << "," << other.id << "," << topOfOther << "," << order.quantity << '\n';
                other.quantity -= order.quantity;
                order.quantity = 0;
            }
            else
            {
                cout << order.id << "," << other.id << "," << topOfOther << "," << other.quantity << '\n';
                order.quantity -= other.quantity;
                otherBook[topOfOther].pop_front();
            }

            if (otherBook[topOfOther].empty())
                findNext<otherSide>();
        }

        if (order.quantity > 0)
            addOrder<orderSide>(move(order));
    }

    template <Side side>
    inline bool compare();

    template <>
    inline bool compare<Side::Buy>()
    {
        return topOfBuys >= bottomOfBuys;
    }

    template <>
    inline bool compare<Side::Sell>()
    {
        return topOfSells <= bottomOfSells;
    }

    template <Side side>
    inline void findNext();

    template <>
    inline void findNext<Side::Sell>()
    {
        while (topOfSells <= bottomOfSells and sells[topOfSells].empty())
        {
            ++topOfSells;
        }

        if (topOfSells > bottomOfSells)
        {
            topOfSells = 65537;
            bottomOfSells = -1;
        }
    }

    template <>
    inline void findNext<Side::Buy>()
    {
        while (topOfBuys >= bottomOfBuys and buys[topOfBuys].empty())
        {
            --topOfBuys;
        }

        if (topOfBuys < bottomOfBuys)
        {
            topOfBuys = -1;
            bottomOfBuys = 65537;
        }
    }

    template <Side side>
    void addOrder(Order &&order);

    template <>
    void addOrder<Side::Sell>(Order &&order)
    {
        topOfSells = min(topOfSells, (int)order.price);
        bottomOfSells = max(bottomOfSells, (int)order.price);
        sells[order.price].push_back(move(order));
    }

    template <>
    void addOrder<Side::Buy>(Order &&order)
    {
        topOfBuys = max(topOfBuys, (int)order.price);
        bottomOfBuys = min(bottomOfBuys, (int)order.price);
        buys[order.price].push_back(move(order));
    }
};

int main()
{
    OrderBook orderbook;
    orderbook.processOrder<Side::Sell>(move(Order{1, 5103, 100}));
    orderbook.processOrder<Side::Buy>(move(Order{2, 5103, 100000}));
    orderbook.processOrder<Side::Buy>(move(Order{3, 5103, 100000}));
    orderbook.processOrder<Side::Sell>(move(Order{4, 5103, 100}));
    orderbook.processOrder<Side::Sell>(move(Order{5, 5104, 100}));
    orderbook.processOrder<Side::Sell>(move(Order{6, 5103, 100}));
    orderbook.processOrder<Side::Buy>(move(Order{7, 5105, 300}));
    orderbook.processOrder<Side::Sell>(move(Order{8, 5105, 300}));
    return 0;
}
```
\$\endgroup\$
3
  • 1
    \$\begingroup\$ Please do not edit the question, especially the code, after an answer has been posted. Changing the question may cause answer invalidation. Everyone needs to be able to see what the reviewer was referring to. What to do after the question has been answered. If you want to update the code and have another review please post a follow up question with a link back to this question. \$\endgroup\$
    – pacmaninbw
    Commented Jun 25, 2023 at 12:31
  • \$\begingroup\$ Oh right, didn't know that. I'll post a new question in that case. \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 12:38
  • 1
    \$\begingroup\$ here's the follow up question: codereview.stackexchange.com/questions/285749/… \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 13:14

2 Answers 2

4
\$\begingroup\$

Adding to pacmaninbw's review:

There's a hole in struct Order

There's a hole between price and quantity in struct Order. If you hadn't mentioned performance I would not have mentioned this, but there is a cost to this. Depending on the CPU architecture, the compiler might have to emit more instructions to deal with something smaller than an int. For example, because it can only load a whole 32-bit value from memory, and then needs to mask the 16 unused bits, because there is no guarantee that the whole is filled with zeroes. The simple solution is to just store price as an int here as well.

Naming things

You are creating new types using PascalCase, except for book_t. Why not write Book instead?

Also note that POSIX reserves all names ending in _t, so it's best not to create new identifiers with that suffix yourself in C or C++ code.

Handling buys and sells

pacmaninbw already mentioned that your templated processOrder() function is not great. Having a template that only has specializations defeats the purpose of making it a template to begin with. Let's look at some alternatives:

Use separate functions

You could just have untemplated processBuy() and processSell() functions.

Pass the Side as a regular function parameter

This is what pacmaninbw suggested. Since the member functions are defined in the class, calls to these function will be inlined, and the compiler can then specialize the function at the call site, giving you the same performance as with a template parameter.

Create distinct BuyOrder and SellOrder types

You can create separate types for buy orders and sell orders, for example:

struct BuyOrder: public Order
{
    static constexpr Side side = Side::Buy;
};

struct SellOrder: public Order
{
    static constexpr Side side = Side::Sell;
};

Now you can do two things. First, you can template processOrder() again, and then use if constexpr to do things differently depending on whether you have a buy or sell order:

void processOrder(std::derived_from<Order> auto&& order>)
{
    if constexpr (order.side == Side::Buy)
    {
        …
    } else {
        …
    }

    displayOrderBook();
}

Second, you can create a std::variant type:

using AnyOrder = std::variant<BuyOrder, SellOrder>;

This way you can pass either a BuyOrder or SellOrder to a function, without requiring the function to be a template:

void processOrder(AnyOrder&& order)
{
    if (std::holds_alternative<BuyOrder>(order))
    {
        …
    } else {
        …
    }

    displayOrderBook();
}

I'm just showing std::holds_alternative<>() here for symmetry with the example above this one; you'd probably use std::visit() instead.

Make your code more generic

Regardless of how you deal with the difference between buy and sell orders, try to make your code more generic. Whenever you see you have to write specific code, avoid specializing the whole function, and instead first ask yourself if you can delegate that specialization to another function. For example, what if you could write:

template <Side side>
void processOrder(Order &&order)
{
    if (priceMatches<side>(order))
        matchOrders<side>(std::move(order));
    else
        addOrder<side>(std::move(order));

    displayOrderBook();
}

Now you can create a templated priceMatches() function that checks order.price against topOfBuys or topOfSells, depending on the template argument. And let matchOrders derive otherSide from orderSide, and also let it decide which book to use.

Another way to avoid all the ifs that deal with Sides is to create an array to hold the books and their associated variables:

struct Book
{
    std::vector<std::deque<Order>> orders;
    int top;
    int bottom;
};

std::array<Books, 2> books;

Now consider that you can index books using the template parameter side, like for example:

template <Side orderSide>
void matchOrders(Order &&order)
{
    static constexpr Side otherSide = (orderSide == Side::Buy) ? Side::Sell : Side::Buy;
    Book& otherBook = books[static_cast<int>(otherSide)];
    int& topOfOther = otherBook.top;
    int& bottomOfOther = otherBook.bottom;
    …
}

Avoid unnecessary temporary storage

There is no need for the vector orders in displayOrderBook(). You can just have one loop which has two loop indices, one for the buys and one for the sells:

for (int i = topOfBuys, j = topOfSells; i >= bottomOfBuys && j >= bottomOfSells;)
{
    // Find the next buy
    while (i >= bottomOfBuys && buys[i].empty)
        --i;

    // Find the next sell
    while (j >= bottomOfSells && sells[j].empty)
        --j;

    const auto& buyOrder = (i >= bottomOfBuys) ? buys[i] : Order{};
    const auto& sellOrder = (i >= bottomOfSells) ? sells[i] : Order{};

    std::cout << …;
}

This saves you multiple expensive allocations each time you call displayOrderBook().

Use std::format()

Since you added the C++20 tag to the question, I strongly recommend you use std::format() instead of the old way of formatting output.

Is it really faster?

An array is only faster and more cache efficient if it's not full of empty spots. What if I add two orders, one with price 1 and with price 32767? Suddenly displayOrderBook() becomes very expensive!

Is a 16-bit integer enough for all possible prices? I'm sure there are things you can buy that cost more than ¤32767. If you want to make this a 32-bit integer, then your vectors can suddenly grow very large. To ensure your order book can scale, you probably have to abandon the idea of using a vector indexed by price.

You are right that a naive heap or balanced tree might be a lot less efficient. However, there are other data structures you can use that are more cache-efficient. Instead of binary heaps or trees, you could use \$n\$-ary heaps or trees, with \$n\$ chosen such that each node fits neatly in a cache line.

\$\endgroup\$
4
  • \$\begingroup\$ Hey, thanks for the review! I've cleaned up the code quite a bit, the updated implementation is in the question if you'd like to have a look and provide more things to improve upon As for std::format(), I wanted to use it but I don't know what environment the marker is going to use and I was told that I should stick to things that are readily available. \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 12:32
  • \$\begingroup\$ As for the performance, yeah this should be faster given that most trades happen in the middle of the book (I did think of what you said but that doesn't really happen in live trading) but it depends on the sparsity of the book and I was told it would be very dense. \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 12:34
  • \$\begingroup\$ link to other post: codereview.stackexchange.com/questions/285749/… \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 13:09
  • \$\begingroup\$ Hey G. Sliepen I just wanted to make sure you're aware I've invited you to a private room. \$\endgroup\$
    – Peilonrayz
    Commented Jun 26, 2023 at 16:26
4
\$\begingroup\$

General Observations

The general impression of the code is that this is a fast and dirty implementation. Take the time you need to design and implement a proper solution.

Some things that the code is missing:

  • The is no ID for what is being bought or sold
  • Error Checking, the code currently works around this, but if there is any user input there needs to be error checking. Unit testing needs to include all error checking.
  • Comments, this code needs the design decisions explained in comments, the comments I do see are meaningless. For instance why are there 2 separate vectors in OrderBook? What is topOfBuys, this might indicate that you are using a stack, but there is no stack defined.
  • Symbolic Constants (see Magic Numbers below).

The way the Order struct is designed is creating duplication of code, an order is a transaction, the transaction type should be included in the order. The Order struct should contain the code to output an order, the displayField Lambda function isn't necessary.

Missing Include Directive

This code does not compile without #include <sstream>.

Avoid using namespace std;

I think you know this already since the actual code is

using namespace std; // remove this

Don't do this for a fast and dirty implementation, it can lead to errors or bugs. Learn to include the proper namespaces in the code as you code, it is an important habit.

If you are coding professionally you probably should get out of the habit of using the using namespace std; statement. The code will more clearly define where cout and other identifiers are coming from (std::cin, std::cout). As you start using namespaces in your code it is better to identify where each function comes from because there may be function name collisions from different namespaces. The identifiercout you may override within your own classes, and you may override the operator << in your own classes as well. This stack overflow question discusses this in more detail.

Meaningful Type Names as Well as Meaningful Variable Names and Method Names

There is an enum called Side, this name only makes sense in terms of the chart the code is generating, it doesn't make sense in terms of the overall algorithm. The members of this enum are Buy and Sell, which does make sense, but in terms of stock or bond transactions Side doesn't make sense. A better name may be TransactionType or Action.

Magic Numbers

There are Magic Numbers in the private section of the OrderBook (-1 and 65537), it might be better to create symbolic constants for them to make the code more readble and easier to maintain. These numbers may be used in many places and being able to change them by editing only one line makes maintainence easier.

DRY Code

There is a programming principle called the Don't Repeat Yourself Principle sometimes referred to as DRY code. If you find yourself repeating the same code mutiple times it is better to encapsulate it in a function. If it is possible to loop through the code that can reduce repetition as well.

The templates should removed code redundancy, the following code increases the code redundancy rather than decreasing it.

    template <Side side>
    void addOrder(Order&& order);

    template <>
    void addOrder<Side::Sell>(Order&& order)
    {
        topOfSells = min(topOfSells, (int)order.price);
        bottomOfSells = max(bottomOfSells, (int)order.price);
        sells[order.price].push_back(move(order));
    }

    template <>
    void addOrder<Side::Buy>(Order&& order)
    {
        topOfBuys = max(topOfBuys, (int)order.price);
        bottomOfBuys = min(bottomOfBuys, (int)order.price);
        buys[order.price].push_back(move(order));
    }

The Side enum should be an input parameter to addOrder(), it isn't really clear that you need the templates for the function at all.

Code Organization

The OrderBook class should have it's own header file and source file. The class declaration and the method / function prototypes should be in the header file, the method / function bodies should be in the C++ source file.

The Use of the inline Keyword

It isn't clear why some of the methods in the OrderBook class are being declared as inline functions. If you are optimizing the code then the -O3 compiler flag is a better optimization that hand optimization. The compiler will automatically inline the functions that can be inlined, and it will do a better job than the programmer can.

\$\endgroup\$
2
  • \$\begingroup\$ Hey, thanks a lot for the review, I've included an updated implementation, let me know if there's anything else I can improve! \$\endgroup\$
    – jpf
    Commented Jun 25, 2023 at 12:29
  • \$\begingroup\$ @jpf Post a follow up question with a link back to this question. \$\endgroup\$
    – pacmaninbw
    Commented Jun 25, 2023 at 12:32

Not the answer you're looking for? Browse other questions tagged or ask your own question.