ipaddress performance and ease when excluding networks from networks #96302

nlsj1985 · 2022-08-26T08:53:41Z

Hello,

I would like to ask you if this is worth a feature-request or some other improvement?

https://cloud.google.com/appengine/docs/standard/python3/outbound-ip-addresses

I was trying to recreate this use case using the ipaddress module, but at first I had trouble getting the same result (the same remaining host addresses) as the netaddr example produces.
To get that result with ipaddress, I have to iterate multiple times, calling address_exclude() across the two sets of overlapping subnets.

The time required by my loops using ipaddress is 56s, versus 16s with netaddr.
I'm not sure whether I could have done this much better (performance-wise) using ipaddress, and 56s is fine for my use case, but I'm raising this feature request to let you know about my experience comparing the two modules for this specific use case.

By the way, I've also tested matching random IPs against the results, and in that case both modules are almost equally fast.

These are some of the tests I put together.
(P.S. You need a fair amount of RAM: PyCharm grew to 4.5 GB on my workstation.)


import sys
import json
from functools import wraps
import time
import random
from time import sleep

try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
    from urllib.error import HTTPError

import netaddr
from ipaddress import ip_network, ip_address

# JSON range lists to download, keyed by a short group name.
# netaddr_set() fetches both and computes the "goog" ranges minus the
# "cloud" ranges.
IPRANGE_URLS = {
    "goog": "https://www.gstatic.com/ipranges/goog.json",
    "cloud": "https://www.gstatic.com/ipranges/cloud.json",
}


def timeit(func):
    """Decorate ``func`` so every call reports its wall-clock duration.

    The wrapper measures the call with ``time.perf_counter()``, prints the
    elapsed seconds (four decimal places) together with the function name,
    and hands back whatever ``func`` returned. If ``func`` raises, nothing
    is printed and the exception propagates unchanged.
    """
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        total_time = time.perf_counter() - began
        print(f'Function {func.__name__} Took {total_time:.4f} seconds')
        return outcome

    return timeit_wrapper


def read_url(url):
    """Fetch ``url`` and return its body parsed as JSON.

    Args:
        url: Address to open with ``urlopen``.

    Returns:
        The decoded JSON document, or ``None`` when the request fails or the
        body is not valid JSON. In both failure cases an ``ERROR:`` line is
        printed instead of raising.
    """
    try:
        # The context manager closes the response even when read() fails,
        # so the underlying connection is never leaked.
        with urlopen(url) as response:
            payload = response.read()
    except (IOError, HTTPError):
        print("ERROR: Invalid HTTP response from %s" % url)
        return None

    try:
        return json.loads(payload)
    except json.decoder.JSONDecodeError:
        print("ERROR: Could not parse HTTP response from %s" % url)
        return None


def get_data(link):
    """Download the range list at ``link`` and collect its IPv4 prefixes.

    Returns a ``netaddr.IPSet`` containing every ``ipv4Prefix`` value found
    in the document's ``prefixes`` list, or ``None`` when ``read_url``
    returns a falsy result.
    """
    document = read_url(link)
    if not document:
        return None

    print("{} published: {}".format(link, document.get("creationTime")))
    ipv4_ranges = netaddr.IPSet()
    for entry in document["prefixes"]:
        # Entries without an IPv4 prefix are skipped.
        if "ipv4Prefix" not in entry:
            continue
        ipv4_ranges.add(entry["ipv4Prefix"])
    return ipv4_ranges


@timeit
def netaddr_set():
    """Compute the "goog" minus "cloud" IPv4 ranges with netaddr.

    Returns:
        A ``(hosts, ranges)`` tuple: ``hosts`` is a set with the string form
        of every host address left after the subtraction, ``ranges`` is the
        ``netaddr.IPSet`` difference itself.

    Raises:
        ValueError: If any of the range lists could not be downloaded.
    """
    cidrs = {group: get_data(link) for group, link in IPRANGE_URLS.items()}
    # get_data() returns None when a download fails. The dict always holds
    # one entry per URL, so its length cannot reveal a failure; inspect the
    # values instead.
    if any(ranges is None for ranges in cidrs.values()):
        raise ValueError("ERROR: Could not process data from Google")
    print("IP ranges for Google APIs and services default domains:")
    # Compute the difference once and reuse it for both return values.
    net_res = cidrs["goog"] - cidrs["cloud"]
    ip_set = {
        str(ip)
        for network in net_res.iter_cidrs()
        for ip in network.iter_hosts()
    }
    return ip_set, net_res


def _subtract_networks(networks, excluded):
    """Return the parts of ``networks`` that overlap nothing in ``excluded``."""
    remaining = set()
    pending = list(networks)
    while pending:
        net = pending.pop()
        for other in excluded:
            if not net.overlaps(other):
                continue
            # Overlapping networks are always nested. When ``net`` lies
            # inside ``other`` nothing of it survives; address_exclude()
            # would raise ValueError for a strict supernet, so drop it here.
            if not net.subnet_of(other):
                # ``other`` is strictly inside ``net``: keep the pieces
                # around it and re-check them against every excluded range.
                pending.extend(net.address_exclude(other))
            break
        else:
            # No excluded range touches this network.
            remaining.add(net)
    return remaining


@timeit
def ipaddress_set():
    """Compute the "goog" minus "cloud" IPv4 ranges with ``ipaddress``.

    Returns:
        A ``(hosts, networks)`` tuple: ``hosts`` is a set with the string
        form of every host address in the remaining networks, ``networks``
        is the set of ``ip_network`` objects left after the subtraction.

    Exits the process (status 0, as before) when either range list cannot
    be downloaded; ``read_url`` has already printed the reason.
    """
    goog = read_url(IPRANGE_URLS["goog"])
    cloud = read_url(IPRANGE_URLS["cloud"])
    if not goog or not cloud:
        sys.exit(0)

    goog_set = {
        ip_network(entry["ipv4Prefix"])
        for entry in goog["prefixes"]
        if "ipv4Prefix" in entry
    }
    cloud_set = {
        ip_network(entry["ipv4Prefix"])
        for entry in cloud["prefixes"]
        if "ipv4Prefix" in entry
    }

    goog_set = _subtract_networks(goog_set, cloud_set)
    # Stream the hosts straight into the result instead of building a
    # temporary set per network.
    goog_set_hosts = {str(ip) for net in goog_set for ip in net.hosts()}
    return goog_set_hosts, goog_set


def check_ip_netaddr(ip, ipset):
    """Report whether ``ip``, converted with ``netaddr.IPAddress``, is in ``ipset``."""
    address = netaddr.IPAddress(ip)
    return address in ipset


def check_ip_ipaddress(ip, network_set):
    """Report whether ``ip`` falls inside any network of ``network_set``.

    ``ip`` is converted once with ``ip_address``; the networks are then
    tested in iteration order and the scan stops at the first match.
    Returns ``False`` for an empty collection.
    """
    candidate = ip_address(ip)
    return any(candidate in net for net in network_set)


@timeit
def run_sample_netaddr(sample, ipset):
    """Look up every address of ``sample`` in ``ipset`` and print the results.

    Prints one list of booleans, in the order of ``sample``, as produced by
    ``check_ip_netaddr``. Returns nothing.
    """
    outcomes = [check_ip_netaddr(address, ipset) for address in sample]
    print(outcomes)


@timeit
def run_sample_ipaddress(sample, network_set):
    """Look up every address of ``sample`` in ``network_set`` and print the results.

    Prints one list of booleans, in the order of ``sample``, as produced by
    ``check_ip_ipaddress``. Returns nothing.
    """
    outcomes = [check_ip_ipaddress(address, network_set) for address in sample]
    print(outcomes)


def main():
    """Run both subtraction strategies and compare membership lookups.

    Builds the remaining hosts and ranges with netaddr and with ipaddress,
    prints how many hosts each produced, then runs the same random sample of
    hosts (up to 100) through both lookup benchmarks.
    """
    netaddr_hosts, netaddr_net = netaddr_set()
    ipaddress_hosts, ipaddress_net = ipaddress_set()
    print(f"netaddr remaining size: {len(netaddr_hosts)}")
    print(f"ipaddress remaining size: {len(ipaddress_hosts)}")
    # random.sample() requires a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError from 3.11 on.
    population = list(netaddr_hosts)
    # Cap the sample size so a small result set does not raise ValueError.
    test_sample = random.sample(population, min(100, len(population)))
    print(test_sample)
    run_sample_netaddr(test_sample, netaddr_net)
    run_sample_ipaddress(test_sample, ipaddress_net)

# Run the benchmark only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()

The text was updated successfully, but these errors were encountered:

mdboom added performance Performance or resource usage stdlib Python modules in the Lib dir labels Aug 26, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ipaddress performance and ease when excluding networks from networks #96302

ipaddress performance and ease when excluding networks from networks #96302

nlsj1985 commented Aug 26, 2022

ipaddress performance and ease when excluding networks from networks #96302

ipaddress performance and ease when excluding networks from networks #96302

Comments

nlsj1985 commented Aug 26, 2022