Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ipaddress performance and ease when excluding networks from networks #96302

Open
nlsj1985 opened this issue Aug 26, 2022 · 0 comments
Open

ipaddress performance and ease when excluding networks from networks #96302

nlsj1985 opened this issue Aug 26, 2022 · 0 comments
Labels
performance Performance or resource usage stdlib Python modules in the Lib dir

Comments

@nlsj1985
Copy link

Hello,

I would like to ask whether this is worth a feature request or some other improvement.

https://cloud.google.com/appengine/docs/standard/python3/outbound-ip-addresses

I was trying to recreate this use case using the ipaddress module, but at first had some problems getting the same result (the same remaining host addresses) as netaddr produces in the example.
To get the result with ipaddress, I have to do multiple iterations using address_exclude() on the two sets of overlapping subnets.

The time required by my loops using ipaddress is 56s, versus 16s with netaddr.
I'm not sure whether I could have done this much better (performance-wise) using ipaddress, and 56s is fine for my use case, but I'm raising this feature request to share my experience comparing the two modules for this specific use case.

By the way, I've also done some matching of random IPs, and in that case both modules are almost equally fast.

These are some of the tests I put together.
(P.S. You need to have some RAM: PyCharm grew to 4.5 GB on my workstation.)


import sys
import json
from functools import wraps
import time
import random
from time import sleep

try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
    from urllib.error import HTTPError

import netaddr
from ipaddress import ip_network, ip_address

# JSON documents fetched by this script, keyed by the short group name the
# rest of the code uses to look them up ("goog" and "cloud").
IPRANGE_URLS = {
    "goog": "https://www.gstatic.com/ipranges/goog.json",
    "cloud": "https://www.gstatic.com/ipranges/cloud.json",
}


def timeit(func):
    """Decorate *func* so every call reports how long it took.

    The duration is measured with ``time.perf_counter`` and printed once the
    wrapped call has returned. The call's own return value is handed back
    unchanged, and ``functools.wraps`` keeps the wrapped function's metadata.
    """
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        print(f'Function {func.__name__} Took {elapsed:.4f} seconds')
        return outcome

    return timeit_wrapper


def read_url(url):
    """Fetch *url* and return its body parsed as JSON.

    Args:
        url: Address to fetch; passed straight to ``urlopen``.

    Returns:
        The decoded JSON document, or ``None`` when the request fails or the
        body is not valid JSON. An ``ERROR:`` line is printed in both
        failure cases.
    """
    try:
        # Open the response as a context manager so the underlying
        # connection is closed even when reading or parsing raises.
        with urlopen(url) as response:
            return json.loads(response.read())
    except OSError:
        # IOError is an alias of OSError, and urllib's URLError/HTTPError
        # derive from it, so this single clause covers every fetch failure.
        print("ERROR: Invalid HTTP response from %s" % url)
    except json.decoder.JSONDecodeError:
        print("ERROR: Could not parse HTTP response from %s" % url)
    return None


def get_data(link):
    """Download the document at *link* and collect its IPv4 prefixes.

    Prints the document's ``creationTime`` and returns a ``netaddr.IPSet``
    holding every ``ipv4Prefix`` value found under the ``"prefixes"`` key.
    Returns ``None`` when ``read_url`` yields nothing usable.
    """
    payload = read_url(link)
    if not payload:
        return None

    print("{} published: {}".format(link, payload.get("creationTime")))
    ipv4_cidrs = netaddr.IPSet()
    # Entries without an "ipv4Prefix" key are skipped.
    for entry in payload["prefixes"]:
        if "ipv4Prefix" not in entry:
            continue
        ipv4_cidrs.add(entry.get("ipv4Prefix"))
    return ipv4_cidrs


@timeit
def netaddr_set():
    """Compute the "goog minus cloud" address space using netaddr.

    Returns:
        A ``(ip_set, net_res)`` tuple: ``ip_set`` is a set of host addresses
        (as strings) taken from every CIDR of the difference, and ``net_res``
        is the difference object itself (``cidrs["goog"] - cidrs["cloud"]``).

    Raises:
        ValueError: If either range document could not be downloaded/parsed.
    """
    cidrs = {group: get_data(link) for group, link in IPRANGE_URLS.items()}
    # get_data() returns None on failure. The dict always has one key per URL,
    # so only the values (not the dict's length) can reveal a failed fetch.
    if any(ranges is None for ranges in cidrs.values()):
        raise ValueError("ERROR: Could not process data from Google")
    print("IP ranges for Google APIs and services default domains:")
    # Compute the difference once and reuse it for both return values.
    net_res = cidrs["goog"] - cidrs["cloud"]
    ip_set = set()
    for network in net_res.iter_cidrs():
        ip_set.update(str(ip) for ip in network.iter_hosts())
    return ip_set, net_res


def _subtract_networks(networks, excluded):
    """Return the parts of *networks* not covered by any network in *excluded*."""
    remaining = set()
    pending = list(networks)
    while pending:
        net = pending.pop()
        for other in excluded:
            if not net.overlaps(other):
                continue
            # Overlapping CIDR blocks are always nested. When `net` lies
            # inside `other` (or equals it) nothing of it survives; calling
            # address_exclude() there would raise ValueError.
            if not net.subnet_of(other):
                # `other` is strictly inside `net`: keep the surrounding
                # pieces and re-check them against the excluded networks.
                pending.extend(net.address_exclude(other))
            break
        else:
            # No excluded network touches `net`, so it is kept as-is.
            remaining.add(net)
    return remaining


@timeit
def ipaddress_set():
    """Compute the "goog minus cloud" address space using ipaddress.

    Returns:
        A ``(goog_set_hosts, goog_set)`` tuple: ``goog_set`` is the set of
        networks left after removing every cloud IPv4 prefix from the goog
        IPv4 prefixes, and ``goog_set_hosts`` is the set of their host
        addresses as strings.

    Exits the process with a non-zero status when either range document
    cannot be loaded (``read_url`` has already printed the reason).
    """
    goog = read_url(IPRANGE_URLS["goog"])
    cloud = read_url(IPRANGE_URLS["cloud"])
    if not goog or not cloud:
        sys.exit(1)

    goog_set = {
        ip_network(prefix["ipv4Prefix"])
        for prefix in goog["prefixes"]
        if "ipv4Prefix" in prefix
    }
    cloud_set = {
        ip_network(prefix["ipv4Prefix"])
        for prefix in cloud["prefixes"]
        if "ipv4Prefix" in prefix
    }

    goog_set = _subtract_networks(goog_set, cloud_set)

    goog_set_hosts = set()
    for network in goog_set:
        # Stream hosts() straight into the result instead of building a
        # throwaway set per network.
        goog_set_hosts.update(str(ip) for ip in network.hosts())
    return goog_set_hosts, goog_set


def check_ip_netaddr(ip, ipset):
    """Return the result of testing ``netaddr.IPAddress(ip) in ipset``."""
    address = netaddr.IPAddress(ip)
    return address in ipset


def check_ip_ipaddress(ip, network_set):
    """Return True when *ip* falls inside at least one network of *network_set*.

    *ip* is converted with ``ip_address`` before any network is examined, and
    the scan stops at the first network that contains it.
    """
    candidate = ip_address(ip)
    return any(candidate in net for net in network_set)


@timeit
def run_sample_netaddr(sample, ipset):
    """Run ``check_ip_netaddr`` on every entry of *sample* and print the results.

    The outcomes are printed as one list, in the order of *sample*.
    """
    print([check_ip_netaddr(address, ipset) for address in sample])


@timeit
def run_sample_ipaddress(sample, network_set):
    """Run ``check_ip_ipaddress`` on every entry of *sample* and print the results.

    The outcomes are printed as one list, in the order of *sample*.
    """
    print([check_ip_ipaddress(address, network_set) for address in sample])


def main():
    """Build both address sets, print their sizes and time sample lookups.

    Up to 100 addresses are drawn at random from the netaddr result and then
    looked up with both the netaddr and the ipaddress implementations.
    """
    netaddr_hosts, netaddr_net = netaddr_set()
    ipaddress_hosts, ipaddress_net = ipaddress_set()
    print(f"netaddr remaining size: {len(netaddr_hosts)}")
    print(f"ipaddress remaining size: {len(ipaddress_hosts)}")
    # random.sample() requires a sequence: sampling from a set is deprecated
    # since Python 3.9 and raises TypeError from 3.11. The sample size is
    # capped so a result with fewer than 100 hosts does not raise ValueError.
    population = list(netaddr_hosts)
    test_sample = random.sample(population, min(100, len(population)))
    print(test_sample)
    run_sample_netaddr(test_sample, netaddr_net)
    run_sample_ipaddress(test_sample, ipaddress_net)


# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
@mdboom mdboom added performance Performance or resource usage stdlib Python modules in the Lib dir labels Aug 26, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
performance Performance or resource usage stdlib Python modules in the Lib dir
2 participants