You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I was trying to recreate this use case using the ipaddress module, but had some problems at first getting the same result (same host addresses remaining) as netaddr's results in the example.
To get the same result with ipaddress, I had to do multiple iterations using address_exclude() on the 2 sets of overlapping subnets.
The time required by my loops, using ipaddress is 56s vs 16s with netaddr.
I'm not sure if i could have done this much better (perf. wise) using ipaddress.. and 56s is fine for my usecase.. but I'm raising this feature request to notify you of my experience comparing the two modules for this specific usecase.
b.t.w. I've also done some matching of random ip's and in that case both modules are almost as fast.
This is some of the tests i put together..
(P.S. you need to have some RAM — PyCharm grew to 4.5 GB on my workstation.)
import sys
import json
from functools import wraps
import time
import random
from time import sleep
try:
from urllib import urlopen
except ImportError:
from urllib.request import urlopen
from urllib.error import HTTPError
import netaddr
from ipaddress import ip_network, ip_address
# Google-published JSON feeds of IP ranges: "goog" covers all Google
# ranges, "cloud" covers Google Cloud customer ranges. Subtracting the
# second from the first leaves the ranges used by Google APIs/services.
IPRANGE_URLS = {
"goog": "https://www.gstatic.com/ipranges/goog.json",
"cloud": "https://www.gstatic.com/ipranges/cloud.json",
}
def timeit(func):
    """Decorator that prints the wall-clock duration of every call to *func*.

    The wrapped function's return value is passed through unchanged.
    """
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        start = time.perf_counter()
        value = func(*args, **kwargs)
        total_time = time.perf_counter() - start
        print(f'Function {func.__name__} Took {total_time:.4f} seconds')
        return value
    return timeit_wrapper
def read_url(url):
    """Fetch *url* and parse the body as JSON.

    Returns the decoded object, or None (after printing an error) when the
    request fails or the response is not valid JSON.
    """
    try:
        body = urlopen(url).read()
    except (IOError, HTTPError):
        print("ERROR: Invalid HTTP response from %s" % url)
        return None
    try:
        return json.loads(body)
    except json.decoder.JSONDecodeError:
        print("ERROR: Could not parse HTTP response from %s" % url)
        return None
def get_data(link):
    """Download one Google IP-range feed and collect its IPv4 prefixes.

    Returns a netaddr.IPSet of the feed's "ipv4Prefix" entries, or None
    when the feed could not be fetched or parsed.
    """
    data = read_url(link)
    if not data:
        return None
    print("{} published: {}".format(link, data.get("creationTime")))
    cidrs = netaddr.IPSet()
    for prefix_entry in data["prefixes"]:
        if "ipv4Prefix" in prefix_entry:
            cidrs.add(prefix_entry.get("ipv4Prefix"))
    return cidrs
@timeit
def netaddr_set():
    """Compute goog-minus-cloud IP ranges using netaddr.

    Returns a tuple ``(host_strings, remaining_ipset)``: every host address
    (as a string) remaining after subtracting the cloud ranges from the goog
    ranges, plus the netaddr.IPSet holding the remaining CIDRs.

    Raises ValueError when either feed could not be downloaded/parsed.
    """
    cidrs = {group: get_data(link) for group, link in IPRANGE_URLS.items()}
    # get_data() returns None on failure. The previous `len(cidrs) != 2`
    # check could never fire: the comprehension always yields two keys,
    # so check the values instead.
    if any(ipset is None for ipset in cidrs.values()):
        raise ValueError("ERROR: Could not process data from Google")
    print("IP ranges for Google APIs and services default domains:")
    # IPSet subtraction is expensive; compute it once and reuse it
    # (the original evaluated the difference twice).
    net_res = cidrs["goog"] - cidrs["cloud"]
    ip_set = set()
    for network in net_res.iter_cidrs():
        ip_set.update(str(ip) for ip in network.iter_hosts())
    return ip_set, net_res
@timeit
def ipaddress_set():
    """Compute goog-minus-cloud IP ranges using the stdlib ipaddress module.

    Same computation as netaddr_set(): returns a tuple
    ``(host_strings, remaining_networks)`` where remaining_networks is a set
    of ip_network objects covering the goog ranges minus the cloud ranges.

    Exits the interpreter with status 1 when either feed cannot be fetched.
    """
    # Reuse the module-level constant instead of repeating the URL
    # literals (keeps this in sync with netaddr_set()).
    goog = read_url(IPRANGE_URLS["goog"])
    cloud = read_url(IPRANGE_URLS["cloud"])
    if not goog or not cloud:
        # A failed download is an error condition, so exit non-zero
        # (the original exited with 0 here).
        sys.exit(1)
    goog_set = {ip_network(p["ipv4Prefix"])
                for p in goog["prefixes"] if "ipv4Prefix" in p}
    cloud_set = {ip_network(p["ipv4Prefix"])
                 for p in cloud["prefixes"] if "ipv4Prefix" in p}
    goog_set = _exclude_networks(goog_set, cloud_set)
    # Stream hosts straight into the result set; the original built a
    # throwaway set(n.hosts()) per network, which wastes memory.
    goog_set_hosts = {str(ip) for n in goog_set for ip in n.hosts()}
    return goog_set_hosts, goog_set


def _exclude_networks(keep, remove):
    """Carve the networks in *remove* out of the networks in *keep*.

    Repeatedly splits each network in *keep* with address_exclude() against
    the first network in *remove* it overlaps, until no overlaps remain.
    Multiple passes are needed because address_exclude() handles one
    excluded subnet at a time. Returns the converged set of networks.
    """
    while True:
        result = set()
        changed = False
        for net in keep:
            for other in remove:
                if net.overlaps(other):
                    changed = True
                    result.update(net.address_exclude(other))
                    break
            else:
                # No overlap with any excluded network: keep as-is.
                result.add(net)
        if not changed:
            return keep
        keep = result
def check_ip_netaddr(ip, ipset):
    """Return True when the address string *ip* is contained in *ipset*."""
    return netaddr.IPAddress(ip) in ipset
def check_ip_ipaddress(ip, network_set):
    """Return True when the address string *ip* falls inside any network
    in *network_set* (an iterable of ip_network objects)."""
    target = ip_address(ip)
    return any(target in network for network in network_set)
@timeit
def run_sample_netaddr(sample, ipset):
    """Membership-test every address in *sample* against the netaddr
    IPSet *ipset* and print the list of boolean results."""
    res_list = [check_ip_netaddr(addr, ipset) for addr in sample]
    print(res_list)
@timeit
def run_sample_ipaddress(sample, network_set):
    """Membership-test every address in *sample* against the ipaddress
    networks in *network_set* and print the list of boolean results."""
    res_list = [check_ip_ipaddress(addr, network_set) for addr in sample]
    print(res_list)
def main():
    """Run both implementations, report result sizes, then time random
    membership lookups with each module."""
    netaddr_hosts, netaddr_net = netaddr_set()
    ipaddress_hosts, ipaddress_net = ipaddress_set()
    print(f"netaddr remaining size: {len(netaddr_hosts)}")
    print(f"ipaddress remaining size: {len(ipaddress_hosts)}")
    # random.sample() no longer accepts a set (deprecated in 3.9,
    # TypeError since 3.11): convert to an ordered sequence first.
    test_sample = random.sample(sorted(netaddr_hosts), 100)
    print(test_sample)
    run_sample_netaddr(test_sample, netaddr_net)
    run_sample_ipaddress(test_sample, ipaddress_net)
# Script entry point: only run the benchmark when executed directly.
if __name__ == "__main__":
    main()
The text was updated successfully, but these errors were encountered:
Hello,
I would like to ask whether this is worth a feature request or some other improvement.
https://cloud.google.com/appengine/docs/standard/python3/outbound-ip-addresses
I was trying to recreate this usecase using the ipaddress module, but had some problems at first getting the same result (same host addresses remaining) as netaddr's results in the example.
To get the result with ipaddress, I've to do multiple iterations using address_exclude() on the 2 sets of overlapping subnets.
The time required by my loops, using ipaddress is 56s vs 16s with netaddr.
I'm not sure if i could have done this much better (perf. wise) using ipaddress.. and 56s is fine for my usecase.. but I'm raising this feature request to notify you of my experience comparing the two modules for this specific usecase.
b.t.w. I've also done some matching of random ip's and in that case both modules are almost as fast.
This is some of the tests i put together..
(ps. you need to have some ram.. pycharm grew to 4,5 GB on my workstation)
The text was updated successfully, but these errors were encountered: