add ability to do one-off domain lookups, add more error handling (#39)

* add ability to do one-off domain lookups, add more error handling

* add note about version of whois

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* add shebangs at the top of scripts

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>
This commit is contained in:
Logan McDonald 2022-08-17 15:17:54 -04:00 committed by GitHub
parent 8a9ca2e700
commit 61602084ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 13 deletions

View file

@ -1,14 +1,20 @@
#!/usr/bin/env python3
""" """
This script takes each domain in a dataset of non-.gov government domains and looks for This script takes each domain in a dataset of non-.gov government domains and looks for
which registrar they are currently registered with. which registrar they are currently registered with.
This script can be run locally to generate data and currently takes some time to run. This script can be run locally to generate data and currently takes some time to run.
NOTE: This requires python-whois and argparse to be installed.
""" """
import csv import csv
import requests import requests
import whois import whois # this is python-whois
import argparse
import sys
from pathlib import Path
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv" GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"
data = requests.get(GOV_URLS_CSV_URL).text data = requests.get(GOV_URLS_CSV_URL).text
@ -20,17 +26,37 @@ def check_registration(name):
try: try:
domain_info = whois.whois(name) domain_info = whois.whois(name)
return domain_info['registrar'] return domain_info['registrar']
except KeyboardInterrupt:
sys.exit(1)
except: except:
print('Something went wrong') print(f'Something went wrong with that domain lookup for {name}, continuing...')
full_data = []
for domain in domains: def main(domain):
full_data = []
if domain:
registrar = check_registration(domain)
print(registrar)
else:
for idx, domain in enumerate(domains):
domain_name = domain[0].lower() domain_name = domain[0].lower()
if domain_name.endswith('.com') or domain_name.endswith('.edu') or domain_name.endswith('.net'): if domain_name.endswith('.com') or domain_name.endswith('.edu') or domain_name.endswith('.net'):
print(idx)
print(domain_name)
registrar = check_registration(domain_name) registrar = check_registration(domain_name)
full_data.append(domain + [registrar]) full_data.append(domain + [registrar])
with open('../data/registrar_data.csv', 'w') as f: Path("../data").mkdir(exist_ok=True)
with open('../data/registrar_data.csv', 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(fields) writer.writerow(fields)
writer.writerows(full_data) writer.writerows(full_data)
if __name__ == '__main__':
cl = argparse.ArgumentParser(description="This performs ICANN lookups on domains.")
cl.add_argument("--domain", help="finds the registrar for a single domain", default=None)
args = cl.parse_args()
sys.exit(main(args.domain))

View file

@ -1,3 +1,5 @@
#!/usr/bin/env python3
""" """
This script performs a basic request to each of the domains in the current list of This script performs a basic request to each of the domains in the current list of
dotgov domains hosted at https://flatgithub.com/cisagov/dotgov-data/blob/main/?filename=current-full.csv dotgov domains hosted at https://flatgithub.com/cisagov/dotgov-data/blob/main/?filename=current-full.csv