add ability to one-off domain lookups, add more error handling (#39)

* add ability to one-off domain lookups, add more error handling

* add note about version of whois

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* Update docs/research/scripts/icann_lookup.py

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>

* add shebangs at the top of scripts

Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>
This commit is contained in:
Logan McDonald 2022-08-17 15:17:54 -04:00 committed by GitHub
parent 8a9ca2e700
commit 61602084ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 13 deletions

View file

@ -1,14 +1,20 @@
#!/usr/bin/env python3
"""
This script takes each domain in a dataset of non-.gov government domains and looks for
which registrar they are currently registered with.
This script can be run locally to generate data and currently takes some time to run.
NOTE: This requires python-whois and argparse to be installed.
"""
import csv
import requests
import whois
import whois # this is python-whois
import argparse
import sys
from pathlib import Path
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"
data = requests.get(GOV_URLS_CSV_URL).text
@ -20,17 +26,37 @@ def check_registration(name):
try:
domain_info = whois.whois(name)
return domain_info['registrar']
except KeyboardInterrupt:
sys.exit(1)
except:
print('Something went wrong')
print(f'Something went wrong with that domain lookup for {name}, continuing...')
def main(domain):
    """Look up the registrar for one domain, or for the whole dataset.

    Args:
        domain: A single domain name to look up. When falsy (e.g. ``None``),
            every row of the module-level ``domains`` list is processed
            instead.

    Returns:
        None.

    With ``domain`` given, the registrar reported by ``check_registration``
    is printed and nothing is written to disk. Otherwise, each row of
    ``domains`` whose first column ends in .com, .edu or .net is looked up,
    and the row plus its registrar is written to
    ``../data/registrar_data.csv``, preceded by the ``fields`` row.
    """
    if domain:
        # One-off lookup: print the result and leave the CSV untouched.
        registrar = check_registration(domain)
        print(registrar)
        return

    full_data = []
    suffixes = ('.com', '.edu', '.net')
    # `row` (not `domain`) so the loop does not shadow the function parameter.
    for idx, row in enumerate(domains):
        domain_name = row[0].lower()
        if domain_name.endswith(suffixes):
            # Progress output; the full run takes some time.
            print(idx)
            print(domain_name)
            registrar = check_registration(domain_name)
            full_data.append(row + [registrar])

    Path("../data").mkdir(exist_ok=True)
    # newline='' lets the csv module control line endings itself, which
    # avoids blank rows between records on platforms that translate "\n".
    with open('../data/registrar_data.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        writer.writerows(full_data)
if __name__ == '__main__':
    # Command-line entry point: an optional --domain value is handed to
    # main(); without it, main() receives None.
    parser = argparse.ArgumentParser(
        description="This performs ICANN lookups on domains.",
    )
    parser.add_argument(
        "--domain",
        default=None,
        help="finds the registrar for a single domain",
    )
    cli_args = parser.parse_args()
    exit_status = main(cli_args.domain)
    sys.exit(exit_status)

View file

@ -1,3 +1,5 @@
#!/usr/bin/env python3
"""
This script performs a basic request to each of the domains in the current list of
dotgov domains hosted at https://flatgithub.com/cisagov/dotgov-data/blob/main/?filename=current-full.csv