mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-05-15 09:07:02 +02:00
add ability to one-off domain lookups, add more error handling (#39)
* add ability to one-off domain lookups, add more error handling * add note about version of whois * Update docs/research/scripts/icann_lookup.py Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov> * Update docs/research/scripts/icann_lookup.py Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov> * Update docs/research/scripts/icann_lookup.py Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov> * add shebangs at the top of scripts Co-authored-by: Seamus Johnston <seamus.johnston@gsa.gov>
This commit is contained in:
parent
8a9ca2e700
commit
61602084ff
2 changed files with 41 additions and 13 deletions
|
@ -1,14 +1,20 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This script takes each domain in a dataset of non-.gov government domains and looks for
|
This script takes each domain in a dataset of non-.gov government domains and looks for
|
||||||
which registrar they are currently registered with.
|
which registrar they are currently registered with.
|
||||||
|
|
||||||
This script can be run locally to generate data and currently takes some time to run.
|
This script can be run locally to generate data and currently takes some time to run.
|
||||||
|
|
||||||
|
NOTE: This requires python-whois and argparse to be installed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import requests
|
import requests
|
||||||
import whois
|
import whois # this is python-whois
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"
|
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"
|
||||||
|
|
||||||
data = requests.get(GOV_URLS_CSV_URL).text
|
data = requests.get(GOV_URLS_CSV_URL).text
|
||||||
|
@ -20,17 +26,37 @@ def check_registration(name):
|
||||||
try:
|
try:
|
||||||
domain_info = whois.whois(name)
|
domain_info = whois.whois(name)
|
||||||
return domain_info['registrar']
|
return domain_info['registrar']
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
sys.exit(1)
|
||||||
except:
|
except:
|
||||||
print('Something went wrong')
|
print(f'Something went wrong with that domain lookup for {name}, continuing...')
|
||||||
|
|
||||||
full_data = []
|
|
||||||
for domain in domains:
|
|
||||||
domain_name = domain[0].lower()
|
|
||||||
if domain_name.endswith('.com') or domain_name.endswith('.edu') or domain_name.endswith('.net'):
|
|
||||||
registrar = check_registration(domain_name)
|
|
||||||
full_data.append(domain + [registrar])
|
|
||||||
|
|
||||||
with open('../data/registrar_data.csv', 'w') as f:
|
def main(domain):
|
||||||
writer = csv.writer(f)
|
full_data = []
|
||||||
writer.writerow(fields)
|
if domain:
|
||||||
writer.writerows(full_data)
|
registrar = check_registration(domain)
|
||||||
|
print(registrar)
|
||||||
|
else:
|
||||||
|
for idx, domain in enumerate(domains):
|
||||||
|
domain_name = domain[0].lower()
|
||||||
|
if domain_name.endswith('.com') or domain_name.endswith('.edu') or domain_name.endswith('.net'):
|
||||||
|
print(idx)
|
||||||
|
print(domain_name)
|
||||||
|
registrar = check_registration(domain_name)
|
||||||
|
full_data.append(domain + [registrar])
|
||||||
|
|
||||||
|
Path("../data").mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
with open('../data/registrar_data.csv', 'w') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
|
writer.writerow(fields)
|
||||||
|
writer.writerows(full_data)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
cl = argparse.ArgumentParser(description="This performs ICANN lookups on domains.")
|
||||||
|
cl.add_argument("--domain", help="finds the registrar for a single domain", default=None)
|
||||||
|
args = cl.parse_args()
|
||||||
|
|
||||||
|
sys.exit(main(args.domain))
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This script performs a basic request to each of the domains in the current list of
|
This script performs a basic request to each of the domains in the current list of
|
||||||
dotgov domains hosted at https://flatgithub.com/cisagov/dotgov-data/blob/main/?filename=current-full.csv
|
dotgov domains hosted at https://flatgithub.com/cisagov/dotgov-data/blob/main/?filename=current-full.csv
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue