add scripts for generating icann lookup and response code csvs (#2)

* add scripts for generating icann lookup and response code csvs

* add docstring at the top of scripts

* add error catching for whois command

* exclude data for local use only
This commit is contained in:
Logan McDonald 2022-08-09 09:03:51 -04:00 committed by GitHub
parent 3fbced1f62
commit b5b3defe41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 71 additions and 0 deletions

View file

@ -0,0 +1,36 @@
"""
This script takes each domain in a dataset of non-.gov government domains and looks up
the registrar it is currently registered with.
This script can be run locally to generate data; it currently takes some time to run.
"""
import csv
import requests
import whois
# CSV published in the GSA/govt-urls repository; per the module docstring it
# lists government domains that are not under .gov.
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"

# Use a timeout so a stalled connection cannot hang the script forever, and
# raise on HTTP errors so an error page is never parsed as if it were CSV.
response = requests.get(GOV_URLS_CSV_URL, timeout=30)
response.raise_for_status()
data = response.text

csv_data = list(csv.reader(data.splitlines(), delimiter=','))
# The first row is the header; every following row is one dataset entry.
domains = csv_data[1:]
# Output header: the original columns plus the looked-up registrar.
fields = csv_data[0] + ['Registrar']
def check_registration(name):
    """Return the registrar reported by a WHOIS lookup for ``name``.

    The lookup is best-effort: if it fails for any ordinary reason, the
    error is printed together with the domain and ``None`` is returned, so
    one bad domain does not abort the whole run.

    Args:
        name: Domain name to look up.

    Returns:
        The value stored under ``'registrar'`` in the WHOIS result, or
        ``None`` when the lookup failed.
    """
    try:
        domain_info = whois.whois(name)
        return domain_info['registrar']
    # Catch Exception instead of using a bare ``except`` so Ctrl-C
    # (KeyboardInterrupt) and SystemExit can still stop this slow script.
    except Exception as error:
        print('Something went wrong looking up {}: {}'.format(name, error))
        return None
# Only domains with these suffixes get a registrar lookup. str.endswith
# accepts a tuple, which replaces the chained ``or`` tests.
LOOKUP_SUFFIXES = ('.com', '.edu', '.net')

full_data = []
for domain in domains:
    # csv.reader yields an empty list for a blank line; there is no domain
    # column to read in that case, so skip the row instead of crashing.
    if not domain:
        continue
    domain_name = domain[0].lower()
    if domain_name.endswith(LOOKUP_SUFFIXES):
        registrar = check_registration(domain_name)
        full_data.append(domain + [registrar])

# The path is relative to the current working directory.
# newline='' is what the csv module asks for when writing (otherwise extra
# blank lines appear on platforms that translate line endings), and an
# explicit encoding keeps the output independent of the locale default.
with open('../data/registrar_data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(fields)
    writer.writerows(full_data)