mirror of
https://github.com/cisagov/manage.get.gov.git
synced 2025-05-13 08:07:03 +02:00
* add scripts for generating icann lookup and response code csvs * add docstring at the top of scripts * add error catching for whois command * exclude data for local use only
36 lines
1.1 KiB
Python
36 lines
1.1 KiB
Python
"""
|
|
This script takes each domain in a dataset of non-.gov government domains and looks up
which registrar it is currently registered with.

This script can be run locally to generate data and currently takes some time to run.
|
|
"""
|
|
|
|
import csv
|
|
import requests
|
|
import whois
|
|
|
|
# CSV of non-.gov government domains published in the GSA govt-urls repository.
GOV_URLS_CSV_URL = "https://raw.githubusercontent.com/GSA/govt-urls/master/1_govt_urls_full.csv"

# Download the dataset. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were CSV.
response = requests.get(GOV_URLS_CSV_URL, timeout=30)
response.raise_for_status()
data = response.text

csv_data = list(csv.reader(data.splitlines(), delimiter=','))
# The first row is used as the header; the remaining rows are the data rows.
domains = csv_data[1:]
# Output header: the original columns plus the registrar looked up below.
fields = csv_data[0] + ['Registrar']
|
|
|
|
def check_registration(name):
    """
    Look up the registrar that `name` is currently registered with.

    Returns the 'registrar' value of the WHOIS record, or None when the
    lookup fails for any reason (a message is printed in that case).
    """
    try:
        domain_info = whois.whois(name)
        return domain_info['registrar']
    except Exception:
        # Any lookup failure is treated as "registrar unknown" so one bad
        # domain does not abort the whole run. Catching Exception instead of
        # using a bare except lets Ctrl-C and SystemExit still stop this
        # long-running script.
        print('Something went wrong')
        return None
|
|
|
|
# Collect every looked-up row together with its registrar.
full_data = []
for domain in domains:
    # csv.reader yields an empty list for a blank line; skip it instead of
    # crashing on domain[0].
    if not domain:
        continue
    # The first column is used as the domain name; lower-case it so the
    # suffix check below is case-insensitive.
    domain_name = domain[0].lower()
    # Only .com, .edu and .net domains are looked up; rows with any other
    # suffix are left out of the output entirely.
    if domain_name.endswith(('.com', '.edu', '.net')):
        registrar = check_registration(domain_name)
        full_data.append(domain + [registrar])
|
|
|
|
# Write the header followed by the collected rows. The path is relative to the
# current working directory. newline='' is what the csv module requires of
# files it writes to (otherwise extra blank rows appear on platforms that
# translate '\n' to '\r\n'); the explicit encoding keeps the output
# independent of the platform's default encoding.
with open('../data/registrar_data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(fields)
    writer.writerows(full_data)
|