sitemap: improvements, fixes, add tags

This commit is contained in:
Kyle Drake 2019-10-09 15:13:42 -07:00
parent 86df307d5f
commit f84290f8f4
3 changed files with 68 additions and 19 deletions

View file

@ -485,7 +485,7 @@ task :generate_sitemap => [:environment] do
sorted_sites = {} sorted_sites = {}
sites = Site. sites = Site.
select(:id, :username, :updated_at). select(:id, :username, :updated_at, :profile_enabled).
where(site_changed: true). where(site_changed: true).
exclude(updated_at: nil). exclude(updated_at: nil).
order(:follow_count.desc, :updated_at.desc). order(:follow_count.desc, :updated_at.desc).
@ -500,19 +500,36 @@ task :generate_sitemap => [:environment] do
site_urlset_path = File.join(sitemap_root, key, "#{site.username}.xml") site_urlset_path = File.join(sitemap_root, key, "#{site.username}.xml")
# Delete old records for deleted sites
if site.is_deleted if site.is_deleted
FileUtils.rm site_urlset_path if File.exist? site_urlset_path FileUtils.rm site_urlset_path if File.exist? site_urlset_path
next next
end end
# Make sitemap for each site
builder = Nokogiri::XML::Builder.new { |xml| builder = Nokogiri::XML::Builder.new { |xml|
xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') { xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') {
site.site_files_dataset.where(path: /\.html?$/).all.each { |site_file| site.site_files_dataset.exclude(path: 'not_found.html').where(path: /\.html?$/).all.each { |site_file|
xml.url { xml.url {
loc = site.file_uri site_file.path
xml.loc site.file_uri site_file.path xml.loc site.file_uri site_file.path
xml.lastmod site_file.updated_at.strftime("%Y-%m-%d") xml.lastmod site_file.updated_at.strftime("%Y-%m-%d")
if site.file_uri(site_file.path) == site.uri+'/'
xml.priority 0.5
else
xml.priority 0.4
end
} }
} }
if site.profile_enabled
xml.url {
xml.loc "https://neocities.org/site/#{site.username}"
xml.lastmod site.updated_at.strftime("%Y-%m-%d")
xml.priority 0.3
}
end
} }
} }
@ -525,7 +542,7 @@ task :generate_sitemap => [:environment] do
sites = nil sites = nil
GC.start GC.start
# Create XML file for each site # Create sitemap for key
sorted_sites.keys.sort.each { |key| sorted_sites.keys.sort.each { |key|
builder = Nokogiri::XML::Builder.new { |xml| builder = Nokogiri::XML::Builder.new { |xml|
xml.sitemapindex(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') { xml.sitemapindex(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') {
@ -549,18 +566,22 @@ task :generate_sitemap => [:environment] do
xml.sitemap { xml.sitemap {
xml.loc "https://neocities.org/sitemap/index-#{key}.xml" xml.loc "https://neocities.org/sitemap/index-#{key}.xml"
xml.lastmod Time.now.strftime("%Y-%m-%d") xml.lastmod Time.now.strftime("%Y-%m-%d")
xml.changefreq 'daily'
} }
} }
} }
} }
File.write File.join(sitemap_root, "index.xml"), builder.to_xml(encoding: 'UTF-8') File.write File.join(sitemap_root, "index-sites.xml"), builder.to_xml(encoding: 'UTF-8')
# Set basic neocities.org root paths # Set basic neocities.org root paths
builder = Nokogiri::XML::Builder.new { |xml| builder = Nokogiri::XML::Builder.new { |xml|
xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') { xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') {
File.read(File.join(DIR_ROOT, 'files', 'root_site_uris.txt')).each_line { |uri| File.read(File.join(DIR_ROOT, 'files', 'root_site_uris.txt')).each_line { |uri|
priority, changefreq, uri = uri.strip.split(',')
xml.url { xml.url {
xml.loc uri.strip xml.loc uri
xml.changefreq changefreq
xml.priority priority
} }
} }
} }
@ -568,6 +589,22 @@ task :generate_sitemap => [:environment] do
File.write File.join(sitemap_root, 'index-root.xml'), builder.to_xml(encoding: 'UTF-8') File.write File.join(sitemap_root, 'index-root.xml'), builder.to_xml(encoding: 'UTF-8')
# Tagged sites sitemap: writes index-tags.xml with one /browse URL per tag.
# NOTE(review): Tag.popular_names(Site.count) presumably yields rows exposing
# a :name key, capped at Site.count entries — confirm against the Tag model.
require 'uri'

# Loop-invariant: every tag entry shares the same generation date.
tags_lastmod = Time.now.strftime("%Y-%m-%d")

builder = Nokogiri::XML::Builder.new { |xml|
  xml.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') {
    Tag.popular_names(Site.count).each { |tag|
      # Sitemap <loc> values must be URL-escaped; a raw tag name containing a
      # space, '&', '#', '+' or non-ASCII text would otherwise yield an invalid
      # or ambiguous query string. Plain alphanumeric names are unchanged.
      tag_param = URI.encode_www_form_component(tag[:name])

      xml.url {
        xml.loc "https://neocities.org/browse?sort_by=views&tag=#{tag_param}"
        xml.changefreq 'daily'
        xml.lastmod tags_lastmod
      }
    }
  }
}

File.write File.join(sitemap_root, 'index-tags.xml'), builder.to_xml(encoding: 'UTF-8')
=begin =begin
Zlib::GzipWriter.open File.join(sitemap_root, "index-#{key}.xml.gz") do |gz| Zlib::GzipWriter.open File.join(sitemap_root, "index-#{key}.xml.gz") do |gz|
gz.write builder.to_xml(encoding: 'UTF-8') gz.write builder.to_xml(encoding: 'UTF-8')

View file

@ -1,14 +1,24 @@
https://neocities.org 1,weekly,https://neocities.org
https://neocities.org/signin 0.9,daily,https://neocities.org/browse
https://neocities.org/browse 0.9,monthly,https://neocities.org/signin
https://neocities.org/activity 0.9,hourly,https://neocities.org/activity
https://neocities.org/tutorials 0.9,monthly,https://neocities.org/tutorials
https://neocities.org/supporter 0.9,monthly,https://neocities.org/supporter
https://neocities.org/cli 0.9,monthly,https://neocities.org/cli
https://neocities.org/about 0.9,monthly,https://neocities.org/about
https://neocities.org/donate 0.9,monthly,https://neocities.org/donate
https://neocities.org/api 0.9,monthly,https://neocities.org/api
https://neocities.org/press 0.9,monthly,https://neocities.org/press
https://neocities.org/terms 0.8,monthly,https://neocities.org/terms
https://neocities.org/contact 0.8,weekly,https://blog.neocities.org
https://blog.neocities.org 0.7,monthly,https://neocities.org/contact
0.6,daily,https://neocities.org/browse?sort_by=special_sauce&tag=
0.6,hourly,https://neocities.org/browse?sort_by=last_updated&tag=
0.6,daily,https://neocities.org/browse?sort_by=supporters&tag=
0.6,weekly,https://neocities.org/browse?sort_by=featured&tag=
0.6,daily,https://neocities.org/browse?sort_by=tipping_enabled&tag=
0.6,weekly,https://neocities.org/browse?sort_by=views&tag=
0.6,weekly,https://neocities.org/browse?sort_by=hits&tag=
0.6,hourly,https://neocities.org/browse?sort_by=newest&tag=
0.6,monthly,https://neocities.org/browse?sort_by=oldest&tag=
0.6,always,https://neocities.org/browse?sort_by=random&tag=

View file

@ -1,3 +1,5 @@
User-agent: * User-agent: *
Allow: / Allow: /
Sitemap: https://neocities.org/sitemap/index-root.xml Sitemap: https://neocities.org/sitemap/index-root.xml
Sitemap: https://neocities.org/sitemap/index-tags.xml
Sitemap: https://neocities.org/sitemap/index-sites.xml