Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Manifest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ lib/rubygems/commands/unpack_command.rb
lib/rubygems/commands/update_command.rb
lib/rubygems/commands/which_command.rb
lib/rubygems/commands/yank_command.rb
lib/rubygems/compact_index_client.rb
lib/rubygems/compact_index_client/cache.rb
lib/rubygems/compact_index_client/cache_file.rb
lib/rubygems/compact_index_client/http_fetcher.rb
lib/rubygems/compact_index_client/parser.rb
lib/rubygems/compact_index_client/updater.rb
lib/rubygems/config_file.rb
lib/rubygems/core_ext/kernel_gem.rb
lib/rubygems/core_ext/kernel_require.rb
Expand Down
90 changes: 90 additions & 0 deletions lib/rubygems/compact_index_client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# frozen_string_literal: true

##
# The CompactIndexClient fetches and parses the compact index files
# (names, versions and info/[gem]) served by a gem server, keeping a
# local cache so subsequent fetches only transfer what changed.
#
# This is an independent RubyGems port of Bundler::CompactIndexClient.
# Both implementations are intentionally kept separate so that changes
# on either side cannot affect the other; this one only depends on
# RubyGems itself.

class Gem::CompactIndexClient
  # Maps a supported digest name to the constant name of its Digest algorithm.
  SUPPORTED_DIGESTS = { "sha-256" => :SHA256 }.freeze
  # Serializes debug output so lines from concurrent threads do not interleave.
  DEBUG_MUTEX = Thread::Mutex.new

  # info returns an Array of INFO Arrays. Each INFO Array has the following indices:
  INFO_NAME = 0
  INFO_VERSION = 1
  INFO_PLATFORM = 2
  INFO_DEPS = 3
  INFO_REQS = 4

  class << self
    # Prints a debug line through Kernel#warn, but only when the
    # DEBUG_COMPACT_INDEX environment variable is set. The message comes from
    # the given block, so it is never built when debugging is off.
    def debug
      if ENV["DEBUG_COMPACT_INDEX"]
        DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") }
      end
    end
  end

  class Error < StandardError; end

  # Loaded after the constants and Error above are defined.
  require_relative "compact_index_client/cache"
  require_relative "compact_index_client/cache_file"
  require_relative "compact_index_client/http_fetcher"
  require_relative "compact_index_client/parser"
  require_relative "compact_index_client/updater"

  # Builds a client.
  #
  # - `directory`: the root directory where the cache files are stored.
  # - `fetcher`: (optional) an object that responds to #call(uri_path, headers)
  #   and returns a Gem::Net::HTTP response. When the fetcher is not provided,
  #   the client only reads cached files from disk.
  def initialize(directory, fetcher = nil)
    @cache = Cache.new(directory, fetcher)
    @parser = Parser.new(@cache)
  end

  # Returns the parser's view of the names index.
  def names
    Gem::CompactIndexClient.debug { "names" }
    @parser.names
  end

  # Returns the parser's view of the versions index.
  def versions
    Gem::CompactIndexClient.debug { "versions" }
    @parser.versions
  end

  # Returns one #info result per entry of +names+, in the same order.
  def dependencies(names)
    Gem::CompactIndexClient.debug { "dependencies(#{names})" }
    names.map {|gem_name| info(gem_name) }
  end

  # Returns the parsed info for the gem called +name+.
  def info(name)
    Gem::CompactIndexClient.debug { "info(#{name})" }
    @parser.info(name)
  end

  # Fetches a single gem's info without consulting the versions index,
  # using a conditional request to refresh the cached file. Useful when
  # only a few gems are needed and the versions index download would
  # dominate, as in gem install.
  def fetch_info(name)
    Gem::CompactIndexClient.debug { "fetch_info(#{name})" }
    raw_info = @cache.fetch_info(name)
    @parser.parse_info(raw_info, name)
  end

  # Returns the highest Gem::Version among the gem's info entries, or nil
  # when there are none.
  def latest_version(name)
    Gem::CompactIndexClient.debug { "latest_version(#{name})" }
    candidates = @parser.info(name).map {|entry| Gem::Version.new(entry[INFO_VERSION]) }
    candidates.max
  end

  # Delegates to the parser's availability check.
  def available?
    Gem::CompactIndexClient.debug { "available?" }
    @parser.available?
  end

  # Resets the cache's record of what was already fetched.
  def reset!
    Gem::CompactIndexClient.debug { "reset!" }
    @cache.reset!
  end
end
107 changes: 107 additions & 0 deletions lib/rubygems/compact_index_client/cache.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# frozen_string_literal: true

require "digest"
require "fileutils"
require "pathname" unless defined?(Pathname)
require "set"

class Gem::CompactIndexClient
  # Disk-backed store for the compact index files. When an updater is
  # available it is asked to refresh a file before the file is read; each
  # remote path is refreshed at most once until #reset! is called.
  class Cache
    attr_reader :directory

    # +directory+ is expanded to an absolute path and the info
    # sub-directories are created underneath it. Without a +fetcher+ no
    # updater is built and the cache only reads what is already on disk.
    def initialize(directory, fetcher = nil)
      @directory = Pathname.new(directory).expand_path
      @updater = Updater.new(fetcher) if fetcher
      @mutex = Thread::Mutex.new
      @endpoints = Set.new

      @info_root = mkdir("info")
      @special_characters_info_root = mkdir("info-special-characters")
      @info_etag_root = mkdir("info-etags")
    end

    # Contents of the names file (refreshed first when possible), or nil.
    def names
      fetch("names", names_path, names_etag_path)
    end

    # Contents of the versions file (refreshed first when possible), or nil.
    def versions
      fetch("versions", versions_path, versions_etag_path)
    end

    # Contents of the info file for +name+, or nil when it is not cached.
    # The file is only refreshed when +remote_checksum+ is given and differs
    # from the MD5 checksum of the local file.
    def info(name, remote_checksum = nil)
      local_path = info_path(name)
      stale = remote_checksum && remote_checksum != checksum_for_file(local_path)
      return fetch("info/#{name}", local_path, info_etag_path(name)) if stale

      Gem::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? "versions index checksum matches local" : "versions index checksum is nil"})" }
      read(local_path)
    end

    # Fetch a single gem's info file without consulting the versions
    # index, refreshing the cached file with a conditional request.
    def fetch_info(name)
      fetch("info/#{name}", info_path(name), info_etag_path(name))
    end

    # Forgets which remote paths were already fetched, so the next read of
    # each one goes through the updater again.
    def reset!
      @mutex.synchronize { @endpoints.clear }
    end

    private

    def names_path
      directory.join("names")
    end

    def names_etag_path
      directory.join("names.etag")
    end

    def versions_path
      directory.join("versions")
    end

    def versions_etag_path
      directory.join("versions.etag")
    end

    # Names made only of a-z, 0-9, "_" and "-" are stored under info/ as is;
    # any other name goes to the special-characters directory with the MD5
    # hex digest of the name appended.
    def info_path(name)
      gem_name = name.to_s
      return @info_root.join(gem_name) unless /[^a-z0-9_-]/.match?(gem_name)

      suffix = Digest::MD5.hexdigest(gem_name).downcase
      @special_characters_info_root.join(gem_name + "-#{suffix}")
    end

    # Etag files always carry the MD5 hex digest of the name as a suffix.
    def info_etag_path(name)
      gem_name = name.to_s
      suffix = Digest::MD5.hexdigest(gem_name).downcase
      @info_etag_root.join("#{gem_name}-#{suffix}")
    end

    # MD5 hex digest of the file at +path+, or nil when it is not a file.
    def checksum_for_file(path)
      Digest::MD5.file(path).hexdigest if path.file?
    end

    # Creates +name+ under the cache directory (with parents) and returns
    # its path.
    def mkdir(name)
      target = directory.join(name)
      FileUtils.mkdir_p(target)
      target
    end

    # Refreshes +path+ from +remote_path+ through the updater, unless that
    # remote path was already requested or there is no updater, then
    # returns whatever is on disk.
    def fetch(remote_path, path, etag_path)
      first_request = !already_fetched?(remote_path)

      if first_request
        Gem::CompactIndexClient.debug { "fetching #{remote_path}" }
        @updater&.update(remote_path, path, etag_path)
      else
        Gem::CompactIndexClient.debug { "already fetched #{remote_path}" }
      end

      read(path)
    end

    # Records +remote_path+ as requested. Returns true when it had been
    # recorded before, false on the first call.
    def already_fetched?(remote_path)
      @mutex.synchronize { @endpoints.add?(remote_path).nil? }
    end

    # File contents, or nil when +path+ is not a file.
    def read(path)
      path.read if path.file?
    end
  end
end
141 changes: 141 additions & 0 deletions lib/rubygems/compact_index_client/cache_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# frozen_string_literal: true

require "digest"
require "fileutils"
require_relative "../package"

class Gem::CompactIndexClient
  # Writes cache files in a way that is robust to concurrent modifications:
  # data is written to a per-process temp file next to the target, and the
  # temp file replaces the target only on commit. If digests are given, the
  # checksums are verified before the target is replaced.
  class CacheFile
    # Permissions used when the target file does not exist yet.
    DEFAULT_FILE_MODE = 0o644
    private_constant :DEFAULT_FILE_MODE

    class Error < RuntimeError; end
    # Raised when a closed (committed or discarded) file is reopened or committed.
    class ClosedError < Error; end

    # Raised by #commit! when the calculated digests differ from the expected ones.
    class DigestMismatchError < Error
      def initialize(digests, expected_digests)
        super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
      end
    end

    # Initialize with a copy of the original file, then yield the instance.
    # The copy is streamed through the digests, so they cover the existing
    # content before anything is appended. Returns the (closed) instance.
    def self.copy(path, &block)
      new(path) do |file|
        file.initialize_digests

        path.open("rb") do |s|
          file.open {|f| IO.copy_stream(s, f) }
        end

        yield file
      end
    end

    # Write data to a temp file, then replace the original file with it verifying the digests if given.
    # Returns nil without touching anything when +data+ is nil.
    # Raises DigestMismatchError when +digests+ are given and do not match.
    def self.write(path, data, digests = nil)
      return unless data
      new(path) do |file|
        file.digests = digests
        file.write(data)
      end
    end

    attr_reader :original_path, :path

    # +original_path+ is the Pathname of the file to be replaced. When a
    # block is given the instance is yielded and then closed, which removes
    # the temp file unless it was committed.
    def initialize(original_path, &block)
      @original_path = original_path
      @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE
      # Temp file lives beside the original, suffixed with the process id.
      @path = original_path.sub(/$/, ".#{$$}.tmp")
      return unless block_given?
      begin
        yield self
      ensure
        close
      end
    end

    # Size of the temp file.
    def size
      path.size
    end

    # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
    def initialize_digests(keys = nil)
      @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
      @digests.transform_values! {|algo_class| Digest(algo_class).new }
    end

    # reset the digests so they don't contain any previously read data
    def reset_digests
      @digests&.each_value(&:reset)
    end

    # set the digests that will be verified at the end
    #
    # +expected_digests+ is a Hash of digest name => base64 digest, or nil to
    # disable verification. Digests that are already being calculated are
    # narrowed down to the expected names; otherwise fresh ones are created.
    def digests=(expected_digests)
      @expected_digests = expected_digests

      if @expected_digests.nil?
        @digests = nil
      elsif @digests
        @digests = @digests.slice(*@expected_digests.keys)
      else
        initialize_digests(@expected_digests.keys)
      end
    end

    # Whether at least one digest is being calculated.
    def digests?
      @digests&.any?
    end

    # Open the temp file for writing, reusing original permissions, yielding the IO object.
    # When digests are active the IO is wrapped in a Gem::Package::DigestIO.
    # Raises ClosedError once the file has been committed or closed.
    def open(write_mode = "wb", perm = @perm, &block)
      raise ClosedError, "Cannot reopen closed file" if @closed
      path.open(write_mode, perm) do |f|
        yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f
      end
    end

    # Appends +data+ to the temp file, then verifies and commits it.
    # Returns false without appending when no digests since appending is too error prone to do without digests.
    # Also returns false, without committing, when verification fails.
    def append(data)
      return false unless digests?
      # Binary append, matching the "rb" read in .copy and the "wb" default
      # of #open: text mode would translate EOLs on Windows, so the file on
      # disk would no longer be the bytes that were given and verified.
      open("ab") {|f| f.write data }
      verify && commit
    end

    # Replaces the temp file content with +data+, then verifies and commits.
    # Raises DigestMismatchError on a digest mismatch.
    def write(data)
      reset_digests
      open {|f| f.write data }
      commit!
    end

    # Verifies the digests and commits, raising DigestMismatchError on mismatch.
    def commit!
      verify || raise(DigestMismatchError.new(@base64digests, @expected_digests))
      commit
    end

    # Verify the digests, returning true on match, false on mismatch.
    # Returns true as well when there is nothing to verify. The digests are
    # consumed by the check and are not available afterwards.
    def verify
      return true unless @expected_digests && digests?
      @base64digests = @digests.transform_values!(&:base64digest)
      @digests = nil
      @base64digests.all? {|algo, digest| @expected_digests[algo] == digest }
    end

    # Replace the original file with the temp file without verifying digests.
    # The file is permanently closed.
    def commit
      raise ClosedError, "Cannot commit closed file" if @closed
      FileUtils.mv(path, original_path)
      @closed = true
    end

    # Remove the temp file without replacing the original file.
    # The file is permanently closed.
    def close
      return if @closed
      FileUtils.remove_file(path) if @path&.file?
      @closed = true
    end
  end
end
Loading
Loading