Skip to content

Commit c55a4e0

Browse files
committed
Enhance NuGet package metadata handling for Newtonsoft.Json #1175
1 parent e89843c commit c55a4e0

File tree

6 files changed

+975
-11
lines changed

6 files changed

+975
-11
lines changed

app/models/ecosystem/nuget.rb

Lines changed: 302 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,21 +105,77 @@ def map_package_metadata(package)
105105
return false if package[:releases].nil?
106106
item = package[:releases].last["catalogEntry"]
107107

108+
# Get comprehensive nuspec metadata for the latest version
109+
nuspec_metadata = parse_nuspec_metadata(package[:name], item["version"])
110+
108111
{
109112
name: package[:name].try(:downcase),
110113
description: description(item),
111114
homepage: item["projectUrl"],
112115
keywords_array: Array(item["tags"]).reject(&:blank?),
113-
repository_url: repo_fallback(item["projectUrl"], item["licenseUrl"], item["packageUrl"]),
116+
repository_url: repo_fallback(item["projectUrl"], item["licenseUrl"], item["packageUrl"],
117+
package_name: package[:name], version: item["version"]),
114118
releases: package[:releases],
115119
licenses: item["licenseExpression"],
116120
downloads: package[:download_stats]['data'].try(:first).try(:fetch,'totalDownloads'),
117121
downloads_period: 'total',
118122
download_stats: package[:download_stats],
123+
124+
# Enhanced metadata from .nuspec file
125+
metadata: build_package_nuspec_metadata(nuspec_metadata, package)
119126
}
120127
end
121128

122-
def repo_fallback(repo, license, homepage)
129+
# Builds the package-level metadata hash from parsed .nuspec data.
#
# Only NuGet-specific fields are copied; fields already exposed as standard
# package attributes (name, description, homepage, ...) are left out.
# Entries whose value is nil or empty (blank string, empty array/hash) are
# dropped so that absent information is not stored. Booleans are kept, since
# `false` is a meaningful value for the flag fields.
#
# @param nuspec_metadata [Hash, nil] hash produced by parse_nuspec_metadata
# @param package [Hash] currently unused; kept so existing callers keep working
# @return [Hash] pruned metadata, plus :dependency_summary when the nuspec
#   declares at least one dependency group; {} when nuspec_metadata is nil
def build_package_nuspec_metadata(nuspec_metadata, package)
  return {} unless nuspec_metadata

  metadata = {
    # NuGet-specific package information
    copyright: nuspec_metadata[:copyright],
    owners: nuspec_metadata[:owners],

    # Legal and licensing (detailed)
    license_info: nuspec_metadata[:license],
    license_url: nuspec_metadata[:license_url],
    require_license_acceptance: nuspec_metadata[:require_license_acceptance],

    # URLs and resources (NuGet-specific)
    icon_url: nuspec_metadata[:icon_url],
    icon: nuspec_metadata[:icon],
    readme: nuspec_metadata[:readme],

    # Repository details (more detailed than just URL)
    repository: nuspec_metadata[:repository],

    # Technical information
    min_client_version: nuspec_metadata[:min_client_version],
    language: nuspec_metadata[:language],
    development_dependency: nuspec_metadata[:development_dependency],
    serviceable: nuspec_metadata[:serviceable],

    # Framework and packaging information
    framework_assemblies: nuspec_metadata[:framework_assemblies],
    package_types: nuspec_metadata[:package_types],

    # Additional categorization
    summary: nuspec_metadata[:summary],
    release_notes: nuspec_metadata[:release_notes]
  }.reject { |_key, value| value.nil? || (value.respond_to?(:empty?) && value.empty?) }

  # Summarise dependencies only when the nuspec actually declares some groups.
  groups = nuspec_metadata[:dependency_groups]
  if groups&.any?
    metadata[:dependency_summary] = {
      total_dependency_groups: groups.length,
      target_frameworks: groups.map { |group| group[:target_framework] }.compact.uniq,
      total_dependencies: groups.sum { |group| group[:dependencies]&.length || 0 }
    }
  end

  metadata
end
177+
178+
def repo_fallback(repo, license, homepage, package_name: nil, version: nil)
123179
repo = "" if repo.nil?
124180
homepage = "" if homepage.nil?
125181
license = "" if license.nil?
@@ -133,33 +189,272 @@ def repo_fallback(repo, license, homepage)
133189
elsif license_url.present?
134190
license_url
135191
else
136-
""
192+
# Fallback to .nuspec file parsing if API URLs don't contain repository info
193+
nuspec_repo_url(package_name, version) if package_name && version
137194
end
138195
end
139196

197+
# Looks up the repository URL declared in a package version's .nuspec file.
#
# Always returns a String so callers can treat the result like the other
# repository URL fallbacks: "" when the nuspec cannot be fetched/parsed, when
# it declares no repository URL, or when UrlParser.try_all yields nothing
# usable for that URL.
# NOTE(review): assumes UrlParser.try_all can return a blank value for URLs it
# does not recognise — confirm against UrlParser.
#
# @param package_name [String] NuGet package id
# @param version [String] package version
# @return [String] parsed repository URL, or ""
def nuspec_repo_url(package_name, version)
  repository_url = parse_nuspec_metadata(package_name, version)&.dig(:repository, :url)
  return "" if repository_url.blank?

  UrlParser.try_all(repository_url).presence || ""
end
205+
206+
# Downloads and parses the .nuspec manifest of one package version from the
# NuGet flat-container endpoint.
#
# The flat-container API addresses packages by lowercased id and lowercased
# version without SemVer build metadata (the "+..." suffix), so both are
# normalised before the URL is built; otherwise such versions are not found.
# Only lowercasing and build-metadata stripping are done here, not full
# NuGet version normalisation.
#
# @param package_name [String] NuGet package id
# @param version [String] package version as reported by the registry API
# @return [Hash, nil] parsed metadata (see keys below), or nil when an argument
#   is missing, the request is unsuccessful, the document has no <metadata>
#   element, or any error is raised while fetching/parsing
def parse_nuspec_metadata(package_name, version)
  return nil unless package_name && version

  package_id = package_name.to_s.downcase
  package_version = version.to_s.downcase.sub(/\+.*\z/m, '')
  nuspec_url = "https://api.nuget.org/v3-flatcontainer/#{package_id}/#{package_version}/#{package_id}.nuspec"
  response = Faraday.get(nuspec_url)
  return nil unless response.success?

  # Loaded lazily, right before the XML is parsed.
  require 'nokogiri'
  doc = Nokogiri::XML(response.body)

  # Remove namespaces so plain element names can be used in XPath queries.
  doc.remove_namespaces!
  metadata_node = doc.at_xpath('//metadata')
  return nil unless metadata_node

  # Text of a direct child element, or nil when the element is absent.
  text_of = ->(element) { metadata_node.at_xpath(element)&.text }
  # True only when the child element's text is exactly 'true'.
  flag = ->(element) { text_of.call(element) == 'true' }

  {
    # Basic package information
    id: text_of.call('id'),
    version: text_of.call('version'),
    title: text_of.call('title'),
    authors: text_of.call('authors'),
    owners: text_of.call('owners'),

    # License and legal information
    license: extract_license_info(metadata_node),
    license_url: text_of.call('licenseUrl'),
    require_license_acceptance: flag.call('requireLicenseAcceptance'),
    copyright: text_of.call('copyright'),

    # URLs and resources
    project_url: text_of.call('projectUrl'),
    icon_url: text_of.call('iconUrl'),
    icon: text_of.call('icon'),
    readme: text_of.call('readme'),

    # Description and categorization
    description: text_of.call('description'),
    summary: text_of.call('summary'),
    tags: text_of.call('tags'),
    release_notes: text_of.call('releaseNotes'),

    # Repository information
    repository: extract_repository_info(metadata_node),

    # Technical metadata
    min_client_version: metadata_node.attr('minClientVersion'),
    language: text_of.call('language'),
    development_dependency: flag.call('developmentDependency'),
    serviceable: flag.call('serviceable'),

    # Dependencies information (detailed)
    dependency_groups: extract_dependency_groups(metadata_node),

    # Framework information
    framework_assemblies: extract_framework_assemblies(metadata_node),
    content_files: extract_content_files(metadata_node),
    package_types: extract_package_types(metadata_node),

    # Raw XML is kept for any future parsing needs
    raw_xml: response.body
  }
rescue => e
  # Best effort: a broken or unreachable nuspec must not fail the whole sync.
  Rails.logger.debug "Failed to parse .nuspec for #{package_name} v#{version}: #{e.message}"
  nil
end
273+
140274
# Picks the text shown as the package description.
#
# @param item [Hash] catalog entry; read via its "description" and "summary" keys
# @return [Object] the "description" value, or the "summary" value when the
#   description is blank
def description(item)
  primary = item["description"]
  return item["summary"] if primary.blank?

  primary
end
143277

144278
def versions_metadata(pkg_metadata, existing_version_numbers = [])
145279
pkg_metadata[:releases].map do |item|
280+
version = item["catalogEntry"]["version"]
281+
146282
{
147-
number: item["catalogEntry"]["version"],
283+
number: version,
148284
published_at: item["catalogEntry"]["published"],
149-
metadata: {
150-
downloads: version_downloads(pkg_metadata, item["catalogEntry"]["version"])
151-
}
285+
metadata: build_version_nuspec_metadata(pkg_metadata[:name], version, pkg_metadata, item)
152286
}
153287
end
154288
end
155289

290+
# Builds the metadata hash stored for a single package version.
#
# Starts from the registry API's catalog entry and, when the version's .nuspec
# can be fetched and parsed, merges in the nuspec fields plus a comparison of
# the two sources. The merged hash has nil values removed; the API-only hash
# (returned when no nuspec is available) is returned as built.
#
# @param package_name [String] NuGet package id
# @param version [String] version number of this release
# @param pkg_metadata [Hash] package metadata, passed to version_downloads
# @param item [Hash] release item; reads "catalogEntry" and "packageContent"
# @return [Hash] version metadata
def build_version_nuspec_metadata(package_name, version, pkg_metadata, item)
  entry = item["catalogEntry"]

  # Start with basic API metadata
  base_metadata = {
    downloads: version_downloads(pkg_metadata, version),

    # From API catalogEntry
    api_description: entry["description"],
    api_summary: entry["summary"],
    api_title: entry["title"],
    api_authors: entry["authors"],
    api_license_expression: entry["licenseExpression"],
    api_license_url: entry["licenseUrl"],
    api_project_url: entry["projectUrl"],
    api_icon_url: entry["iconUrl"],
    api_tags: entry["tags"],
    api_min_client_version: entry["minClientVersion"],
    api_language: entry["language"],

    # Technical details from API
    package_content_url: item["packageContent"],
    catalog_entry_id: entry["@id"],
    listed: entry["listed"],
    require_license_acceptance: entry["requireLicenseAcceptance"]
  }

  # Get enhanced metadata from .nuspec file
  nuspec_metadata = parse_nuspec_metadata(package_name, version)
  return base_metadata unless nuspec_metadata

  # The API exposes tags as a list while the nuspec stores one
  # whitespace-delimited string; reduce both to a sorted list of distinct tags
  # so the comparison reflects content rather than representation.
  tag_list = ->(tags) { Array(tags).flat_map { |tag| tag.to_s.split }.uniq.sort }

  base_metadata.merge(
    # Enhanced .nuspec fields
    nuspec_id: nuspec_metadata[:id],
    nuspec_title: nuspec_metadata[:title],
    nuspec_authors: nuspec_metadata[:authors],
    nuspec_owners: nuspec_metadata[:owners],
    nuspec_description: nuspec_metadata[:description],
    nuspec_summary: nuspec_metadata[:summary],
    nuspec_copyright: nuspec_metadata[:copyright],
    nuspec_tags: nuspec_metadata[:tags],
    nuspec_release_notes: nuspec_metadata[:release_notes],

    # License information (more detailed)
    license_info: nuspec_metadata[:license],

    # Repository information (detailed)
    repository: nuspec_metadata[:repository],

    # URLs and resources
    icon: nuspec_metadata[:icon],
    readme: nuspec_metadata[:readme],

    # Technical metadata
    min_client_version: nuspec_metadata[:min_client_version],
    language: nuspec_metadata[:language],
    development_dependency: nuspec_metadata[:development_dependency],
    serviceable: nuspec_metadata[:serviceable],

    # Dependency information (detailed for this version)
    dependency_groups: nuspec_metadata[:dependency_groups],
    framework_assemblies: nuspec_metadata[:framework_assemblies],
    content_files: nuspec_metadata[:content_files],
    package_types: nuspec_metadata[:package_types],

    # Analysis of differences between API and .nuspec
    metadata_source_comparison: {
      description_differs: entry["description"] != nuspec_metadata[:description],
      title_differs: entry["title"] != nuspec_metadata[:title],
      authors_differs: entry["authors"] != nuspec_metadata[:authors],
      license_differs: entry["licenseExpression"] != nuspec_metadata[:license]&.dig(:text),
      tags_differs: tag_list.call(entry["tags"]) != tag_list.call(nuspec_metadata[:tags])
    }
  ).compact
end
364+
156365
# Finds the download count recorded for one version in the package's
# download statistics.
#
# @param pkg_metadata [Hash] read via pkg_metadata[:download_stats]['data']
# @param version [String] version number to look up
# @return [Object, nil] the matching entry's 'downloads' value; nil when the
#   stats are missing/blank, no entry matches, or any error is raised
def version_downloads(pkg_metadata, version)
  stats = pkg_metadata[:download_stats]
  data = stats && stats['data']
  return nil if data.blank?

  match = data[0]['versions'].find { |entry| entry['version'] == version }
  match&.fetch('downloads')
rescue
  nil
end
162371

372+
private
373+
374+
# Reads the <license> child of the nuspec metadata node.
#
# @param metadata_node [#at_xpath] parsed <metadata> element
# @return [Hash, nil] :type and :version attributes plus the element :text,
#   or nil when there is no <license> element
def extract_license_info(metadata_node)
  if (license = metadata_node.at_xpath('license'))
    {
      type: license.attr('type'),
      text: license.text,
      version: license.attr('version')
    }
  end
end
384+
385+
# Reads the <repository> child of the nuspec metadata node.
#
# @param metadata_node [#at_xpath] parsed <metadata> element
# @return [Hash, nil] the element's type, url, branch and commit attributes
#   under symbol keys, or nil when there is no <repository> element
def extract_repository_info(metadata_node)
  repository = metadata_node.at_xpath('repository')
  return nil unless repository

  %w[type url branch commit].map { |name| [name.to_sym, repository.attr(name)] }.to_h
end
396+
397+
# Collects the dependencies declared in the nuspec metadata node.
#
# Handles both layouts allowed by the .nuspec format: dependencies wrapped in
# <group targetFramework="..."> elements, and the flat layout in which
# <dependency> elements sit directly under <dependencies>. Flat dependencies
# are reported as a single group whose :target_framework is nil.
#
# @param metadata_node [#xpath] parsed <metadata> element
# @return [Array<Hash>] one hash per group with :target_framework and
#   :dependencies (array of hashes with :id, :version, :include, :exclude)
def extract_dependency_groups(metadata_node)
  dependencies_under = lambda do |parent, path|
    parent.xpath(path).map do |dep|
      {
        id: dep.attr('id'),
        version: dep.attr('version'),
        include: dep.attr('include'),
        exclude: dep.attr('exclude')
      }
    end
  end

  dependency_groups = metadata_node.xpath('dependencies/group').map do |group|
    {
      target_framework: group.attr('targetFramework'),
      dependencies: dependencies_under.call(group, 'dependency')
    }
  end

  # Flat layout: no <group>, so the dependencies apply to every framework.
  ungrouped = dependencies_under.call(metadata_node, 'dependencies/dependency')
  dependency_groups << { target_framework: nil, dependencies: ungrouped } unless ungrouped.empty?

  dependency_groups
end
419+
420+
# Lists the <frameworkAssembly> entries found under <frameworkAssemblies>.
#
# @param metadata_node [#xpath] parsed <metadata> element
# @return [Array<Hash>] one hash per entry with :assembly_name and
#   :target_framework taken from the element's attributes
def extract_framework_assemblies(metadata_node)
  metadata_node.xpath('frameworkAssemblies/frameworkAssembly').map do |assembly_node|
    {
      assembly_name: assembly_node.attr('assemblyName'),
      target_framework: assembly_node.attr('targetFramework')
    }
  end
end
430+
431+
# Lists the content-file rules declared under <contentFiles>.
#
# In the .nuspec format each rule is a <files .../> element placed directly
# inside <contentFiles>, with the rule carried in its attributes; there is no
# nested <file> element at this level, so the <files> elements are queried.
#
# @param metadata_node [#xpath] parsed <metadata> element
# @return [Array<Hash>] one hash per rule with :include, :exclude,
#   :build_action, :copy_to_output and :flatten attribute values
def extract_content_files(metadata_node)
  metadata_node.xpath('contentFiles/files').map do |rule|
    {
      include: rule.attr('include'),
      exclude: rule.attr('exclude'),
      build_action: rule.attr('buildAction'),
      copy_to_output: rule.attr('copyToOutput'),
      flatten: rule.attr('flatten')
    }
  end
end
444+
445+
# Lists the <packageType> entries found under <packageTypes>.
#
# @param metadata_node [#xpath] parsed <metadata> element
# @return [Array<Hash>] one hash per entry with the :name and :version
#   attribute values
def extract_package_types(metadata_node)
  metadata_node.xpath('packageTypes/packageType').map do |type_node|
    { name: type_node.attr('name'), version: type_node.attr('version') }
  end
end
455+
456+
public
457+
163458
def dependencies_metadata(_name, version, package)
164459
current_version = package[:releases].find { |v| v["catalogEntry"]["version"] == version }
165460
dep_groups = current_version.fetch("catalogEntry", {})["dependencyGroups"] || []

0 commit comments

Comments
 (0)