Skip to content

Commit bd3689c

Browse files
committed
experimenting with different export approaches for dependencies
1 parent 228d3f6 commit bd3689c

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed

lib/tasks/open_data.rake

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,4 +323,96 @@ namespace :open_data do
323323
end
324324
end
325325
end
326+
327+
desc "Export dependencies open data csv"
task export_dependencies2: :environment do
  # Column headings, in the same order as the values written for each row below.
  headers = [
    "ID",
    "Ecosystem",
    "Registry",
    "Package Name",
    "Package ID",
    "Version Number",
    "Version ID",
    "Dependency Name",
    "Dependency Ecosystem",
    "Dependency Kind",
    "Optional Dependency",
    "Dependency Requirements",
    "Dependency Package ID",
  ]

  # EXPORT_VERSION and EXPORT_DATE are not defined in this task; they are
  # expected to be constants provided elsewhere in the project.
  path = "data/dependencies2-#{EXPORT_VERSION}-#{EXPORT_DATE}.csv"

  # Block form of File.open closes (and therefore flushes) the file when the
  # block exits, including when an exception aborts the export part-way.
  File.open(path, "w") do |file|
    csv = CSV.new(file)
    csv << headers

    # Eager-load version -> package -> registry so each dependency row does
    # not issue its own queries for those associations.
    Dependency.includes(version: { package: :registry }).find_each do |dependency|
      # Skip dependencies whose version or package record is missing.
      version = dependency.version
      next if version.nil?

      package = version.package
      next if package.nil?

      csv << [
        dependency.id,
        package.ecosystem,
        package.registry.name,
        package.name,
        package.id,
        version.number,
        version.id,
        # tr with an empty replacement removes the listed characters, so the
        # free-text fields lose any carriage returns / line feeds.
        dependency.package_name.try(:tr, "\r\n", ""),
        dependency.ecosystem.try(:tr, "\r\n", ""),
        dependency.kind.try(:tr, "\r\n", ""),
        dependency.optional,
        dependency.requirements.try(:tr, "\r\n", ""),
        dependency.package_id,
      ]
    end
  end
end
369+
370+
desc "Export dependencies open data csv"
task export_dependencies3: :environment do
  # Column headings, in the same order as the values written for each row below.
  headers = [
    "ID",
    "Ecosystem",
    "Registry",
    "Package Name",
    "Package ID",
    "Version Number",
    "Version ID",
    "Dependency Name",
    "Dependency Ecosystem",
    "Dependency Kind",
    "Optional Dependency",
    "Dependency Requirements",
    "Dependency Package ID",
  ]

  # EXPORT_VERSION and EXPORT_DATE are not defined in this task; they are
  # expected to be constants provided elsewhere in the project.
  path = "data/dependencies3-#{EXPORT_VERSION}-#{EXPORT_DATE}.csv"

  # Block form of File.open closes (and therefore flushes) the file when the
  # block exits, including when an exception aborts the export part-way.
  File.open(path, "w") do |file|
    csv = CSV.new(file)
    csv << headers

    # NOTE(review): each_row_batch is defined outside this task; from its use
    # here it yields a collection of rows addressable by column-name strings.
    Dependency.each_row_batch do |batch|
      version_ids = batch.map { |row| row['version_id'] }.uniq

      # Load the batch's versions once (with package and registry preloaded)
      # and index them by id, so the per-row lookup below is a hash access
      # rather than a linear scan over every version in the batch.
      versions_by_id = Version.where(id: version_ids)
                              .includes(package: :registry)
                              .index_by(&:id)

      batch.each do |row|
        # Skip rows whose version or package record is missing.
        version = versions_by_id[row['version_id']]
        next if version.nil?

        package = version.package
        next if package.nil?

        csv << [
          row['id'],
          package.ecosystem,
          package.registry.name,
          package.name,
          package.id,
          version.number,
          version.id,
          # tr with an empty replacement removes the listed characters, so the
          # free-text fields lose any carriage returns / line feeds.
          row['package_name'].try(:tr, "\r\n", ""),
          row['ecosystem'].try(:tr, "\r\n", ""),
          row['kind'].try(:tr, "\r\n", ""),
          row['optional'],
          row['requirements'].try(:tr, "\r\n", ""),
          row['package_id'],
        ]
      end
    end
  end
end
326418
end

0 commit comments

Comments
 (0)