Skip to content

Commit 0a668b9

Browse files
committed
czi dependency rake task
1 parent bd3689c commit 0a668b9

File tree

2 files changed

+74
-2
lines changed

2 files changed

+74
-2
lines changed

.gitignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@
3333

3434
.env
3535

36-
# Ignore generated csv files
37-
*.csv
36+
# Ignore generated files
37+
*.csv
38+
data/*

lib/tasks/czi.rake

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
require 'csv'
2+
3+
namespace :czi do
  # Exports the Bioconductor packages named in data/bioconductor_raw_df.csv
  # (column "Bioconductor Package"), followed by every package reachable
  # through the dependencies of their latest versions, as one JSON document
  # per line appended to data/bioconductor.ndjson.
  #
  # Package names are matched case-insensitively. Each package is written at
  # most once per run; names that cannot be found in the registry are reported
  # on stdout and remembered so they are not looked up again.
  #
  # Raises RuntimeError when no registry exists for the bioconductor ecosystem.
  desc 'Export CZI Bioconductor packages and their transitive dependencies to data/bioconductor.ndjson'
  task bioconductor: :environment do
    registry = Registry.find_by_ecosystem('bioconductor')
    raise 'Registry not found for ecosystem: bioconductor' unless registry

    # load csv file
    csv = CSV.read('data/bioconductor_raw_df.csv', headers: true)

    processed_names = Set.new
    missing_names = Set.new

    # Block form guarantees the output file is flushed and closed, even when a
    # lookup raises part-way through the export.
    File.open('data/bioconductor.ndjson', 'a') do |file|
      # Exports a single package by name and returns the downcased names of
      # the dependencies of its latest version. Returns an empty list for
      # names that were already handled or that the registry does not know.
      export_package = lambda do |name|
        key = name.downcase

        if processed_names.include?(key) || missing_names.include?(key)
          []
        elsif (package = registry.packages.where('lower(name) = ?', key).first)
          puts "#{package.name} - #{package.latest_release_number}"

          obj = package.as_json(include: [latest_version: { include: :dependencies }])
          file.puts JSON.generate(obj)

          processed_names << key

          # A package may have no latest version yet; treat it as having no
          # dependencies instead of crashing on nil.
          dependency_records = package.latest_version&.dependencies || []
          dependency_records.map(&:package_name).compact.map(&:downcase)
        else
          puts "Package not found: #{name}"
          missing_names << key
          []
        end
      end

      # First pass: the packages listed in the CSV itself.
      dependencies = Set.new
      csv.each do |row|
        listed_name = row['Bioconductor Package'].to_s.strip
        next if listed_name.empty? # skip rows without a package name

        dependencies.merge(export_package.call(listed_name))
      end

      # Following passes: walk the dependency graph one level at a time until
      # a level yields no further dependency names. Cycles terminate because
      # already-handled names contribute no new dependencies.
      until dependencies.empty?
        current_level = dependencies
        dependencies = Set.new

        current_level.each do |dependency_name|
          dependencies.merge(export_package.call(dependency_name))
        end

        puts "Processed #{processed_names.count} packages"
        puts "Found #{missing_names.count} missing packages"
        puts "Found #{dependencies.count} dependencies"
        puts '--------------------------'
      end
    end
  end
end
71+

0 commit comments

Comments
 (0)