|
| 1 | +require 'csv' |
| 2 | + |
namespace :czi do
  desc 'Append Bioconductor packages from the CSV, plus their transitive dependencies, to data/bioconductor.ndjson'
  # Reads package names from the 'Bioconductor Package' column of
  # data/bioconductor_raw_df.csv, looks each one up (case-insensitively) in the
  # 'bioconductor' registry, and appends one JSON document per package found to
  # data/bioconductor.ndjson. It then repeats the lookup for the dependencies of
  # each exported package's latest version, level by level, until a level
  # yields no new dependency names.
  #
  # Names that cannot be found are reported on stdout and remembered so they
  # are only looked up once. Aborts if the registry itself does not exist.
  task :bioconductor => :environment do
    csv = CSV.read('data/bioconductor_raw_df.csv', headers: true)

    registry = Registry.find_by_ecosystem('bioconductor')
    abort 'Registry not found for ecosystem: bioconductor' unless registry

    processed_names = Set.new # lower-cased names already written to the file
    missing_names = Set.new   # lower-cased names that had no matching package

    # Block form guarantees the file is flushed and closed, even on error.
    # Mode 'a' appends, so re-running the task adds to any existing output.
    File.open('data/bioconductor.ndjson', 'a') do |file|
      # Exports a single package by name and returns the lower-cased names of
      # its latest version's dependencies ([] when nothing was exported).
      export_package = lambda do |raw_name|
        key = raw_name.to_s.downcase

        # Skip blank CSV cells and anything already handled, so no package is
        # written twice and no missing name is queried twice.
        next [] if key.empty? || processed_names.include?(key) || missing_names.include?(key)

        package = registry.packages.where('lower(name) = ?', key).first

        unless package
          puts "Package not found: #{raw_name}"
          missing_names << key
          next []
        end

        puts "#{package.name} - #{package.latest_release_number}"

        file.puts JSON.generate(package.as_json(include: [latest_version: { include: :dependencies }]))
        processed_names << package.name.downcase

        # A package may have no latest version; treat that as "no dependencies".
        Array(package.latest_version&.dependencies).map(&:package_name).compact.map(&:downcase)
      end

      # Level 0: the packages listed in the CSV.
      pending = Set.new
      csv.each { |row| pending.merge(export_package.call(row['Bioconductor Package'])) }

      # Following levels: walk the dependency graph breadth-first. The loop
      # ends because every name is exported or marked missing at most once.
      until pending.empty?
        current_level = pending
        pending = Set.new

        current_level.each { |dependency_name| pending.merge(export_package.call(dependency_name)) }

        puts "Processed #{processed_names.size} packages"
        puts "Found #{missing_names.size} missing packages"
        puts "Found #{pending.size} dependencies"
        puts '--------------------------'
      end
    end
  end
end
| 71 | + |
0 commit comments