Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4198dc6

Browse files
committed Jun 11, 2025
update to boltz-2, MolecularInput -> BoltzInput
1 parent 74cef93 commit 4198dc6

File tree

12 files changed

+121
-67
lines changed

12 files changed

+121
-67
lines changed
 

‎.github/workflows/CI.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ jobs:
3737
- uses: julia-actions/cache@v2
3838
- uses: julia-actions/julia-buildpkg@v1
3939
- uses: julia-actions/julia-runtest@v1
40+
- uses: julia-actions/julia-processcoverage@v1
41+
- uses: codecov/codecov-action@v5
42+
with:
43+
files: lcov.info
44+
token: ${{ secrets.CODECOV_TOKEN }}
45+
fail_ci_if_error: false
4046
docs:
4147
name: Documentation
4248
runs-on: ubuntu-latest

‎CondaPkg.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11

22
[pip.deps]
3-
boltz = "~=2.0.0"
3+
boltz = "~=2.1.1"

‎Project.toml

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,11 @@ BioStructuresExt = "BioStructures"
1919
ProteinChainsExt = "ProteinChains"
2020

2121
[compat]
22-
BioAlignments = "3"
23-
BioSequences = "3"
2422
BioStructures = "4"
2523
Compat = "4.16"
2624
CondaPkg = "0.2"
2725
ProteinChains = "0.7"
2826
PythonCall = "0.9"
2927
Scratch = "1"
30-
TMscore = "0.1"
3128
YAML = "0.4"
3229
julia = "1.10"
33-
34-
[extras]
35-
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
36-
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
37-
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
38-
TMscore = "6e960f36-814a-4ceb-8c87-6ab5da0e9bff"
39-
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
40-
41-
[targets]
42-
test = ["Test", "BioAlignments", "BioSequences", "BioStructures", "TMscore"]

‎README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pkg"add PyBoltz"
2020
```julia
2121
using PyBoltz, PyBoltz.Schema
2222

23-
input = MolecularInput(
23+
input = BoltzInput(
2424
sequences = [
2525
protein(id="A", sequence="TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN", msa="empty"),
2626
]

‎docs/src/api.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ predict
99
## `Schema` submodule
1010

1111
```@docs
12-
PyBoltz.Schema.MolecularInput
12+
PyBoltz.Schema.BoltzInput
1313
```
1414

1515
### Sequences
1616

17-
The following sequence types go into the `sequences` vector keyword argument of `MolecularInput`.
17+
The following sequence types go into the `sequences` vector keyword argument of `BoltzInput`.
1818

1919
```@docs
2020
PyBoltz.Schema.protein
@@ -25,7 +25,7 @@ PyBoltz.Schema.ligand
2525

2626
### Constraints
2727

28-
The following constraint types go into the `constraints` vector keyword argument of `MolecularInput`.
28+
The following constraint types go into the `constraints` vector keyword argument of `BoltzInput`.
2929

3030
```@docs
3131
PyBoltz.Schema.bond

‎ext/BioStructuresExt.jl

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,16 @@ end
1313
function PyBoltz.predict(input, ::Type{MolecularStructure}; options...)
1414
mktempdir() do out_dir
1515
predict(input;
16-
_prefix_index=(input isa AbstractVector{PyBoltz.Schema.MolecularInput}),
1716
out_dir,
1817
output_format="mmcif",
1918
options...)
2019
prediction_paths = readdir(joinpath(out_dir, only(readdir(out_dir)), "predictions"); join=true)
2120
prediction_names = basename.(prediction_paths)
2221

2322
local results
24-
if all(name -> startswith(name, PyBoltz.PYBOLTZ_INPUT_INDEX_PREFIX), prediction_names)
23+
if get(task_local_storage(), "pyboltz_remember_ordering", false)
2524
# output vector needs to match input vector (with possible missing values)
26-
@assert input isa AbstractVector{PyBoltz.Schema.MolecularInput}
25+
@assert input isa AbstractVector{PyBoltz.Schema.BoltzInput}
2726
results = Union{MolecularStructure,Missing}[fill(missing, length(input))...]
2827
for prediction_name in prediction_names
2928
index, name = split(split(prediction_name, PyBoltz.PYBOLTZ_INPUT_INDEX_PREFIX, limit=2)[2], "_", limit=2)
@@ -33,7 +32,7 @@ function PyBoltz.predict(input, ::Type{MolecularStructure}; options...)
3332
try
3433
results[idx] = read_boltz_cif(cif_path, name)
3534
catch e
36-
@warn e
35+
@error name
3736
results[idx] = missing
3837
end
3938
end
@@ -44,16 +43,25 @@ function PyBoltz.predict(input, ::Type{MolecularStructure}; options...)
4443
try
4544
push!(results, read_boltz_cif(cif_path, basename(prediction_path)))
4645
catch e
47-
@warn e
46+
@error name
4847
push!(results, missing)
4948
end
5049
end
5150
end
51+
if any(ismissing, results)
52+
@error "$(count(ismissing, results)) out of $(length(results)) boltz predictions errored."
53+
end
5254
return results
5355
end
5456
end
5557

56-
function PyBoltz.predict(input::PyBoltz.Schema.MolecularInput, ::Type{MolecularStructure}; options...)
58+
function PyBoltz.predict(input::AbstractVector{PyBoltz.Schema.BoltzInput}, ::Type{MolecularStructure}; options...)
59+
task_local_storage("pyboltz_remember_ordering", true) do
60+
@invoke predict(input::Any, MolecularStructure; options...)
61+
end
62+
end
63+
64+
function PyBoltz.predict(input::PyBoltz.Schema.BoltzInput, ::Type{MolecularStructure}; options...)
5765
return PyBoltz.predict([input], MolecularStructure; options...) |> only
5866
end
5967

‎ext/ProteinChainsExt.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ function PyBoltz.predict(input, ::Type{ProteinStructure}; options...)
1111
return PyBoltz.predict(input, MolecularStructure; options...) .|> to_structure
1212
end
1313

14-
function PyBoltz.predict(input::PyBoltz.Schema.MolecularInput, ::Type{ProteinStructure}; options...)
14+
function PyBoltz.predict(input::PyBoltz.Schema.BoltzInput, ::Type{ProteinStructure}; options...)
1515
return PyBoltz.predict(input, MolecularStructure; options...) |> to_structure
1616
end
1717

‎src/Schema.jl

Lines changed: 70 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
module Schema
22

3-
export MolecularInput
3+
export BoltzInput
44
export protein, dna, rna, ligand
55
export bond, pocket
66

77
const BOLTZ_SCHEMA_VERSION = 1
88

99
"""
10-
MolecularInput
10+
BoltzInput
1111
1212
A dictionary object that can be written to a YAML file.
1313
1414
Implemented according to the schema definition in the
15-
[boltz documentation](https://github.com/jwohlwend/boltz/blob/a9b3abc2c1f90f26b373dd1bcb7afb5a3cb40293/docs/prediction.md),
15+
[boltz documentation](https://github.com/jwohlwend/boltz/blob/744b4aecb6b5e847a25692ced07c328e7995ee33/docs/prediction.md),
1616
allowing for easy in-memory construction of the schema.
1717
1818
# Additions
@@ -28,7 +28,7 @@ allowing for easy in-memory construction of the schema.
2828
```julia
2929
using PyBoltz.Schema
3030
31-
input1 = MolecularInput(
31+
input1 = BoltzInput(
3232
name = "example1", # optional name YAML file (and thus output pdb/cif file)
3333
sequences = [
3434
protein(
@@ -47,7 +47,7 @@ input1 = MolecularInput(
4747
]
4848
)
4949
50-
input2 = MolecularInput(
50+
input2 = BoltzInput(
5151
sequences = [
5252
protein(
5353
id = ["A1"],
@@ -67,25 +67,33 @@ input2 = MolecularInput(
6767
)
6868
```
6969
"""
70-
struct MolecularInput <: AbstractDict{String,Any}
70+
struct BoltzInput <: AbstractDict{String,Any}
7171
dict::Dict{String,Any}
7272
end
7373

74-
function MolecularInput(;
74+
function BoltzInput(;
7575
sequences,
7676
constraints = nothing,
77+
templates = nothing,
78+
properties = nothing,
7779
name = nothing
7880
)
7981
dict = Dict{String,Any}("version" => BOLTZ_SCHEMA_VERSION, "sequences" => sequences)
8082
!isnothing(constraints) && (dict["constraints"] = constraints)
83+
!isnothing(templates) && (dict["templates"] = templates)
84+
!isnothing(properties) && (dict["properties"] = properties)
8185
!isnothing(name) && (dict["name"] = name)
82-
return MolecularInput(dict)
86+
return BoltzInput(dict)
8387
end
8488

85-
Base.length(input::MolecularInput) = length(input.dict)
86-
Base.iterate(input::MolecularInput, args...) = iterate(input.dict, args...)
87-
Base.getindex(input::MolecularInput, key::AbstractString) = input.dict[key]
88-
Base.get(input::MolecularInput, key::AbstractString, default) = get(input.dict, key, default)
89+
Base.length(input::BoltzInput) = length(input.dict)
90+
Base.iterate(input::BoltzInput, args...) = iterate(input.dict, args...)
91+
Base.getindex(input::BoltzInput, key::AbstractString) = input.dict[key]
92+
Base.get(input::BoltzInput, key::AbstractString, default) = get(input.dict, key, default)
93+
94+
# deprecated
95+
const MolecularInput = BoltzInput
96+
export MolecularInput
8997

9098
## sequences
9199

@@ -202,20 +210,66 @@ function bond(;
202210
end
203211

204212
"""
205-
pocket(; binder, contacts)
213+
pocket(; binder, contacts, max_distance=nothing)
206214
207215
```julia
208216
using PyBoltz.Schema: pocket
209217
# binder is a chain_id
210218
# contacts is a vector of (chain_id, residue_index) tuples
211-
pocket(binder="A", contacts=[["B", 1], ["C", 2]])
219+
pocket(binder="A", contacts=[("B", 1), ("C", 2)])
212220
```
213221
"""
214222
function pocket(;
    binder::AbstractString,
    contacts::AbstractVector{<:Tuple{AbstractString,Any}},
    max_distance::Union{Real,Nothing} = nothing,
)
    # Build the inner mapping first so that every optional key ends up
    # *inside* the "pocket" entry. Assigning `max_distance` on the outer
    # dictionary would both put it at the wrong nesting level and fail,
    # because the outer dictionary's value type is a dictionary, not a number.
    pocket_dict = Dict{String,Any}(
        "binder" => binder,
        # Each (chain_id, residue) tuple is splatted into a vector so it is
        # stored as a plain list rather than a Julia tuple.
        "contacts" => [[c...] for c in contacts],
    )
    isnothing(max_distance) || (pocket_dict["max_distance"] = max_distance)
    return Dict("pocket" => pocket_dict)
end
233+
234+
"""
235+
contact(; token1, token2, max_distance=nothing)
236+
"""
237+
function contact(;
238+
token1::Tuple{AbstractString,Any},
239+
token2::Tuple{AbstractString,Any},
240+
max_distance::Union{Real,Nothing} = nothing,
241+
)
242+
dict = Dict("contact" => Dict(
243+
"token1" => token1,
244+
"token2" => token2))
245+
!isnothing(max_distance) && (dict["max_distance"] = max_distance)
246+
return dict
247+
end
248+
249+
## templates
250+
251+
"""
    template(; cif, chain_id=nothing, template_id=nothing)

Build a template entry for the `templates` vector keyword argument of
`BoltzInput`.

# Keywords
- `cif`: value stored under the `"cif"` key (a path to a CIF file according
  to the boltz prediction schema).
- `chain_id`: optional chain id, or vector of chain ids, stored under
  `"chain_id"`. Omitted when `nothing`.
- `template_id`: optional template chain id, or vector of ids, stored under
  `"template_id"`. Omitted when `nothing`.

# Returns
A `Dict{String,Any}` containing `"cif"` and whichever optional keys were given.

```julia
using PyBoltz.Schema: template
template(cif="template.cif", chain_id="A")
```
"""
function template(;
    cif::AbstractString,
    chain_id::Union{AbstractString,AbstractVector{<:AbstractString},Nothing} = nothing,
    template_id::Union{AbstractString,AbstractVector{<:AbstractString},Nothing} = nothing,
)
    dict = Dict{String,Any}("cif" => cif)
    isnothing(chain_id) || (dict["chain_id"] = chain_id)
    isnothing(template_id) || (dict["template_id"] = template_id)
    # Explicit return: without it the function would yield the value of the
    # last short-circuit expression instead of the dictionary.
    return dict
end
263+
264+
## properties
265+
266+
"""
267+
affinity(; binder)
268+
"""
269+
function affinity(;
270+
binder::AbstractString
217271
)
218-
return Dict("pocket" => Dict{String,Any}("binder" => binder, "contacts" => [[c...] for c in contacts]))
272+
return Dict("affinity" => Dict("binder" => binder))
219273
end
220274

221275
end

‎src/predict.jl

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ Run Boltz-1 prediction with the given input, output type, and options.
66
# Input types
77
88
- `AbstractString`: Path to a FASTA/YAML file or directory (for batching).
9-
- `MolecularInput`: A single [`PyBoltz.Schema.MolecularInput`](@ref) object.
10-
- `Vector{MolecularInput}`: A vector of [`PyBoltz.Schema.MolecularInput`](@ref) objects for batching.
9+
- `BoltzInput`: A single [`PyBoltz.Schema.BoltzInput`](@ref) object.
10+
- `Vector{BoltzInput}`: A vector of [`PyBoltz.Schema.BoltzInput`](@ref) objects for batching.
1111
1212
# Output types
1313
@@ -16,8 +16,8 @@ By default, raw results will be written to disk in the `out_dir` directory (see
1616
For convenience, `output_type` can be provided as a second argument to reduce manual file I/O.
1717
1818
If `output_type` is provided, the function will return a
19-
single object if a `MolecularInput` was provided as input,
20-
otherwise a vector if an `AbstractString` or `Vector{MolecularInput}` was provided.
19+
single object if a `BoltzInput` was provided as input,
20+
otherwise a vector if an `AbstractString` or `Vector{BoltzInput}` was provided.
2121
2222
The following output types are supported:
2323
@@ -46,35 +46,28 @@ Defaults to a Scratch.jl-backed directory created at module init; call `clear_ca
4646
- `msa_pairing_strategy::String`: 'greedy' or 'complete'; requires `use_msa_server=true`.
4747
4848
## Boolean Flags
49-
- `verbose::Bool`: Whether to print boltz logs to stdout. Default: true.
5049
- `write_full_pae::Bool`: Dump PAE to a npz file. Default: true.
5150
- `write_full_pde::Bool`: Dump PDE to a npz file. Default: false.
5251
- `override::Bool`: Override existing predictions. Default: false.
5352
- `use_msa_server::Bool`: Use MMSeqs2 server for MSA generation. Default: false.
5453
"""
55-
function predict(input_path::AbstractString; verbose=true, _prefix_index=false, options...)
56-
@assert !_prefix_index "`_prefix_index` is reserved for internal use"
57-
cmd = predict_cmd(input_path; options...)
58-
if verbose
59-
run(cmd)
60-
else
61-
read(cmd, String)
62-
end
54+
function predict(input_path::AbstractString; options...)
55+
run(predict_cmd(input_path; options...))
6356
return nothing
6457
end
6558

6659

6760
const PYBOLTZ_INPUT_INDEX_PREFIX = "__pyboltz_index_"
6861

69-
function predict(inputs::AbstractVector{Schema.MolecularInput}; _prefix_index=false, options...)
62+
function predict(inputs::AbstractVector{Schema.BoltzInput}; options...)
7063
mktempdir() do dir
7164
input_dir = joinpath(dir, "inputs")
7265
mkdir(input_dir)
7366
msa_dir = joinpath(dir, "msas")
7467
mkdir(msa_dir)
7568
for (i, input) in enumerate(inputs)
76-
name = get(input, "name", "noname")
77-
prefix = _prefix_index ? "$(PYBOLTZ_INPUT_INDEX_PREFIX)$(i)_" : ""
69+
name = get(input, "name", "pyboltz_structure_$i")
70+
prefix = get(task_local_storage(), "pyboltz_remember_ordering", false) ? "$(PYBOLTZ_INPUT_INDEX_PREFIX)$(i)_" : ""
7871
path = joinpath(input_dir, "$prefix$name.yaml")
7972
YAML.write_file(path, MSAs_to_files!(deepcopy(input), msa_dir; prefix=prefix))
8073
end
@@ -83,4 +76,4 @@ function predict(inputs::AbstractVector{Schema.MolecularInput}; _prefix_index=fa
8376
return nothing
8477
end
8578

86-
predict(input::Schema.MolecularInput; options...) = predict([input]; options...)
79+
predict(input::Schema.BoltzInput; options...) = predict([input]; options...)

‎src/utils.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ function write_alignment(path::AbstractString, alignment::Vector{<:AbstractStrin
2525
end
2626
end
2727

28-
function MSAs_to_files!(input::Schema.MolecularInput, dir::AbstractString; prefix="")
28+
function MSAs_to_files!(input::Schema.BoltzInput, dir::AbstractString; prefix="")
2929
for (i, type_dict) in enumerate(input["sequences"])
3030
type, seq_dict = only(type_dict)
3131
type == "protein" || continue # only proteins have msas

‎test/Project.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[deps]
2+
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
3+
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
4+
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
5+
PyBoltz = "0bca4874-a2b8-4dca-bc03-d2d86e344f1f"
6+
TMscore = "6e960f36-814a-4ceb-8c87-6ab5da0e9bff"
7+
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

0 commit comments

Comments
 (0)
Please sign in to comment.