Skip to content

Commit 26d8374

Browse files
committed
define not-yet registered TMscore.jl in tests
1 parent 2e0a800 commit 26d8374

File tree

5 files changed

+263
-11
lines changed

5 files changed

+263
-11
lines changed

.github/workflows/CI.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ jobs:
3636
version: ${{ matrix.version }}
3737
arch: ${{ matrix.arch }}
3838
- uses: julia-actions/cache@v2
39-
- name: registry_add
40-
run: julia -e 'using Pkg; pkg"registry add https://github.com/MurrellGroup/MurrellGroupRegistry"'
4139
- uses: julia-actions/julia-buildpkg@v1
4240
- uses: julia-actions/julia-runtest@v1
4341
docs:
@@ -53,8 +51,6 @@ jobs:
5351
with:
5452
version: '1'
5553
- uses: julia-actions/cache@v2
56-
- name: registry_add
57-
run: julia -e 'using Pkg; pkg"registry add https://github.com/JuliaRegistries/General https://github.com/MurrellGroup/MurrellGroupRegistry"'
5854
- name: Configure doc environment
5955
shell: julia --project=docs --color=yes {0}
6056
run: |

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,16 @@ CondaPkg = "0.2"
2727
ProteinChains = "0.7"
2828
PythonCall = "0.9"
2929
Scratch = "1"
30-
TMscore = "0.0.3"
30+
TMscore_jll = "1.0.0"
3131
YAML = "0.4"
3232
julia = "1.10"
3333

3434
[extras]
3535
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
3636
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
3737
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
38-
TMscore = "6e960f36-814a-4ceb-8c87-6ab5da0e9bff"
38+
TMscore_jll = "1d8dad79-2f3d-554c-bf22-543753cb6ff4"
3939
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4040

4141
[targets]
42-
test = ["Test", "BioAlignments", "BioSequences", "BioStructures", "TMscore"]
42+
test = ["Test", "BioAlignments", "BioSequences", "BioStructures", "TMscore_jll"]

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ Julia bindings for the [boltz](https://github.com/jwohlwend/boltz) Python packag
1010

1111
```julia
1212
using Pkg
13-
pkg"registry add https://github.com/MurrellGroup/MurrellGroupRegistry"
1413
pkg"add Boltz1"
1514
```
1615

@@ -23,7 +22,7 @@ using Boltz1, Boltz1.Schema
2322

2423
input = MolecularInput(
2524
sequences = [
26-
protein(id="A", sequence="TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN"),
25+
protein(id="A", sequence="TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN", msa="empty"),
2726
]
2827
)
2928

test/TMscore.jl

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
module TMscore
2+
3+
import TMscore_jll
4+
5+
export tmscore, run_tmscore, TMscoreResult
6+
"""
    TMscoreResult

Metrics extracted from one report printed by the TMscore executable, together with
the raw report text.

Every numeric field keeps its zero (or empty) default when the corresponding line is
absent from the report or cannot be parsed.
"""
struct TMscoreResult
    output::String                            # raw output from TMscore
    len1::Int                                 # length of structure 1
    len2::Int                                 # length of structure 2
    common::Int                               # number of residues in common
    rmsd::Float64                             # RMSD over the common residues
    tmscore::Float64                          # TM-score
    d0::Float64                               # d0 reported on the TM-score line
    maxsub::Float64                           # MaxSub-score
    maxsub_d0::Float64                        # d0 reported on the MaxSub-score line
    gdt_ts::Float64                           # GDT-TS score
    gdt_ts_thresholds::Dict{Float64,Float64}  # distance cutoff => value, from "(d<X)=Y"
    gdt_ha::Float64                           # GDT-HA score
    gdt_ha_thresholds::Dict{Float64,Float64}  # distance cutoff => value, from "(d<X)=Y"
    rotation::Matrix{Float64}                 # 3×3 rotation matrix
    translation::Vector{Float64}              # length-3 translation vector
end

# Show a result as its raw report wrapped in a constructor-like call.
function Base.show(io::IO, result::TMscoreResult)
    return print(io, TMscoreResult, "(\"\"\"\n", result.output, "\"\"\")")
end

# Parse the first capture group of `pattern` in `line` as a `T`; return `default` when
# the pattern does not match or the captured text is not a valid `T`.
function _parse_capture(::Type{T}, pattern::Regex, line, default::T) where {T}
    m = match(pattern, line)
    m === nothing && return default
    captured = m.captures[1]
    captured === nothing && return default
    return something(tryparse(T, captured), default)
end

# Parse a "<score> ... (d0= <d0>)" pair; return the supplied defaults unless both
# numbers are present and valid.
function _parse_score_d0(pattern::Regex, line, score::Float64, d0::Float64)
    m = match(pattern, line)
    m === nothing && return (score, d0)
    new_score = tryparse(Float64, m.captures[1])
    new_d0 = tryparse(Float64, m.captures[2])
    (new_score === nothing || new_d0 === nothing) && return (score, d0)
    return (new_score, new_d0)
end

# Record every "(d<cutoff)=value" pair found in `line` into `thresholds`.
function _parse_thresholds!(thresholds::Dict{Float64,Float64}, line)
    for m in eachmatch(r"\(d<([\d\.]+)\)=([\d\.]+)", line)
        cutoff = tryparse(Float64, m.captures[1])
        value = tryparse(Float64, m.captures[2])
        (cutoff === nothing || value === nothing) && continue
        thresholds[cutoff] = value
    end
    return thresholds
end

"""
    TMscoreResult(output::AbstractString)::TMscoreResult

Parse the text printed by TMscore into a `TMscoreResult`.

The report is scanned line by line. Recognised lines update the matching field;
unrecognised or malformed lines are ignored, so a partial report yields a result whose
remaining fields are zero/empty. Rotation/translation rows are only accepted for row
indices 1, 2 and 3, so unrelated numeric tables in the report cannot cause an
out-of-bounds write.
"""
function TMscoreResult(output::AbstractString)::TMscoreResult
    len1 = 0; len2 = 0; common = 0; rmsd = 0.0
    tms = 0.0; d0 = 0.0
    ms = 0.0; ms_d0 = 0.0
    gdt_ts = 0.0; gdt_ha = 0.0
    gdt_ts_th = Dict{Float64,Float64}(); gdt_ha_th = Dict{Float64,Float64}()
    rotation = zeros(Float64, 3, 3)
    translation = zeros(Float64, 3)

    for line in split(output, '\n')
        s = strip(line)

        if startswith(s, "Structure1:")
            len1 = _parse_capture(Int, r"Length=\s*(\d+)", s, len1)
        elseif startswith(s, "Structure2:")
            len2 = _parse_capture(Int, r"Length=\s*(\d+)", s, len2)
        elseif startswith(s, "Number of residues in common=")
            common = _parse_capture(Int, r"common=\s*(\d+)", s, common)
        elseif startswith(s, "RMSD")
            rmsd = _parse_capture(Float64, r"=\s*([\d\.]+)", s, rmsd)
        elseif startswith(s, "TM-score")
            tms, d0 = _parse_score_d0(
                r"TM-score\s*=\s*([\d\.]+).*\(d0=\s*([\d\.]+)\)", s, tms, d0
            )
        elseif startswith(s, "MaxSub-score")
            ms, ms_d0 = _parse_score_d0(
                r"MaxSub-score\s*=\s*([\d\.]+).*\(d0=\s*([\d\.]+)\)", s, ms, ms_d0
            )
        elseif startswith(s, "GDT-TS-score")
            gdt_ts = _parse_capture(Float64, r"=\s*([\d\.]+)", s, gdt_ts)
            _parse_thresholds!(gdt_ts_th, s)
        elseif startswith(s, "GDT-HA-score")
            gdt_ha = _parse_capture(Float64, r"=\s*([\d\.]+)", s, gdt_ha)
            _parse_thresholds!(gdt_ha_th, s)
        end

        # Rotation/translation rows: "<i> <t(i)> <u(i,1)> <u(i,2)> <u(i,3)>".
        # The index is restricted to 1-3 because it is used directly to index the
        # 3-element translation vector and the 3×3 rotation matrix.
        row = match(
            r"^\s*([1-3])\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)", s
        )
        if row !== nothing
            i = parse(Int, row.captures[1])
            numbers = [tryparse(Float64, c) for c in row.captures[2:5]]
            # Skip the row entirely if any column is not a valid number.
            if !any(isnothing, numbers)
                translation[i] = numbers[1]
                rotation[i, 1:3] .= numbers[2:4]
            end
        end
    end

    return TMscoreResult(
        String(output),
        len1, len2, common, rmsd,
        tms, d0, ms, ms_d0,
        gdt_ts, gdt_ts_th, gdt_ha, gdt_ha_th,
        rotation, translation
    )
end
132+
# Build the command that runs the TMscore executable from `TMscore_jll` with `options`
# appended as arguments.
#
# The JLL-provided `Cmd` is interpolated as the leading element rather than rebuilt from
# its `exec` vector: a `Cmd` interpolated first keeps its environment, working directory
# and flags, whereas `Cmd(exec_vector)` would drop whatever environment the JLL wrapper
# attached (NOTE(review): JLL executable wrappers normally set library search paths
# there — confirm for TMscore_jll).
tmscore_command(options::Vector{String}) = `$(TMscore_jll.TMscore()) $options`
134+
"""
    run_tmscore(file1::AbstractString, file2::AbstractString; options...) -> TMscoreResult
    run_tmscore(struc1, struc2; options...) -> TMscoreResult

Invoke the TMscore binary and parse its output into a `TMscoreResult` struct.

`BioStructures.StructuralElementOrList` objects can also be used as input,
and will be written to a temporary file before processing.

Each keyword option is turned into a command-line flag: `key=true` becomes `-key`,
`key=false` is omitted, and any other value becomes `-key value` (the value is passed
through `string`).

Throws an `ErrorException` whose message starts with `"TMscore failed with error:"` when
the executable cannot be launched or does not exit successfully. The message carries the
captured stderr, or stdout when stderr is empty.

## Options

### Boolean Flags
- `c::Bool`: Compare two complex structures with multiple chains
- `seq::Bool`: Establish residue equivalence by sequence alignment instead of residue indices
- `a::Bool`: TM-score normalized by the average length of two structures
- `m::Bool`: Output TM-score rotation matrix
- `fast::Bool`: Fast but slightly inaccurate alignment
- `mirror::Bool`: Whether to align the mirror image of input structure
- `het::Bool`: Whether to align residues marked as 'HETATM' in addition to 'ATOM'

### Numeric Options
- `d::Number`: TM-score scaled by an assigned d0 (in Angstroms)
- `l::Int`: TM-score normalized by a specific length
- `ter::Int`: Strings to mark the end of a chain
  - 0: No TER card
  - 1: TER separates different chains
  - 2: TER marks end of each chain
  - 3: EXIT separates different chains
- `split::Int`: Whether to split PDB file into multiple chains
  - 0: Don't split
  - 1: Split by chain ID
  - 2: Split by TER records
- `outfmt::Int`: Output format
  - 0: Full format
  - 1: Sequence and structure in fasta format
  - 2: Matrix format
  - -1: Compact format
- `infmt1::Int`: Input format for chain1
  - -1: Auto-detect
  - 0: PDB format
  - 1: SPICKER format
  - 2: xyz format
  - 3: FASTA format
- `infmt2::Int`: Input format for chain2 (same options as infmt1)

### String Options
- `o::String`: Generate superposition output files with the given prefix
- `dir::String`: Perform all-against-all alignment among the list of PDB chains
- `dir1::String`: Use chain2 to search a list of PDB chains
- `dir2::String`: Use chain1 to search a list of PDB chains
- `suffix::String`: Add file name suffix to files listed by chain1_list or chain2_list
- `atom::String`: 4-character atom name used to represent a residue
- `mol::String`: Molecule type: RNA or protein
"""
function run_tmscore(file1::AbstractString, file2::AbstractString; options...)
    cmd_vec = String[file1, file2]
    for (key, val) in pairs(options)
        if val === true
            push!(cmd_vec, "-$key")
        elseif val !== false
            push!(cmd_vec, "-$key", string(val))
        end
    end

    stdout_buffer = IOBuffer()
    stderr_buffer = IOBuffer()

    # `ignorestatus` keeps `run` from throwing on a non-zero exit, so the exit status
    # is inspected explicitly and only genuine launch problems reach the `catch`.
    cmd = ignorestatus(tmscore_command(cmd_vec))
    launch_error = nothing
    succeeded = try
        process = run(pipeline(cmd; stdout=stdout_buffer, stderr=stderr_buffer); wait=true)
        success(process)
    catch e
        # Never convert a user interrupt into a "TMscore failed" error.
        e isa InterruptException && rethrow()
        launch_error = e
        false
    end

    stdout_output = String(take!(stdout_buffer))
    stderr_output = String(take!(stderr_buffer))
    succeeded && return TMscoreResult(stdout_output)

    details = isempty(stderr_output) ? stdout_output : stderr_output
    if isempty(details) && launch_error !== nothing
        # Nothing was captured (e.g. the process never started): report why instead of
        # raising an error with an empty body.
        details = sprint(showerror, launch_error)
    end
    throw(ErrorException("TMscore failed with error:\n" * details))
end
220+
# Fallback for inputs that are already file names: nothing is written, `tempdir` is
# ignored, and the name is handed back untouched.
function write_tempfile(filename::AbstractString, tempdir)
    return filename
end
222+
# Generic entry point: each argument is converted to a file path via `write_tempfile`
# inside a fresh temporary directory, then the path-based method does the work. The
# directory only exists for the duration of the `mktempdir` block.
function run_tmscore(arg1, arg2; options...)
    return mktempdir() do scratch
        path1 = write_tempfile(arg1, scratch)
        path2 = write_tempfile(arg2, scratch)
        run_tmscore(path1, path2; options...)
    end
end
228+
"""
    tmscore(file1::String, file2::String; options...) -> Float64
    tmscore(struc1, struc2; options...) -> Float64

Run the TMscore binary on the two inputs and return only the TM-score.

Thin wrapper over `run_tmscore`: both positional arguments and all keyword options are
forwarded unchanged, and the `tmscore` field of the resulting `TMscoreResult` is
returned. See `run_tmscore` for the accepted options and input types.
"""
tmscore(arg1, arg2; options...) = run_tmscore(arg1, arg2; options...).tmscore
243+
244+
245+
## BioStructuresExt.jl
246+
247+
using BioStructures
248+
# Write a BioStructures element (or list of elements) as an mmCIF file inside `tempdir`
# and return the path of the new file. The file name is the current `time_ns()` value
# with a `.cif` extension.
function TMscore.write_tempfile(arg::StructuralElementOrList, tempdir)
    filename = string(time_ns(), ".cif")
    destination = joinpath(tempdir, filename)
    writemmcif(destination, arg)
    return destination
end
254+
255+
end

test/runtests.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ using Test
44
using BioAlignments
55
using BioSequences
66
using BioStructures
7-
using TMscore
87

9-
# ENV["BOLTZ1_TEST_ACCELERATOR"] = "gpu"
8+
include("TMscore.jl") # remove this once TMscore.jl is registered: https://github.com/JuliaRegistries/General/pull/130867
9+
using .TMscore
10+
# The accelerator is taken from the caller's environment, e.g. run the suite with
# `BOLTZ1_TEST_ACCELERATOR=gpu`. It is deliberately not assigned here: forcing a value
# would override the caller's setting and make the "cpu" default below unreachable.
const accelerator = get(ENV, "BOLTZ1_TEST_ACCELERATOR", "cpu")
1214

0 commit comments

Comments
 (0)