1
+ module TMscore
2
+
3
+ import TMscore_jll
4
+
5
+ export tmscore, run_tmscore, TMscoreResult
6
+
7
"""
    TMscoreResult

Holds all of the key metrics returned by the TMscore executable.

# Fields
- `output::String`: raw output from TMscore.
- `len1::Int`, `len2::Int`: lengths of structure 1 and structure 2.
- `common::Int`: number of residues in common.
- `rmsd::Float64`: RMSD over the common residues.
- `tmscore::Float64`, `d0::Float64`: TM-score and the `d0` printed next to it.
- `maxsub::Float64`, `maxsub_d0::Float64`: MaxSub-score and the `d0` printed next to it.
- `gdt_ts::Float64`, `gdt_ts_thresholds::Dict{Float64,Float64}`: GDT-TS score and its
  per-cutoff values, keyed by the distance cutoff (`d<1`, `d<2`, `d<4`, `d<8`).
- `gdt_ha::Float64`, `gdt_ha_thresholds::Dict{Float64,Float64}`: GDT-HA score and its
  per-cutoff values, keyed by the distance cutoff (`d<0.5`, `d<1`, `d<2`, `d<4`).
- `rotation::Matrix{Float64}`: 3×3 rotation matrix.
- `translation::Vector{Float64}`: length-3 translation vector.
"""
struct TMscoreResult
    output::String                              # raw output from TMscore
    len1::Int                                   # length of structure 1
    len2::Int                                   # length of structure 2
    common::Int                                 # number of residues in common
    rmsd::Float64                               # RMSD over the common residues
    tmscore::Float64                            # TM-score
    d0::Float64                                 # d0 distance
    maxsub::Float64                             # MaxSub-score
    maxsub_d0::Float64                          # MaxSub d0
    gdt_ts::Float64                             # GDT-TS score
    gdt_ts_thresholds::Dict{Float64,Float64}    # (d<1, d<2, d<4, d<8) → value
    gdt_ha::Float64                             # GDT-HA score
    gdt_ha_thresholds::Dict{Float64,Float64}    # (d<0.5, d<1, d<2, d<4) → value
    rotation::Matrix{Float64}                   # 3×3 rotation matrix
    translation::Vector{Float64}                # length-3 translation vector
end

# Display a result as `TMscoreResult("""<raw output>""")`.
function Base.show(io::IO, result::TMscoreResult)
    print(io, TMscoreResult, "(\"\"\"\n", result.output, "\"\"\")")
    return nothing
end

# Parse the first capture group of `re` in `s` as a `T`; keep `current` when `re` does not match.
function _parse_capture(::Type{T}, re::Regex, s::AbstractString, current::T) where {T}
    m = match(re, s)
    return m === nothing ? current : parse(T, m.captures[1])
end

# Parse a `<score> ... (d0= <value>)` pair; keep `(score, d0)` when `re` does not match.
function _parse_score_d0(re::Regex, s::AbstractString, score::Float64, d0::Float64)
    m = match(re, s)
    m === nothing && return (score, d0)
    return (parse(Float64, m.captures[1]), parse(Float64, m.captures[2]))
end

# Record every `(d<cutoff)=value` pair found in `s` into `table`, keyed by the cutoff.
function _parse_thresholds!(table::Dict{Float64,Float64}, s::AbstractString)
    for m in eachmatch(r"\(d<([\d\.]+)\)=([\d\.]+)", s)
        table[parse(Float64, m.captures[1])] = parse(Float64, m.captures[2])
    end
    return table
end

"""
    TMscoreResult(output::AbstractString)::TMscoreResult

Parse the text printed by TMscore into a `TMscoreResult`.

The text is scanned line by line. Each metric keeps its initial value (`0`, `0.0`, an
empty `Dict`, or zeros for `rotation`/`translation`) when its line is absent or does not
match the expected pattern. Rotation/translation rows are recognised as a row index of
1, 2 or 3 followed by four numeric columns (translation, then the three rotation
entries); rows with any other leading integer are ignored.
"""
function TMscoreResult(output::AbstractString)::TMscoreResult
    len1 = 0
    len2 = 0
    common = 0
    rmsd = 0.0
    tms = 0.0
    d0 = 0.0
    ms = 0.0
    ms_d0 = 0.0
    gdt_ts = 0.0
    gdt_ha = 0.0
    gdt_ts_th = Dict{Float64,Float64}()
    gdt_ha_th = Dict{Float64,Float64}()
    rotation = zeros(Float64, 3, 3)
    translation = zeros(Float64, 3)

    for line in split(output, '\n')
        s = strip(line)

        if startswith(s, "Structure1:")
            len1 = _parse_capture(Int, r"Length=\s*(\d+)", s, len1)
        elseif startswith(s, "Structure2:")
            len2 = _parse_capture(Int, r"Length=\s*(\d+)", s, len2)
        elseif startswith(s, "Number of residues in common=")
            common = _parse_capture(Int, r"common=\s*(\d+)", s, common)
        elseif startswith(s, "RMSD")
            rmsd = _parse_capture(Float64, r"=\s*([\d\.]+)", s, rmsd)
        elseif startswith(s, "TM-score")
            tms, d0 = _parse_score_d0(
                r"TM-score\s*=\s*([\d\.]+).*\(d0=\s*([\d\.]+)\)", s, tms, d0
            )
        elseif startswith(s, "MaxSub-score")
            ms, ms_d0 = _parse_score_d0(
                r"MaxSub-score\s*=\s*([\d\.]+).*\(d0=\s*([\d\.]+)\)", s, ms, ms_d0
            )
        elseif startswith(s, "GDT-TS-score")
            gdt_ts = _parse_capture(Float64, r"=\s*([\d\.]+)", s, gdt_ts)
            _parse_thresholds!(gdt_ts_th, s)
        elseif startswith(s, "GDT-HA-score")
            gdt_ha = _parse_capture(Float64, r"=\s*([\d\.]+)", s, gdt_ha)
            _parse_thresholds!(gdt_ha_th, s)
        end

        # Rotation/translation rows. The row index is restricted to 1-3 so that an
        # unrelated numeric line can never index outside the 3-element translation
        # vector or the 3×3 rotation matrix.
        row = match(r"^([1-3])\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)", s)
        if row !== nothing
            i = parse(Int, row.captures[1])
            translation[i] = parse(Float64, row.captures[2])
            for j in 1:3
                rotation[i, j] = parse(Float64, row.captures[j + 2])
            end
        end
    end

    return TMscoreResult(
        output,
        len1, len2, common, rmsd,
        tms, d0, ms, ms_d0,
        gdt_ts, gdt_ts_th, gdt_ha, gdt_ha_th,
        rotation, translation,
    )
end
132
+
133
# Build the `Cmd` that launches the bundled TMscore executable with `options` appended
# as command-line arguments.
#
# The JLL-provided command is interpolated as the first word of the backtick literal
# rather than rebuilt from its `.exec` words: `Cmd(::Vector{String})` keeps only the
# argument words, whereas interpolating a leading `Cmd` carries over the environment,
# flags and working directory attached to it (Base behaviour on recent Julia versions —
# confirm against the minimum Julia version this package supports).
tmscore_command(options::Vector{String}) = `$(TMscore_jll.TMscore()) $options`
134
+
135
"""
    run_tmscore(file1::AbstractString, file2::AbstractString; options...) -> TMscoreResult
    run_tmscore(struc1, struc2; options...) -> TMscoreResult

Invoke the TMscore binary and parse its output into a `TMscoreResult` struct.

`BioStructures.StructuralElementOrList` objects can also be used as input,
and will be written to a temporary file before processing.

Each keyword option is turned into a command-line argument: `key=true` becomes `-key`,
`key=false` is omitted, and any other value becomes `-key string(value)`.

## Options

### Boolean Flags
- `c::Bool`: Compare two complex structures with multiple chains
- `seq::Bool`: Establish residue equivalence by sequence alignment instead of residue indices
- `a::Bool`: TM-score normalized by the average length of two structures
- `m::Bool`: Output TM-score rotation matrix
- `fast::Bool`: Fast but slightly inaccurate alignment
- `mirror::Bool`: Whether to align the mirror image of input structure
- `het::Bool`: Whether to align residues marked as 'HETATM' in addition to 'ATOM'

### Numeric Options
- `d::Number`: TM-score scaled by an assigned d0 (in Angstroms)
- `l::Int`: TM-score normalized by a specific length
- `ter::Int`: Strings to mark the end of a chain
    - 0: No TER card
    - 1: TER separates different chains
    - 2: TER marks end of each chain
    - 3: EXIT separates different chains
- `split::Int`: Whether to split PDB file into multiple chains
    - 0: Don't split
    - 1: Split by chain ID
    - 2: Split by TER records
- `outfmt::Int`: Output format
    - 0: Full format
    - 1: Sequence and structure in fasta format
    - 2: Matrix format
    - -1: Compact format
- `infmt1::Int`: Input format for chain1
    - -1: Auto-detect
    - 0: PDB format
    - 1: SPICKER format
    - 2: xyz format
    - 3: FASTA format
- `infmt2::Int`: Input format for chain2 (same options as infmt1)

### String Options
- `o::String`: Generate superposition output files with the given prefix
- `dir::String`: Perform all-against-all alignment among the list of PDB chains
- `dir1::String`: Use chain2 to search a list of PDB chains
- `dir2::String`: Use chain1 to search a list of PDB chains
- `suffix::String`: Add file name suffix to files listed by chain1_list or chain2_list
- `atom::String`: 4-character atom name used to represent a residue
- `mol::String`: Molecule type: RNA or protein

## Throws
- `ErrorException` when the process cannot be run or exits with a non-zero status. The
  message carries the captured stderr, or stdout when stderr is empty, or a description
  of the underlying exception when nothing was captured.
"""
function run_tmscore(file1::AbstractString, file2::AbstractString; options...)
    args = String[file1, file2]
    for (key, val) in pairs(options)
        if val === true
            push!(args, "-$key")
        elseif val !== false
            # `false` flags are simply omitted; everything else is passed as `-key value`.
            push!(args, "-$key", string(val))
        end
    end

    cmd = tmscore_command(args)
    out_buffer = IOBuffer()
    err_buffer = IOBuffer()
    failure = nothing
    ok = try
        process = run(pipeline(cmd; stdout=out_buffer, stderr=err_buffer); wait=true)
        process.exitcode == 0
    catch e
        # A user interrupt must not be reported as a TMscore failure.
        e isa InterruptException && rethrow()
        # Keep the exception so the error message is never empty.
        failure = e
        false
    end
    stdout_output = String(take!(out_buffer))
    stderr_output = String(take!(err_buffer))

    ok && return TMscoreResult(stdout_output)

    details = isempty(stderr_output) ? stdout_output : stderr_output
    if isempty(details) && failure !== nothing
        details = sprint(showerror, failure)
    end
    throw(ErrorException("TMscore failed with error:\n" * details))
end
220
+
221
# Fallback for inputs that are already file names: nothing is written and the name is
# returned unchanged; `tempdir` is ignored.
function write_tempfile(filename::AbstractString, tempdir)
    return filename
end
222
+
223
# Generic entry point: convert each input to a file name with `write_tempfile` (using a
# temporary directory created by `mktempdir` for the duration of the call) and forward
# to the file-name method together with all keyword options.
function run_tmscore(arg1, arg2; options...)
    return mktempdir() do scratch
        path1 = write_tempfile(arg1, scratch)
        path2 = write_tempfile(arg2, scratch)
        return run_tmscore(path1, path2; options...)
    end
end
228
+
229
"""
    tmscore(file1::String, file2::String; options...) -> Float64
    tmscore(struc1, struc2; options...) -> Float64

Invoke the TMscore binary and return only the TM-score value.

This is a convenience wrapper: it forwards both inputs and every keyword option to
`run_tmscore` and returns the `tmscore` field of the resulting `TMscoreResult`. See
`run_tmscore` for the available options and the accepted input types.
"""
tmscore(arg1, arg2; options...) = run_tmscore(arg1, arg2; options...).tmscore
243
+
244
+
245
+ # # BioStructuresExt.jl
246
+
247
+ using BioStructures
248
+
249
# Write `arg` as an mmCIF file inside `tempdir` and return the path of the new file.
# The file name is the current `time_ns()` value with a `.cif` extension.
function TMscore.write_tempfile(arg::StructuralElementOrList, tempdir)
    cif_path = joinpath(tempdir, string(time_ns(), ".cif"))
    writemmcif(cif_path, arg)
    return cif_path
end
254
+
255
+ end
0 commit comments