rev: tip chemfp_examples/chembl_neighbor_browser_flask.py -rw-r--r-- 5.8 KiB View raw Log this file
871d0037866bAndrew Dalke added description of how the cli output SDF is generated 5 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Example Flask-based web application to browse similar structures in ChEMBL.
#
# Requires chemfp, Flask, and chembl_28.fpb, which can be installed with:
# 
#   python -m pip install chemfp -i https://chemfp.com/packages/
#   python -m pip install flask
#   wget https://chemfp.com/datasets/chembl_28.fpb.gz
#   gunzip chembl_28.fpb.gz
#
# The program can be run using either of the following:
#   
#   python chembl_neighbor_browser_flask.py
#       -or-
#   FLASK_APP=chembl_neighbor_browser_flask.py flask run
#
# The first of these supports a "--fingerprints" command-line option
# to specify the path to the ChEMBL fingerprints.
#
# By default this starts a web server on localhost port 5000. To
# interact with it, go to:
#
#     http://127.0.0.1:5000/
#
# or use the URL printed in your terminal window.

import os
import sys
import argparse
import time

import chemfp

from flask import Flask
from flask import render_template, request, redirect, url_for

from werkzeug.exceptions import BadRequest

# The start id if nothing is passed in. It is the default for the
# optional "chembl_id" command-line argument and the id used when the
# module is loaded through "flask run".
DEFAULT_ID = "CHEMBL1114"

# The Flask application object. The "index" view below is registered on it.
app = Flask(__name__)
# Ask Flask to reload templates when they change on disk, which is
# convenient while editing "neighbor_browser.html".
app.templates_auto_reload = True

class AppConfig:
    """Process-wide settings shared by the request handler.

    The class is used directly as a namespace (it is never instantiated).
    set_config() fills in 'arena' and 'default_id' at startup, and index()
    passes the class to the template as 'app_config'.
    """
    # The loaded fingerprint arena; None until set_config() has run.
    arena = None
    # The id to search for when the request does not include an "id".
    default_id = DEFAULT_ID
    # The number of neighbors to show when "k" is missing or not allowed.
    default_k = 10
    # The only values of "k" which index() accepts from a request.
    k_values = [1, 5, 10, 25, 50, 100, 250, 500, 1000]

# Build the link to the ChEMBL compound report card page for a ChEMBL id.
def chembl_url(chembl_id):
    report_card = f"https://www.ebi.ac.uk/chembl/compound_report_card/{chembl_id}/"
    return report_card

# Build the link to the SVG depiction which ChEMBL serves for a ChEMBL id.
def depict_url(chembl_id):
    svg_image = f"https://www.ebi.ac.uk/chembl/api/data/image/{chembl_id}.svg"
    return svg_image

@app.route("/")
def index():
    """Render the neighbor browser page for one ChEMBL id.

    Query parameters:
      id -- the ChEMBL id to use as the query. If not given, use
            AppConfig.default_id.
      k  -- the number of neighbors to show. Anything which is not an
            integer listed in AppConfig.k_values is replaced with
            AppConfig.default_k.

    Returns the rendered "neighbor_browser.html" template. The template
    receives the URL helper functions, AppConfig (as 'app_config'), and
    the search results: 'chembl_id', 'errmsg', 'k', 'search_time' and
    'hits'. If the id has no fingerprint then 'errmsg' is set and both
    'search_time' and 'hits' stay None. Otherwise 'hits' is a list of at
    most k (id, score) pairs which excludes the query id.
    """
    # Use the user-specified id. If not given, use the default.
    chembl_id = request.args.get("id", None)
    if chembl_id is None:
        chembl_id = AppConfig.default_id

    # Get 'k' as an integer, limited to the range of AppConfig.k_values
    try:
        k = int(request.args.get("k", AppConfig.default_k))
    except ValueError:
        k = AppConfig.default_k  # 'k' was not an integer
    else:
        if k not in AppConfig.k_values:
            k = AppConfig.default_k

    # Initialize a dictionary containing the search results
    search_results = dict(
        chembl_id = chembl_id,
        errmsg = None,
        k = k,
        search_time = None,
        hits = None,
        )

    ## Start working with the arena
    arena = AppConfig.arena

    # Get the corresponding fingerprint for the id
    query_fp = arena.get_fingerprint_by_id(chembl_id)
    if query_fp is None:
        search_results["errmsg"] = "No fingerprint found with the given id."
    else:
        # Do the search. Use perf_counter() rather than time() because it
        # is meant for measuring short durations and is not affected by
        # changes to the system clock.
        t1 = time.perf_counter()
        # Using k+1 because we want k elements which are *not* the query,
        # and we know the query has a perfect match with itself.
        hits = arena.knearest_tanimoto_search_fp(query_fp, k=(k+1), threshold=0.0)
        t2 = time.perf_counter()

        # Ignore matching the query
        hits = [pair for pair in hits.get_ids_and_scores() if pair[0] != chembl_id]
        # The query is normally one of the k+1 hits, but that is not
        # guaranteed (for example, when more than k other records also
        # have a score of 1.0). In that case nothing was filtered out, so
        # trim the list to never show more than the k requested neighbors.
        hits = hits[:k]

        # Update the search results
        search_results.update(dict(
            search_time = t2-t1,
            hits = hits,
            ))

    # Return a URL which can be used to search for neighbors of the given
    # chembl_id, using the same 'k' as the current search.
    def search_url(chembl_id):
        return url_for("index", id=chembl_id, k=k)

    return render_template(
        "neighbor_browser.html",
        chembl_url = chembl_url,
        depict_url = depict_url,
        search_url = search_url,
        app_config = AppConfig,
        **search_results, # Pass search results to the template
        )


########

# Stop the program by raising SystemExit with the message, prefixed
# by "ERROR: ".
def die(msg):
    exit_message = "ERROR: " + msg
    raise SystemExit(exit_message)

# Command-line interface, used by main() when the file is run as a script.
parser = argparse.ArgumentParser(
    description="browse through ChEMBL similarity neighborhoods",
)

# Where to find the fingerprints. If not given, set_config() looks for
# the expected filenames in the current directory.
parser.add_argument(
    "-f",
    "--fingerprints",
    dest="fp_filename",
    metavar="FILENAME",
    help="Location of the FPB file containing ChEMBL fingerprint.",
)

# Web server settings, passed to app.run() by main().
parser.add_argument(
    "--host",
    help="hostname to listen on",
)
parser.add_argument(
    "--port",
    type=int,
    help="port of the webserver",
)

# Optional positional argument: the id to show when the request has none.
parser.add_argument(
    "chembl_id",
    nargs="?",
    default=DEFAULT_ID,
    help=f"initial ChEMBL id (default: '{DEFAULT_ID}')",
)


# Command-line entry point: parse the arguments, load the fingerprints,
# then start the Flask web server.
def main():
    options = parser.parse_args()

    # Load the arena and record the initial id before serving requests.
    set_config(options.fp_filename, options.chembl_id)

    app.env = "development"
    app.run(
        host=options.host,
        port=options.port,
    )

def set_config(fp_filename, chembl_id):
    """Load the fingerprints and store the startup settings in AppConfig.

    fp_filename -- the path to the fingerprint file, or None to look for
        one of the expected ChEMBL 28 filenames in the current directory.
    chembl_id -- the id to use when a request does not specify one.

    On success, sets AppConfig.arena and AppConfig.default_id. Exits by
    calling die() (which raises SystemExit) if no fingerprint file can be
    found or if chembl_id is not in the loaded fingerprints.
    """
    # Use the --fingerprints filename or search for the expected name
    # in the current directory.
    fp_filename = get_filename(
        fp_filename,
        ["chembl_28.fpb", "chembl_28.fpb.gz", "chembl_28.fpb.zst"],
        "Could not find the ChEMBL fingerprints.\n"
        "--fingerprints not specified and FPB file not found in the current directory.\n"
        "Download it from https://chemfp.com/datasets/chembl_28.fpb.gz .\n"
        "See https://hg.sr.ht/~dalke/chemfp_examples/ for setup details.")

    arena = chemfp.load_fingerprints(fp_filename)

    # Check the id with the same lookup that index() uses for each request,
    # so an id accepted here is certain to be found there. This also avoids
    # a membership test against the full collection of ids.
    if arena.get_fingerprint_by_id(chembl_id) is None:
        die(f"Cannot find id {chembl_id!r} in {fp_filename!r}")

    AppConfig.arena = arena
    AppConfig.default_id = chembl_id

# Return 'filename' if one was given. Otherwise return the first name in
# 'default_filenames' which exists, or call die(errmsg) if none exist.
def get_filename(filename, default_filenames, errmsg):
    if filename is None:
        # Nothing was specified, so check the defaults in order and stop
        # at the first one which is present.
        filename = next(filter(os.path.exists, default_filenames), None)
        if filename is None:
            die(errmsg)
    return filename
    
if __name__ == "__main__":
    # Run as a script: parse the command line and start the web server.
    main()
elif "FLASK_APP" in os.environ:
    # Imported as a module while FLASK_APP is set. This is used during
    # development, with:
    #    FLASK_APP=chembl_neighbor_browser_flask.py flask run
    # This is the more standard way to work with flask.
    # No command-line options are available in this mode, so there is no
    # way to pass in the configuration. Instead, search for the
    # fingerprint file in the current directory and use the default id.
    set_config(None, DEFAULT_ID)