Commit c03098af authored by Alok Saldanha's avatar Alok Saldanha
Browse files

#14 enabled new annotations

Due to issue with opening annotation files with "-" in the name, this doesn't quite work.
parent f41db75a
......@@ -33,12 +33,12 @@ class BackendCache:
contents = self.entry_list
return [c.port for c in contents]
def check_entry(self, dataset):
def check_entry(self, key):
contents = self.entry_list
matches = [
c
for c in contents
if c.dataset == dataset and c.status != "terminated"
if c.key.dataset == key.dataset and c.key.annotation_file == key.annotation_file and c.status != "terminated"
]
if len(matches) == 0:
......@@ -51,13 +51,14 @@ class BackendCache:
"Found " + str(len(matches)) + " for " + dataset,
)
def create_entry(self, dataset, file_path, scripts):
def create_entry(self, key, scripts):
port = 8000
existing_ports = self.get_ports()
while (port in existing_ports) or is_port_in_use(port):
port += 1
entry = CacheEntry.for_dataset(dataset, file_path, port)
entry = CacheEntry.for_key(key, port)
background_thread = Thread(
target=process_backend.launch,
......
......@@ -15,13 +15,13 @@ from requests import get, post, put
from cellxgene_gateway import env
from cellxgene_gateway.cellxgene_exception import CellxgeneException
from cellxgene_gateway.util import current_time_stamp
from cellxgene_gateway.flask_util import querystring
class CacheEntry:
def __init__(
self,
pid,
dataset,
file_path,
key,
port,
launchtime,
timestamp,
......@@ -32,8 +32,7 @@ class CacheEntry:
http_status,
):
self.pid = pid
self.dataset = dataset
self.file_path = file_path
self.key = key
self.port = port
self.launchtime = launchtime
self.timestamp = timestamp
......@@ -44,11 +43,11 @@ class CacheEntry:
self.http_status = http_status
@classmethod
def for_dataset(cls, dataset, file_path, port):
def for_key(cls, key, port):
return cls(
None,
dataset,
file_path,
key,
port,
current_time_stamp(),
current_time_stamp(),
......@@ -93,45 +92,59 @@ class CacheEntry:
self.status = "terminated"
def serve_content(self, path):
dataset = self.dataset
dataset = self.key.dataset
gateway_basepath = (
f"{env.external_protocol}://{env.external_host}/view/{dataset}/"
f"{env.external_protocol}://{env.external_host}/view/{self.key.pathpart}/"
)
subpath = path[len(dataset) :] # noqa: E203
subpath = path[len(self.key.pathpart) :] # noqa: E203
if len(subpath) == 0:
r = make_response(f"Redirect to {gateway_basepath}\n", 301)
r.headers["location"] = gateway_basepath
r.headers["location"] = gateway_basepath+querystring()
return r
port = self.port
cellxgene_basepath = f"http://127.0.0.1:{port}"
headers = {}
if "accept" in request.headers:
headers["accept"] = request.headers["accept"]
if "user-agent" in request.headers:
headers["user-agent"] = request.headers["user-agent"]
if "content-type" in request.headers:
headers["content-type"] = request.headers["content-type"]
copy_headers = [
'accept',
'accept-encoding',
'accept-language',
'cache-control',
'connection',
'content-length',
'content-type',
'cookie',
'host',
'origin',
'pragma',
'referer',
'sec-fetch-mode',
'sec-fetch-site',
'user-agent'
]
for h in copy_headers:
if h in request.headers:
headers[h] = request.headers[h]
full_path = cellxgene_basepath + subpath + querystring()
if request.method in ["GET", "HEAD", "OPTIONS"]:
cellxgene_response = get(
cellxgene_basepath + subpath, headers=headers
full_path, headers=headers
)
elif request.method == "PUT":
cellxgene_response = put(
cellxgene_basepath + subpath,
full_path,
headers=headers,
data=request.data.decode(),
data=request.data,
)
elif request.method == "POST":
cellxgene_response = post(
cellxgene_basepath + subpath,
full_path,
headers=headers,
data=request.data.decode(),
data=request.data,
)
else:
raise CellxgeneException(
......@@ -146,10 +159,15 @@ class CacheEntry:
else:
gateway_content = cellxgene_response.content
resp_headers = {}
for h in copy_headers:
if h in cellxgene_response.headers:
resp_headers[h] = cellxgene_response.headers[h]
gateway_response = make_response(
gateway_content,
cellxgene_response.status_code,
{"Content-Type": content_type},
resp_headers,
)
return gateway_response
# Copyright 2019 Novartis Institutes for BioMedical Research Inc. Licensed
# under the Apache License, Version 2.0 (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy
# of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless
# required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the License.
import os
from flask_api import status
from cellxgene_gateway import env
from cellxgene_gateway.cellxgene_exception import CellxgeneException
# There are three kinds of CacheKey:
# 1) somedir/dataset.h5ad: a dataset
# in this case, pathpart == dataset == 'somedir/dataset.h5ad'
# 2) somedir/dataset_annotations/saldaal1-T5HMVBNV.csv : an actual annotations file.
# in this case, pathpart == 'dataset_annotations/saldaal1-T5HMVBNV.csv', dataset == 'somedir/dataset.h5ad'
# 3) somedir/dataset_annotations: an annotation directory. The corresponding h5ad must exist, but the directory may not.
# in this case, pathpart == 'dataset_annotations', dataset == 'somedir/dataset.h5ad'
class CacheKey:
    """Identifies one backend: a dataset plus an optional annotation target.

    The three values are stored exactly as passed in:

    pathpart        -- the part of the request path that selected this key
    dataset         -- path of the .h5ad dataset
    annotation_file -- None when no annotations are requested, '' when an
                       annotations directory (but no file) was requested,
                       otherwise the path of an annotations .csv file
    """

    def __init__(self, pathpart, dataset, annotation_file):
        self.pathpart = pathpart
        self.dataset = dataset
        self.annotation_file = annotation_file

    def __repr__(self):
        # Keys get interpolated into debug/log messages; show the fields
        # rather than the default "<... object at 0x...>" text.
        return (
            f"{type(self).__name__}(pathpart={self.pathpart!r}, "
            f"dataset={self.dataset!r}, "
            f"annotation_file={self.annotation_file!r})"
        )
......@@ -51,45 +51,8 @@ def create_dir(parent_path, dir_name):
else:
os.mkdir(full_path)
def recurse_dir(path):
    """Build a nested listing of everything below ``path``.

    Returns a list with one dict per name in ``path``.  Every dict holds
    "path" (the full path with env.cellxgene_data removed), "name" and
    "type" ("file" or "directory"); directory dicts additionally hold
    "children", the listing of the directory's own contents.

    Raises:
        CellxgeneException: HTTP 400 when ``path`` does not exist, or when
            an entry is neither a file nor a directory.
    """
    if not os.path.exists(path):
        raise CellxgeneException(
            "The given path does not exist.", status.HTTP_400_BAD_REQUEST
        )

    listing = []
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            kind = "file"
        elif os.path.isdir(full_path):
            kind = "directory"
        else:
            raise CellxgeneException(
                "Given path is neither file nor directory.",
                status.HTTP_400_BAD_REQUEST,
            )
        node = {
            "path": full_path.replace(env.cellxgene_data, ""),
            "name": name,
            "type": kind,
        }
        if kind == "directory":
            # Descend only after the node's own fields have been filled in.
            node["children"] = recurse_dir(full_path)
        listing.append(node)
    return listing
def render_entries(entries):
    """Render ``entries`` as an HTML <ul>, one rendered entry per line."""
    body = "\n".join(render_entry(item) for item in entries)
    return f"<ul>{body}</ul>"
def render_entry(entry):
    """Render one listing entry as an HTML <li>.

    "file" entries link to /view/<path>; "directory" entries link to
    /filecrawl/<path> and are followed by their rendered children.  Any
    other type yields None.
    """
    kind = entry["type"]
    if kind == "file":
        href = "/view/" + entry["path"].lstrip("/")
        return f"<li> <a href='{href}'>{entry['name']}</a></li>"
    if kind == "directory":
        href = "/filecrawl/" + entry["path"].lstrip("/")
        label = entry["name"]
        nested = render_entries(entry["children"])
        return f"<li><a href='{href}'>{label}</a>{nested}</li>"
    return None
# Suffix appended to a dataset's base name to name its annotations directory.
annotations_suffix = '_annotations'


def make_h5ad(el):
    """Turn an annotations-directory name into the matching dataset name.

    Drops the last len(annotations_suffix) characters of ``el`` (without
    checking that they are the suffix) and appends '.h5ad'.
    """
    stem = el[:-len(annotations_suffix)]
    return f"{stem}.h5ad"


def make_annotations(el):
    """Turn a dataset name into the matching annotations-directory name.

    Drops the last five characters of ``el`` (the length of '.h5ad',
    unchecked) and appends annotations_suffix.
    """
    stem = el[:-len('.h5ad')]
    return f"{stem}{annotations_suffix}"
......@@ -20,6 +20,7 @@ ip = os.environ.get("GATEWAY_IP")
extra_scripts = os.environ.get("GATEWAY_EXTRA_SCRIPTS")
ttl = os.environ.get("GATEWAY_TTL")
enable_upload = os.environ.get("GATEWAY_ENABLE_UPLOAD", "").lower() in ['true', '1']
enable_annotations = os.environ.get("GATEWAY_ENABLE_ANNOTATIONS", "").lower() in ['true', '1']
env_vars = {
"CELLXGENE_LOCATION": cellxgene_location,
......@@ -34,6 +35,7 @@ optional_env_vars = {
"GATEWAY_EXTRA_SCRIPTS": extra_scripts,
"GATEWAY_TTL": ttl,
"GATEWAY_ENABLE_UPLOAD": enable_upload,
"GATEWAY_ENABLE_ANNOTATIONS": enable_annotations,
}
def validate():
......
import os
from cellxgene_gateway import env
from cellxgene_gateway.dir_util import make_h5ad, make_annotations, annotations_suffix
def recurse_dir(path):
    """Build the file-crawler listing for the directory ``path``.

    Each name found in ``path`` becomes one dict:

    * a regular file ending in '.h5ad' -> "type": "file", plus
      "annotations": a list that starts with a 'new' entry pointing at the
      dataset's annotations directory, followed by one entry per '.csv'
      file inside that directory (when the directory exists);
    * a directory that is not the annotations directory of an .h5ad file
      in the same folder -> "type": "directory", plus "children", the
      recursive listing of that directory;
    * anything else -> "type": "neither".

    "path" values have env.cellxgene_data removed, except for "neither"
    entries, which keep the full path.

    Raises:
        CellxgeneException: HTTP 400 when ``path`` does not exist.
    """
    if not os.path.exists(path):
        # Imported here because the module-level imports visible for this
        # module do not provide these names; using them unimported raised
        # NameError instead of the intended 400 error.
        from flask_api import status
        from cellxgene_gateway.cellxgene_exception import CellxgeneException

        raise CellxgeneException(
            "The given path does not exist.", status.HTTP_400_BAD_REQUEST
        )

    # List the directory once so classification and output agree on the
    # same snapshot of its contents.
    all_entries = os.listdir(path)

    def is_h5ad(el):
        return el.endswith('.h5ad') and os.path.isfile(os.path.join(path, el))

    h5ad_entries = {x for x in all_entries if is_h5ad(x)}
    annotation_dir_entries = {
        x
        for x in all_entries
        if x.endswith(annotations_suffix) and make_h5ad(x) in h5ad_entries
    }

    def strip_data_root(full_path):
        return full_path.replace(env.cellxgene_data, "")

    def list_annotations(el):
        full_path = os.path.join(path, el)
        entries = []
        if os.path.isdir(full_path):
            for x in os.listdir(full_path):
                csv_path = os.path.join(full_path, x)
                if x.endswith('.csv') and os.path.isfile(csv_path):
                    entries.append({
                        # Display name is the text before the first '-',
                        # or the whole file name when there is no '-'.
                        "name": x.partition('-')[0],
                        "path": strip_data_root(csv_path),
                    })
        # The 'new' entry is always offered, even when the annotations
        # directory does not exist yet.
        return [{"name": 'new', "path": strip_data_root(full_path)}] + entries

    def make_entry(el):
        full_path = os.path.join(path, el)
        if el in h5ad_entries:
            return {
                "path": strip_data_root(full_path),
                "name": el,
                "type": "file",
                "annotations": list_annotations(make_annotations(el)),
            }
        if os.path.isdir(full_path) and el not in annotation_dir_entries:
            return {
                "path": strip_data_root(full_path),
                "name": el,
                "type": "directory",
                "children": recurse_dir(full_path),
            }
        return {
            "path": full_path,
            "name": el,
            "type": "neither",
        }

    return [make_entry(x) for x in all_entries]
def render_entries(entries):
    """Wrap the rendered form of every entry, newline-separated, in a <ul>."""
    rendered = (render_entry(item) for item in entries)
    return "<ul>" + "\n".join(rendered) + "</ul>"
def get_url(entry):
    """Return the relative 'view/...' URL for ``entry``.

    Leading slashes are removed from entry['path'] before it is appended.
    """
    relative_path = entry['path'].lstrip('/')
    return "view/" + relative_path
def render_annotations(entry):
    """Render the annotation links of ``entry`` as an HTML fragment.

    Returns '' when entry['annotations'] is empty; otherwise
    ' | annotations: ' followed by comma-separated links.
    """
    annotations = entry['annotations']
    if len(annotations) == 0:
        return ''
    links = [f"<a href='{get_url(a)}'>{a['name']}</a>" for a in annotations]
    return ' | annotations: ' + ", ".join(links)
def render_entry(entry):
    """Render one listing entry as an HTML <li>.

    "file" entries link to the viewer and are followed by their annotation
    links; "directory" entries link to /filecrawl/<path> and are followed
    by their rendered children; every other type renders as ''.
    """
    kind = entry["type"]
    if kind == "file":
        href = get_url(entry)
        label = entry['name']
        annotations_html = render_annotations(entry)
        return f"<li> <a href='{href}'>{label}</a> {annotations_html}</li>"
    if kind == "directory":
        href = "/filecrawl/" + entry['path'].lstrip('/')
        label = entry['name']
        nested = render_entries(entry['children'])
        return f"<li><a href='{href}'>{label}</a>{nested}</li>"
    return ""
from flask import request
def querystring():
    """Return the current request's query string prefixed with '?'.

    Returns '' when the request carries no query string.
    """
    decoded = request.query_string.decode()
    if len(decoded) > 0:
        return '?' + decoded
    return ''
......@@ -28,12 +28,13 @@ from werkzeug import secure_filename
from cellxgene_gateway import env
from cellxgene_gateway.backend_cache import BackendCache
from cellxgene_gateway.cellxgene_exception import CellxgeneException
from cellxgene_gateway.dir_util import create_dir, recurse_dir, render_entries, is_subdir
from cellxgene_gateway.dir_util import create_dir, is_subdir
from cellxgene_gateway.filecrawl import recurse_dir, render_entries
from cellxgene_gateway.extra_scripts import get_extra_scripts
from cellxgene_gateway.path_util import get_dataset, get_file_path
from cellxgene_gateway.process_exception import ProcessException
from cellxgene_gateway.prune_process_cache import PruneProcessCache
from cellxgene_gateway.util import current_time_stamp
from cellxgene_gateway.path_util import get_key
app = Flask(__name__)
cache = BackendCache()
......@@ -73,7 +74,8 @@ def handle_invalid_process(error):
http_status=error.http_status,
stdout=error.stdout,
stderr=error.stderr,
dataset=error.dataset,
dataset=error.key.dataset,
annotation_file=error.key.annotation_file,
),
error.http_status,
)
......@@ -159,7 +161,6 @@ if env.enable_upload:
@app.route("/filecrawl.html")
def filecrawl():
entries = recurse_dir(env.cellxgene_data)
rendered_html = render_entries(entries)
return render_template(
......@@ -187,13 +188,13 @@ def do_filecrawl(path):
entry_lock = Lock()
@app.route("/view/<path:path>", methods=["GET", "PUT", "POST"])
def do_view(path):
dataset = get_dataset(path)
file_path = get_file_path(dataset)
key = get_key(path)
print(f"view path={path}, dataset={key.dataset}, annotation_file= {key.annotation_file}, key={key.pathpart}")
with entry_lock:
match = cache.check_entry(dataset)
match = cache.check_entry(key)
if match is None:
uascripts = get_extra_scripts()
match = cache.create_entry(dataset, file_path, uascripts)
match = cache.create_entry(key, uascripts)
match.timestamp = current_time_stamp()
......@@ -216,7 +217,8 @@ def do_GET_status():
def do_GET_status_json():
return json.dumps({'launchtime':app.launchtime,
'entry_list':[{
'dataset': entry.dataset,
'dataset': entry.key.dataset,
'annotation_file': entry.key.annotation_file,
'launchtime': entry.launchtime,
'last_access': entry.timestamp,
'status': entry.status
......@@ -224,16 +226,17 @@ def do_GET_status_json():
@app.route("/relaunch/<path:path>", methods=["GET"])
def do_relaunch(path):
dataset = get_dataset(path)
match = cache.check_entry(dataset)
key = get_key(path)
match = cache.check_entry(key)
if not match is None:
match.terminate()
return redirect(url_for("do_view", path=path), code=302)
qs = request.query_string.decode()
return redirect(url_for("do_view", path=path) + (f'?{qs}' if len(qs) > 0 else ''), code=302)
@app.route("/terminate/<path:path>", methods=["GET"])
def do_terminate(path):
dataset = get_dataset(path)
match = cache.check_entry(dataset)
key = get_key(path)
match = cache.check_entry(key)
if not match is None:
match.terminate()
return redirect(url_for("do_GET_status"), code=302)
......
......@@ -13,37 +13,81 @@ from flask_api import status
from cellxgene_gateway import env
from cellxgene_gateway.cellxgene_exception import CellxgeneException
from cellxgene_gateway.dir_util import make_h5ad
from cellxgene_gateway.cache_key import CacheKey
def get_dataset(path):
def get_key(path):
if path == "/" or path == "":
raise CellxgeneException(
"No matching dataset found.", status.HTTP_404_NOT_FOUND
)
trimmed = path[:-1] if path[-1] == "/" else path
try:
get_file_path(trimmed)
return trimmed
except CellxgeneException:
split = os.path.split(trimmed)
return get_dataset(split[0])
# valid paths come in three forms:
if trimmed.endswith('.h5ad') and data_file_exists(trimmed):
# 1) somedir/dataset.h5ad: a dataset
return CacheKey(trimmed, trimmed, None)
elif trimmed.endswith('.csv') and data_file_exists(trimmed):
# 2) somedir/dataset_annotations/saldaal1-T5HMVBNV.csv : an actual annotations file.
annotations_dir = os.path.split(trimmed)[0]
dataset = make_h5ad(annotations_dir)
if data_file_exists(dataset):
return CacheKey(trimmed, dataset, trimmed)
elif trimmed.endswith('_annotations') and data_dir_exists(trimmed):
# 3) somedir/dataset_annotations: an annotation directory. The corresponding h5ad must exist, but the directory may not.
dataset = make_h5ad(trimmed)
if data_file_exists(dataset):
return CacheKey(trimmed, dataset, '')
except CellxgeneException:
pass
split = os.path.split(trimmed)
return get_key(split[0])
def validate_path(file_path):
def validate_exists(file_path):
if not os.path.exists(file_path):
raise CellxgeneException(
"File does not exist: " + file_path, status.HTTP_400_BAD_REQUEST
)
def validate_is_file(file_path):
    """Check that ``file_path`` exists and is a regular file.

    Raises:
        CellxgeneException: HTTP 400 when the path is not a file (the
            existence check is delegated to validate_exists).
    """
    validate_exists(file_path)
    if os.path.isfile(file_path):
        return
    raise CellxgeneException(
        "Path is not file: " + file_path, status.HTTP_400_BAD_REQUEST
    )
def validate_is_dir(file_path):
    """Check that ``file_path`` exists and is a directory.

    Raises:
        CellxgeneException: HTTP 400 when the path is not a directory (the
            existence check is delegated to validate_exists).
    """
    validate_exists(file_path)
    if os.path.isdir(file_path):
        return
    raise CellxgeneException(
        "Path is not dir: " + file_path, status.HTTP_400_BAD_REQUEST
    )
def data_file_exists(dataset):
    """Return True when ``dataset`` is a file under env.cellxgene_data.

    Never returns False: validate_is_file raises when the check fails.
    """
    validate_is_file(os.path.join(env.cellxgene_data, dataset))
    return True
def data_dir_exists(dataset):
    """Return True when ``dataset`` is a directory under env.cellxgene_data.

    Never returns False: validate_is_dir raises when the check fails.
    """
    validate_is_dir(os.path.join(env.cellxgene_data, dataset))
    return True
def get_file_path(dataset):
def get_file_path(key):
dataset = key.dataset
file_path = os.path.join(env.cellxgene_data, dataset)
validate_path(file_path)
validate_is_file(file_path)
return file_path
def get_annotation_file_path(key):
    """Resolve the annotation file of ``key`` to a path under the data root.

    Returns key.annotation_file unchanged when it is None or ''.  Otherwise
    joins it onto env.cellxgene_data, validates that the result is a file
    (validate_is_file raises if not) and returns the joined path.
    """
    annotation_file = key.annotation_file
    print(f"getting annotaiton_file_path for {annotation_file}")
    if annotation_file is None or annotation_file == '':
        # Nothing to resolve: no annotations, or directory-only annotations.
        return annotation_file
    resolved = os.path.join(env.cellxgene_data, annotation_file)
    print(f"getting annotaiton_file_path for {key}, file_path {resolved}")
    validate_is_file(resolved)
    return resolved
......@@ -15,7 +15,7 @@ class ProcessException(Exception):
self.stdout = stdout
self.stderr = stderr
self.http_status = http_status
self.dataset = dataset
self.key = key
@classmethod
def from_cache_entry(cls, cache_entry):
......@@ -24,5 +24,5 @@ class ProcessException(Exception):
cache_entry.all_output,
cache_entry.stderr,
cache_entry.http_status,
cache_entry.dataset,
cache_entry.key,
)
......@@ -11,21 +11,30 @@ import logging
import subprocess
from flask_api import status
from cellxgene_gateway.env import enable_annotations
from cellxgene_gateway.process_exception import ProcessException
from cellxgene_gateway.dir_util import make_annotations
from cellxgene_gateway.path_util import get_file_path, get_annotation_file_path
class SubprocessBackend:
def __init__(self):
pass
def create_cmd(self, cellxgene_loc, file_path, port, scripts):
def create_cmd(self, cellxgene_loc, file_path, port, scripts, annotation_file_path):
if enable_annotations and not annotation_file_path is None:
annotation_args_prefix = " --experimental-annotations"
if annotation_file_path == "":
annotation_args = f"{annotation_args_prefix} --experimental-annotations-output-dir {make_annotations(file_path)}"
else:
annotation_args = f"{annotation_args_prefix} --experimental-annotations-file {annotation_file_path}"
else:
annotation_args = ""
cmd = (
f"yes | {cellxgene_loc} launch {file_path}"
+ " --port "
+ str(port)
+ " --host 127.0.0.1"
+ annotation_args
)
for s in scripts:
......@@ -36,7 +45,7 @@ class SubprocessBackend:
def launch(self, cellxgene_loc, scripts, cache_entry):
cmd = self.create_cmd(
cellxgene_loc, cache_entry.file_path, cache_entry.port, scripts
cellxgene_loc, get_file_path(cache_entry.key), cache_entry.port, scripts, get_annotation_file_path(cache_entry.key)
)
logging.getLogger("cellxgene_gateway").info(f"launching {cmd}")
process = subprocess.Popen(
......
......@@ -29,6 +29,7 @@
<tr>
<th>PID</th>
<th>dataset</th>
<th>annotation_file</th>
<th>port</th>
<th>launchtime</th>
<th>last access</th>
......@@ -42,7 +43,8 @@
{% for entry in entry_list %}
<tr>
<td>{{ entry.pid }}</td>
<td><a href="{{ url_for('do_view', path=entry.dataset) }}">{{ entry.dataset }}</a></td>
<td><a href="{{ url_for('do_view', path=entry.dataset) }}">{{ entry.key.dataset }}</a></td>
<td>{{ entry.key.annotation_file }}</td>