Commit d52a1128 authored by Gervaise Henry 🤠
Browse files

Merge tag 'v0.3.2' into StrandLab

parents 573b620f a3a1a2d0
......@@ -51,7 +51,7 @@ jobs:
run: |
eval "$(conda shell.bash hook)"
conda activate cellxgene-gateway
coverage report --fail-under 47
coverage report --fail-under 41
coverage xml -i
- name: "Upload coverage to Codecov"
......
# 0.3.2
* Fixed bug #45 affecting multi-level S3 folders
* Added extra_scripts to cache_status page
# 0.3.1
* Added missing __init__.py
# 0.3.0
* Added support for the ItemSource interface, allowing S3 hosting
......
......@@ -7,4 +7,4 @@
# OR CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the License.
__version__ = "0.3.1"
__version__ = "0.3.2"
......@@ -8,6 +8,7 @@
# the specific language governing permissions and limitations under the License.
from json import loads
from json.decoder import JSONDecodeError
from cellxgene_gateway import env
......@@ -17,4 +18,9 @@ def get_extra_scripts():
# ['https://www.googletagmanager.com/gtag/js?id=UA-123456-2',
# f"{env.external_protocol}://{env.external_host}/static/js/google_ua.js"]
# where google_ua.js is a script you add to the static/js folder prior to deployment.
return [] if env.extra_scripts is None else loads(env.extra_scripts)
try:
return [] if env.extra_scripts is None else loads(env.extra_scripts)
except JSONDecodeError as exc:
raise Exception(
f'Error parsing GATEWAY_EXTRA_SCRIPTS, expected JSON array e.g. ["https://example.com/path/to/script.js"]'
) from exc
......@@ -62,5 +62,6 @@ def render_item_tree(item_tree, item_source):
def render_item_source(item_source, filter=None):
item_tree = item_source.list_items(filter)
heading = f"<h6><a href='/filecrawl.html?source={urllib.parse.quote_plus(item_source.name)}'>{item_source.name}</a></h6>"
filterpart = "" if filter is None else ":" + filter
heading = f"<h6><a href='/filecrawl.html?source={urllib.parse.quote_plus(item_source.name)}'>{item_source.name}</a>{filterpart}</h6>"
return heading + render_item_tree(item_tree, item_source)
......@@ -217,7 +217,11 @@ def do_view(path, source_name=None):
@app.route("/cache_status", methods=["GET"])
def do_GET_status():
return render_template("cache_status.html", entry_list=cache.entry_list)
return render_template(
"cache_status.html",
entry_list=cache.entry_list,
extra_scripts=get_extra_scripts(),
)
@app.route("/cache_status.json", methods=["GET"])
......
......@@ -38,8 +38,11 @@ class S3ItemSource(ItemSource):
self.annotation_dir_suffix = annotation_dir_suffix
self.annotation_file_suffix = annotation_file_suffix
def url(self, path):
return "s3://" + join(self.bucket, path)
def url(self, key):
return "s3://" + self.bucket + "/" + key
def remove_bucket(self, filepath):
return filepath[len(self.bucket) :].lstrip("/")
@property
def name(self):
......@@ -61,49 +64,44 @@ class S3ItemSource(ItemSource):
return self.convert_h5ad_key_to_annotation(item.descriptor)
def list_items(self, filter: str = None) -> ItemTree:
item_tree = self.scan_directory()
item_tree = self.scan_directory("" if filter is None else filter)
return item_tree
def scan_directory(self, subpath="") -> dict:
url = self.url(subpath)
def scan_directory(self, directory_key="") -> dict:
url = self.url(directory_key)
if not self.s3.exists(url):
raise Exception(f"S3 url '{url}' does not exist.")
s3key_map = dict(
(filepath[len(self.bucket) :].lstrip("/"), "s3://" + filepath)
(self.remove_bucket(filepath), "s3://" + filepath)
for filepath in sorted(self.s3.ls(url))
)
def is_annotation_dir(dir_s3key):
return (
dir_s3key.endswith(self.annotation_dir_suffix)
and self.convert_annotation_key_to_h5ad(dir_s3key) in h5ad_paths
and self.convert_annotation_key_to_h5ad(dir_s3key) in h5ad_keys
)
h5ad_paths = [
h5ad_keys = [
filepath
for filepath, item_url in s3key_map.items()
if self.is_h5ad_url(item_url)
]
subdirs = [
subdir_keys = [
filepath
for filepath, item_url in s3key_map.items()
if self.s3.isdir(item_url) and not is_annotation_dir(filepath)
]
items = [
self.make_s3item_from_key(filename, join(subpath, filename))
for filename in h5ad_paths
]
items = [self.make_s3item_from_key(basename(key), key) for key in h5ad_keys]
branches = None
if len(subdirs) > 0:
branches = [
self.scan_directory(join(subpath, subdir)) for subdir in subdirs
]
if len(subdir_keys) > 0:
branches = [self.scan_directory(key) for key in subdir_keys]
return ItemTree(subpath, items, branches)
return ItemTree(directory_key, items, branches)
def create_annotation(self, item: S3Item, name: str) -> S3Item:
annotation = self.make_s3item_from_key(
......@@ -163,11 +161,11 @@ class S3ItemSource(ItemSource):
if self.s3.isdir(annotations_fullpath):
return [
self.make_s3item_from_key(
annotation, join(annotations_subpath, annotation), True
basename(annotation), self.remove_bucket(annotation), True
)
for annotation in sorted(self.s3.ls(annotations_fullpath))
if annotation.endswith(self.annotation_file_suffix)
and self.s3.isfile(join(annotations_fullpath, annotation))
and self.s3.isfile("s3://" + annotation)
]
else:
return None
import unittest
from unittest.mock import MagicMock, Mock, patch
from cellxgene_gateway.items.item import ItemType
from cellxgene_gateway.items.s3.s3item import S3Item
from cellxgene_gateway.items.s3.s3item_source import S3ItemSource
class TestScanDirectory(unittest.TestCase):
    """Tests for ``S3ItemSource.scan_directory`` using a mocked ``s3fs.S3FileSystem``."""

    @patch("s3fs.S3FileSystem")
    def test_GIVEN_invalid_bucket_THEN_throws_error(self, s3func):
        """A bucket root reported as missing must raise with a descriptive message."""

        # The class object itself (not an instance) is installed as the
        # patched constructor's return value, so its functions take no ``self``.
        class S3Mock:
            def exists(path):
                if path == "s3://my-bucket/":
                    return False

        s3func.return_value = S3Mock
        source = S3ItemSource("my-bucket")

        with self.assertRaises(Exception) as raised:
            source.scan_directory()

        self.assertEqual(
            "S3 url 's3://my-bucket/' does not exist.",
            str(raised.exception),
        )

    @patch("s3fs.S3FileSystem")
    def test__GIVEN_multilevel_bucket_THEN_properly_recurses_suburls(self, s3func):
        """A bucket with two nested folder levels yields a correctly keyed tree."""

        # Every mock function raises on a path it was not told about, so an
        # unexpected URL built by the code under test fails loudly.
        class S3Mock:
            def exists(path):
                existing = (
                    "s3://my-bucket/",
                    "s3://my-bucket/pbmc3k.h5ad",
                    "s3://my-bucket/lvl1",
                    "s3://my-bucket/lvl1/pbmc3k_l1.h5ad",
                    "s3://my-bucket/lvl1/lvl2",
                    "s3://my-bucket/lvl1/lvl2/pbmc3k_l2.h5ad",
                )
                if path not in existing:
                    raise Exception("exists called with " + path)
                return True

            def ls(path):
                # Listings are bucket-prefixed and carry no "s3://" scheme.
                listings = {
                    "s3://my-bucket/": [
                        "my-bucket/lvl1",
                        "my-bucket/pbmc3k.h5ad",
                        "my-bucket/pbmc3k_annotations",
                    ],
                    "s3://my-bucket/pbmc3k_annotations": [
                        "my-bucket/pbmc3k_annotations/annot.csv"
                    ],
                    "s3://my-bucket/lvl1": [
                        "my-bucket/lvl1/lvl2",
                        "my-bucket/lvl1/pbmc3k_l1.h5ad",
                    ],
                    "s3://my-bucket/lvl1/lvl2": [
                        "my-bucket/lvl1/lvl2/pbmc3k_l2.h5ad"
                    ],
                }
                if path not in listings:
                    raise Exception("ls called with " + path)
                return listings[path]

            def isdir(path):
                directories = (
                    "s3://my-bucket/lvl1",
                    "s3://my-bucket/pbmc3k_annotations",
                    "s3://my-bucket/lvl1/lvl2",
                )
                non_directories = (
                    "s3://my-bucket/pbmc3k.h5ad",
                    "s3://my-bucket/lvl1/pbmc3k_l1.h5ad",
                    "s3://my-bucket/lvl1/pbmc3k_l1_annotations",
                    "s3://my-bucket/lvl1/lvl2/pbmc3k_l2.h5ad",
                    "s3://my-bucket/lvl1/lvl2/pbmc3k_l2_annotations",
                )
                if path in directories:
                    return True
                if path in non_directories:
                    return False
                raise Exception("isdir called with " + path)

            def isfile(path):
                if path == "s3://my-bucket/pbmc3k_annotations/annot.csv":
                    return True
                if path == "s3://my-bucket/pbmc3k_annotations":
                    return False
                raise Exception("isfile called with " + path)

        def s3item_compare(i1, i2, msg=""):
            """Field-by-field equality for S3Item, recursing into annotations."""
            self.assertEqual(i1.name, i2.name, "name equals")
            self.assertEqual(i1.type, i2.type, "type equals")
            self.assertEqual(i1.s3key, i2.s3key, "s3key equals")
            if i1.annotations is None:
                self.assertEqual(i1.annotations, i2.annotations, "annotations equals")
                return True
            self.assertEqual(
                len(i1.annotations),
                len(i2.annotations),
                "annotations length equals",
            )
            for left, right in zip(i1.annotations, i2.annotations):
                # Dispatches back to this comparer for nested S3Item values.
                self.assertEqual(left, right)
            return True

        def assert_tree(node, descriptor, expected_items):
            """Check one tree node's descriptor and its items, in order."""
            self.assertEqual(node.descriptor, descriptor)
            self.assertEqual(len(node.items), len(expected_items))
            for actual, expected in zip(node.items, expected_items):
                self.assertEqual(actual, expected)

        self.addTypeEqualityFunc(S3Item, s3item_compare)

        s3func.return_value = S3Mock
        source = S3ItemSource("my-bucket")
        root = source.scan_directory()

        # Bucket root: one h5ad file with one annotation file.
        root_annotation = S3Item(
            "pbmc3k_annotations/annot.csv",
            name="annot.csv",
            type=ItemType.annotation,
        )
        assert_tree(
            root,
            "",
            [
                S3Item(
                    "pbmc3k.h5ad",
                    name="pbmc3k.h5ad",
                    type=ItemType.h5ad,
                    annotations=[root_annotation],
                )
            ],
        )
        self.assertEqual(len(root.branches), 1)

        # First level: the item key carries the folder prefix.
        lvl1 = root.branches[0]
        assert_tree(
            lvl1,
            "lvl1",
            [
                S3Item(
                    "lvl1/pbmc3k_l1.h5ad",
                    name="pbmc3k_l1.h5ad",
                    type=ItemType.h5ad,
                    annotations=None,
                )
            ],
        )
        self.assertEqual(len(lvl1.branches), 1)

        # Second level: the descriptor and key carry the full nested prefix.
        lvl2 = lvl1.branches[0]
        assert_tree(
            lvl2,
            "lvl1/lvl2",
            [
                S3Item(
                    "lvl1/lvl2/pbmc3k_l2.h5ad",
                    name="pbmc3k_l2.h5ad",
                    type=ItemType.h5ad,
                    annotations=None,
                )
            ],
        )
        self.assertEqual(lvl2.branches, None)
class TestListItems(unittest.TestCase):
    """Tests that ``S3ItemSource.list_items`` forwards its filter to ``scan_directory``."""

    def test_GIVEN_filter_THEN_pass_filter_into_scan_directory(self):
        """An explicit filter is handed to scan_directory unchanged."""
        source = S3ItemSource("my-bucket")
        scan_mock = MagicMock()
        source.scan_directory = scan_mock

        source.list_items("some-filter")

        scan_mock.assert_called_once_with("some-filter")

    def test_GIVEN_no_filter_THEN_pass_empty_string_into_scan_directory(self):
        """With no filter, scan_directory receives an empty string."""
        source = S3ItemSource("my-bucket")
        scan_mock = MagicMock()
        source.scan_directory = scan_mock

        source.list_items()

        scan_mock.assert_called_once_with("")
......@@ -21,6 +21,15 @@ class TestExtraScripts(unittest.TestCase):
def test_GIVEN_empty_string_THEN_returns_empty_array(self):
self.assertEqual(get_extra_scripts(), [])
@patch("cellxgene_gateway.env.extra_scripts", new="'asdf'")
def test_GIVEN_bare_string_THEN_throws_Exception(self):
    """A single-quoted bare string in extra_scripts raises the parsing error."""
    expected_message = 'Error parsing GATEWAY_EXTRA_SCRIPTS, expected JSON array e.g. ["https://example.com/path/to/script.js"]'

    with self.assertRaises(Exception) as raised:
        self.assertEqual(get_extra_scripts(), [])

    # Checked after the ``with`` block so the comparison actually executes.
    self.assertEqual(expected_message, str(raised.exception))
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()
import unittest
from unittest.mock import MagicMock, patch
from cellxgene_gateway.filecrawl import render_item
from cellxgene_gateway.filecrawl import render_item, render_item_source
from cellxgene_gateway.items.file.fileitem import FileItem
from cellxgene_gateway.items.file.fileitem_source import FileItemSource
from cellxgene_gateway.items.item import ItemType
from cellxgene_gateway.items.item import ItemTree, ItemType
source = FileItemSource("/tmp")
......@@ -29,3 +29,15 @@ class TestRenderEntry(unittest.TestCase):
entry = FileItem(subpath="somepath", name="entry", type=ItemType.h5ad)
rendered = render_item(entry, source)
self.assertIn("view/somepath/entry/'", rendered)
class TestRenderItemSource(unittest.TestCase):
    """Tests for the heading produced by ``render_item_source``."""

    @patch("cellxgene_gateway.items.file.fileitem_source.FileItemSource")
    def test_GIVEN_some_filter_THEN_includes_filterpart_in_heading(self, item_source):
        """The filter appears after the source link, separated by a colon."""
        item_source.name = "FakeSource"
        item_source.list_items.return_value = ItemTree("rootdir", [], [])

        # Heading (source link plus ":<filter>") followed by the rendered tree.
        expected_html = (
            "<h6><a href='/filecrawl.html?source=FakeSource'>FakeSource</a>:some_filter</h6>"
            "<li><a href='/filecrawl/rootdir?source=FakeSource'>rootdir</a><ul></ul></li>"
        )

        self.assertEqual(
            render_item_source(item_source, "some_filter"),
            expected_html,
        )
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment