From 6fef246b914077e620da594f10c08a5143ff5e18 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 3 Jun 2021 20:58:03 -0400 Subject: [PATCH 1/4] Fix memory directories and add tests --- fsspec/conftest.py | 2 + fsspec/implementations/memory.py | 175 ++++++++++++-------- fsspec/implementations/tests/test_memory.py | 64 ++++--- 3 files changed, 148 insertions(+), 93 deletions(-) diff --git a/fsspec/conftest.py b/fsspec/conftest.py index 5d88767aa..393e4c465 100644 --- a/fsspec/conftest.py +++ b/fsspec/conftest.py @@ -17,10 +17,12 @@ def m(): """ m = fsspec.filesystem("memory") m.store.clear() + m.pseudo_dirs.clear() try: yield m finally: m.store.clear() + m.pseudo_dirs.clear() @pytest.fixture diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index 2032b1d6f..8c21d5cce 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -18,14 +18,22 @@ class MemoryFileSystem(AbstractFileSystem): """ store = {} # global - pseudo_dirs = [] + pseudo_dirs = [""] protocol = "memory" - root_marker = "" + root_marker = "/" + + @classmethod + def _strip_protocol(cls, path): + if path.startswith("memory://"): + path = path[len("memory://") :] + path = path.lstrip("/").rstrip("/") + return "/" + path if path else "" def ls(self, path, detail=False, **kwargs): + path = self._strip_protocol(path) if path in self.store: - # there is a key with this exact name, but could also be directory - out = [ + # there is a key with this exact name + return [ { "name": path, "size": self.store[path].getbuffer().nbytes, @@ -33,77 +41,79 @@ def ls(self, path, detail=False, **kwargs): "created": self.store[path].created, } ] - else: - out = [] - path = path.strip("/").lstrip("/") paths = set() + starter = path + "/" + out = [] for p2 in self.store: - has_slash = "/" if p2.startswith("/") else "" - p = p2.lstrip("/") - if "/" in p: - root = p.rsplit("/", 1)[0] - else: - root = "" - if root == path: - out.append( - { - "name": has_slash + p, - "size": self.store[p2].getbuffer().nbytes, - "type": "file", - "created": self.store[p2].created, - } - ) - elif ( - path - and len(path) < len(p.strip("/")) - and all( - (a == b) for a, b in zip(path.split("/"), p.strip("/").split("/")) - ) - ): - # implicit directory - ppath = "/".join(p.split("/")[: len(path.split("/")) + 1]) - if ppath not in paths: - out.append( - { - "name": has_slash + ppath + "/", - "size": 0, - "type": "directory", - } - ) - paths.add(ppath) - elif all( - (a == b) - for a, b in zip(path.split("/"), [""] + p.strip("/").split("/")) - ): - # root directory entry - ppath = p.rstrip("/").split("/", 1)[0] - if ppath not in paths: + if p2.startswith(starter): + if "/" not in p2[len(starter) :]: + # exact child out.append( { - "name": has_slash + ppath + "/", - "size": 0, - "type": "directory", + "name": p2, + "size": self.store[p2].getbuffer().nbytes, + "type": "file", + "created": self.store[p2].created, } ) - paths.add(ppath) + elif len(p2) > len(starter): + # implied child directory + ppath = starter + p2[len(starter) :].split("/", 1)[0] + if ppath not in paths: + out = out or [] + out.append( + { + "name": ppath, + "size": 0, + "type": "directory", + } + ) + paths.add(ppath) for p2 in self.pseudo_dirs: - if self._parent(p2).strip("/") == path and p2.strip("/") not in paths: - out.append({"name": p2 + "/", "size": 0, "type": "directory"}) + if p2.startswith(starter): + if "/" not in p2[len(starter) :]: + # exact child pdir + if p2 not in paths: + out.append({"name": p2, "size": 0, "type": "directory"}) + paths.add(p2) + else: + # directory implied by deeper pdir + ppath = starter + p2[len(starter) :].split("/", 1)[0] + if ppath not in paths: + out.append({"name": ppath, "size": 0, "type": "directory"}) + paths.add(ppath) + if not out: + if path in self.pseudo_dirs: + # empty dir + return [] + raise FileNotFoundError(path) if detail: return out return sorted([f["name"] for f in out]) def mkdir(self, path, create_parents=True, **kwargs): - path = path.rstrip("/") - if create_parents and self._parent(path): - self.mkdir(self._parent(path), create_parents, **kwargs) - if self._parent(path) and not self.isdir(self._parent(path)): + path = self._strip_protocol(path) + if path in self.store or path in self.pseudo_dirs: + raise FileExistsError + if self._parent(path).strip("/") and self.isfile(self._parent(path)): raise NotADirectoryError(self._parent(path)) + if create_parents and self._parent(path).strip("/"): + try: + self.mkdir(self._parent(path), create_parents, **kwargs) + except FileExistsError: + pass if path and path not in self.pseudo_dirs: self.pseudo_dirs.append(path) + def makedirs(self, path, exist_ok=False): + try: + self.mkdir(path, create_parents=True) + except FileExistsError: + if not exist_ok: + raise + def rmdir(self, path): - path = path.rstrip("/") + path = self._strip_protocol(path) if path in self.pseudo_dirs: if not self.ls(path): self.pseudo_dirs.remove(path) @@ -116,6 +126,24 @@ def exists(self, path): path = self._strip_protocol(path) return path in self.store or path in self.pseudo_dirs + def info(self, path, **kwargs): + path = self._strip_protocol(path) + if path in self.pseudo_dirs: + return { + "name": path, + "size": 0, + "type": "directory", + } + elif path in self.store: + return { + "name": path, + "size": self.store[path].getbuffer().nbytes, + "type": "file", + "created": self.store[path].created, + } + else: + raise FileNotFoundError(path) + def _open( self, path, @@ -125,6 +153,14 @@ def _open( cache_options=None, **kwargs, ): + path = self._strip_protocol(path) + if path in self.pseudo_dirs: + raise IsADirectoryError + parent = path + while len(parent) > 1: + parent = self._parent(parent) + if self.isfile(parent): + raise FileExistsError(parent) if mode in ["rb", "ab", "rb+"]: if path in self.store: f = self.store[path] @@ -144,6 +180,8 @@ def _open( return m def cp_file(self, path1, path2, **kwargs): + path1 = self._strip_protocol(path1) + path2 = self._strip_protocol(path2) if self.isfile(path1): self.store[path2] = MemoryFile(self, path2, self.store[path1].getbuffer()) elif self.isdir(path1): @@ -153,18 +191,18 @@ def cp_file(self, path1, path2, **kwargs): raise FileNotFoundError def cat_file(self, path, start=None, end=None, **kwargs): + path = self._strip_protocol(path) try: return self.store[path].getvalue()[start:end] except KeyError: raise FileNotFoundError(path) def _rm(self, path): - if self.isfile(path): + path = self._strip_protocol(path) + try: del self.store[path] - elif self.isdir(path): - self.rmdir(path) - else: - raise FileNotFoundError + except KeyError as e: + raise FileNotFoundError from e def rm(self, path, recursive=False, maxdepth=None): paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth) @@ -175,13 +213,10 @@ def rm(self, path, recursive=False, maxdepth=None): # directories first. if not self.exists(p): continue - self.rm_file(p) - - def size(self, path): - """Size in bytes of the file at path""" - if path not in self.store: - raise FileNotFoundError(path) - return self.store[path].getbuffer().nbytes + if self.isfile(p): + self.rm_file(p) + else: + self.rmdir(p) class MemoryFile(BytesIO): diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index b957e5d19..f8a46c46a 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -1,5 +1,3 @@ -import sys - import pytest @@ -7,22 +5,20 @@ def test_1(m): m.touch("/somefile") # NB: is found with or without initial / m.touch("afiles/and/anothers") files = m.find("") - if "somefile" in files: - assert files == ["afiles/and/anothers", "somefile"] - else: - assert files == ["/somefile", "afiles/and/anothers"] - - files = sorted(m.get_mapper("")) - if "somefile" in files: - assert files == ["afiles/and/anothers", "somefile"] - else: - assert files == ["/somefile", "afiles/and/anothers"] - - -@pytest.mark.xfail( - sys.version_info < (3, 6), - reason="py35 error, see https://github.com/intake/filesystem_spec/issues/148", -) + assert files == ["/afiles/and/anothers", "/somefile"] + + files = sorted(m.get_mapper("/")) + assert files == ["afiles/and/anothers", "somefile"] + + +def test_strip(m): + assert m._strip_protocol("") == "" + assert m._strip_protocol("memory://") == "" + assert m._strip_protocol("afile") == "/afile" + assert m._strip_protocol("/b/c") == "/b/c" + assert m._strip_protocol("/b/c/") == "/b/c" + + def test_ls(m): m.mkdir("/dir") m.mkdir("/dir/dir1") @@ -31,8 +27,8 @@ def test_ls(m): m.touch("/dir/dir1/bfile") m.touch("/dir/dir1/cfile") - assert m.ls("/", False) == ["/dir/"] - assert m.ls("/dir", False) == ["/dir/afile", "/dir/dir1/"] + assert m.ls("/", False) == ["/dir"] + assert m.ls("/dir", False) == ["/dir/afile", "/dir/dir1"] assert m.ls("/dir", True)[0]["type"] == "file" assert m.ls("/dir", True)[1]["type"] == "directory" @@ -40,8 +36,6 @@ def test_ls(m): def test_directories(m): - with pytest.raises(NotADirectoryError): - m.mkdir("outer/inner", create_parents=False) m.mkdir("outer/inner") assert m.ls("outer") @@ -83,6 +77,30 @@ def test_rewind(m): assert f.tell() == 0 +def test_empty_raises(m): + with pytest.raises(FileNotFoundError): + m.ls("nonexistent") + + with pytest.raises(FileNotFoundError): + m.info("nonexistent") + + +def test_dir_errors(m): + m.mkdir("/first") + + with pytest.raises(FileExistsError): + m.mkdir("/first") + with pytest.raises(FileExistsError): + m.makedirs("/first", exist_ok=False) + m.makedirs("/first", exist_ok=True) + m.makedirs("/first/second/third") + assert "/first/second" in m.pseudo_dirs + + m.touch("/afile") + with pytest.raises(NotADirectoryError): + m.mkdir("/afile/nodir") + + def test_no_rewind_append_mode(m): # https://github.com/intake/filesystem_spec/issues/349 with m.open("src/file.txt", "w") as f: @@ -97,7 +115,7 @@ def test_moves(m): m.touch("source2.txt") m.mv("source2.txt", "target2.txt", recursive=True) - assert m.find("") == ["target.txt", "target2.txt"] + assert m.find("") == ["/target.txt", "/target2.txt"] def test_rm_reursive_empty_subdir(m): From bccd0d2f14843303164ab541b0e78a3a40fce548 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 3 Jun 2021 21:21:42 -0400 Subject: [PATCH 2/4] knockon fixes --- fsspec/implementations/tests/test_cached.py | 8 ++++---- fsspec/mapping.py | 2 +- fsspec/tests/test_api.py | 6 +++--- fsspec/tests/test_mapping.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 535d69c88..9ca24f71c 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -693,9 +693,9 @@ def test_multi_cache_chain(protocol): def test_strip(protocol): fs = fsspec.filesystem(protocol, target_protocol="memory") url1 = "memory://afile" - assert fs._strip_protocol(url1) == "afile" - assert fs._strip_protocol(protocol + "://afile") == "afile" - assert fs._strip_protocol(protocol + "::memory://afile") == "afile" + assert fs._strip_protocol(url1) == "/afile" + assert fs._strip_protocol(protocol + "://afile") == "/afile" + assert fs._strip_protocol(protocol + "::memory://afile") == "/afile" @pytest.mark.parametrize("protocol", ["simplecache", "filecache"]) @@ -713,7 +713,7 @@ def test_expiry(): d = tempfile.mkdtemp() fs = fsspec.filesystem("memory") - fn = "afile" + fn = "/afile" fn0 = "memory://afile" data = b"hello" with fs.open(fn0, "wb") as f: diff --git a/fsspec/mapping.py b/fsspec/mapping.py index 0599bfda6..17fe7ae79 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -120,7 +120,7 @@ def _key_to_str(self, key): key = str(tuple(key)) else: key = str(key) - return "/".join([self.root, key]) if self.root else key + return self.fs._strip_protocol("/".join([self.root, key]) if self.root else key) def _str_to_key(self, s): """Strip path of to leave key name""" diff --git a/fsspec/tests/test_api.py b/fsspec/tests/test_api.py index b12311fe3..4119a631a 100644 --- a/fsspec/tests/test_api.py +++ b/fsspec/tests/test_api.py @@ -33,9 +33,9 @@ def test_pickle(): def test_class_methods(): - assert MemoryFileSystem._strip_protocol("memory::stuff") == "stuff" - assert MemoryFileSystem._strip_protocol("memory://stuff") == "stuff" - assert MemoryFileSystem._strip_protocol("stuff") == "stuff" + assert MemoryFileSystem._strip_protocol("memory::stuff") == "/stuff" + assert MemoryFileSystem._strip_protocol("memory://stuff") == "/stuff" + assert MemoryFileSystem._strip_protocol("stuff") == "/stuff" assert MemoryFileSystem._strip_protocol("other://stuff") == "other://stuff" assert MemoryFileSystem._get_kwargs_from_urls("memory://user@thing") == {} diff --git a/fsspec/tests/test_mapping.py b/fsspec/tests/test_mapping.py index a08e7a16d..691462516 100644 --- a/fsspec/tests/test_mapping.py +++ b/fsspec/tests/test_mapping.py @@ -84,7 +84,7 @@ def test_keys_view(): def test_multi(): - m = fsspec.get_mapper("memory://") + m = fsspec.get_mapper("memory:///") data = {"a": b"data1", "b": b"data2"} m.setitems(data) From d5e68886d1d14885b2b1728bb37531ef2ab17c30 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 3 Jun 2021 21:50:28 -0400 Subject: [PATCH 3/4] fix mem info --- fsspec/fuse.py | 1 + fsspec/implementations/memory.py | 4 +++- fsspec/implementations/tests/test_dask.py | 2 +- fsspec/implementations/tests/test_memory.py | 1 + fsspec/tests/test_api.py | 3 +-- fsspec/tests/test_fuse.py | 6 +++++- 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fsspec/fuse.py b/fsspec/fuse.py index 8962aa234..5a195c99c 100644 --- a/fsspec/fuse.py +++ b/fsspec/fuse.py @@ -16,6 +16,7 @@ def __init__(self, fs, path): self.cache = {} self.root = path.rstrip("/") + "/" self.counter = 0 + logger.info("Starting FUSE at %s", path) def getattr(self, path, fh=None): logger.debug("getattr %s", path) diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index 8c21d5cce..2f14ebe14 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -128,7 +128,9 @@ def exists(self, path): def info(self, path, **kwargs): path = self._strip_protocol(path) - if path in self.pseudo_dirs: + if path in self.pseudo_dirs or any( + p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs + ): return { "name": path, "size": 0, diff --git a/fsspec/implementations/tests/test_dask.py b/fsspec/implementations/tests/test_dask.py index ae5a673e5..d7e8ce467 100644 --- a/fsspec/implementations/tests/test_dask.py +++ b/fsspec/implementations/tests/test_dask.py @@ -26,5 +26,5 @@ def setup(): def test_basic(cli): fs = fsspec.filesystem("dask", target_protocol="memory") - assert fs.ls("") == ["afile"] + assert fs.ls("") == ["/afile"] assert fs.cat("afile") == b"data" diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index f8a46c46a..6bfe06e3a 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -37,6 +37,7 @@ def test_ls(m): def test_directories(m): m.mkdir("outer/inner") + assert m.info("outer/inner")["type"] == "directory" assert m.ls("outer") assert m.ls("outer/inner") == [] diff --git a/fsspec/tests/test_api.py b/fsspec/tests/test_api.py index 4119a631a..f1f93eb27 100644 --- a/fsspec/tests/test_api.py +++ b/fsspec/tests/test_api.py @@ -33,7 +33,6 @@ def test_pickle(): def test_class_methods(): - assert MemoryFileSystem._strip_protocol("memory::stuff") == "/stuff" assert MemoryFileSystem._strip_protocol("memory://stuff") == "/stuff" assert MemoryFileSystem._strip_protocol("stuff") == "/stuff" assert MemoryFileSystem._strip_protocol("other://stuff") == "other://stuff" @@ -146,7 +145,7 @@ def test_pipe_cat(): fs.pipe("afile", b"contents") assert fs.cat("afile") == b"contents" - data = {"bfile": b"more", "cfile": b"stuff"} + data = {"/bfile": b"more", "/cfile": b"stuff"} fs.pipe(data) assert fs.cat(list(data)) == data diff --git a/fsspec/tests/test_fuse.py b/fsspec/tests/test_fuse.py index 731c702be..8a4d5927e 100644 --- a/fsspec/tests/test_fuse.py +++ b/fsspec/tests/test_fuse.py @@ -38,7 +38,11 @@ def test_basic(tmpdir, capfd): pass timeout -= 1 time.sleep(1) - assert timeout > 0, "Timeout" + if not timeout > 0: + import pdb + + pdb.set_trace() + pytest.skip(msg="fuse didn't come live") fn = os.path.join(mountdir, "test") with open(fn, "wb") as f: From b956197810cce7d21b646e9424412a9a2364e32e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 3 Jun 2021 22:15:12 -0400 Subject: [PATCH 4/4] more --- fsspec/implementations/memory.py | 2 ++ fsspec/implementations/tests/test_dask.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index 2f14ebe14..27e8b0a28 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -26,6 +26,8 @@ class MemoryFileSystem(AbstractFileSystem): def _strip_protocol(cls, path): if path.startswith("memory://"): path = path[len("memory://") :] + if "::" in path or "://" in path: + return path.rstrip("/") path = path.lstrip("/").rstrip("/") return "/" + path if path else "" diff --git a/fsspec/implementations/tests/test_dask.py b/fsspec/implementations/tests/test_dask.py index d7e8ce467..d40f2679a 100644 --- a/fsspec/implementations/tests/test_dask.py +++ b/fsspec/implementations/tests/test_dask.py @@ -27,4 +27,4 @@ def test_basic(cli): fs = fsspec.filesystem("dask", target_protocol="memory") assert fs.ls("") == ["/afile"] - assert fs.cat("afile") == b"data" + assert fs.cat("/afile") == b"data"