Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions waterbutler/core/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import asyncio


class CacheableMetadataProviderProxy:
"""Wraps a provider so that folder ``metadata`` requests are cached and the
metadata of any child folders is prefetched concurrently.

When a folder is listed, a metadata task is scheduled for each of its child
folders without awaiting them. Those tasks run on the event loop while files
are being downloaded, so the generator can issue all of its metadata requests
up front instead of fetching folder listings strictly one-at-a-time.

Any attribute that is not overridden here (``download``, ``path_from_metadata``,
etc.) is delegated to the wrapped provider.
"""

def __init__(self, provider):
self.provider = provider
self.__cache = dict[str, asyncio.Task]()

def metadata_task(self, path, **kwargs):
key = path.identifier or str(path)
if key not in self.__cache:
self.__cache[key] = asyncio.create_task(
self.provider.metadata(path, **kwargs)
)
return self.__cache[key]

async def metadata(self, path, **kwargs):
if path.is_file:
return await self.provider.metadata(path, **kwargs)

items = await self.metadata_task(path, **kwargs)

for item in items:
if item.is_folder:
child = self.provider.path_from_metadata(path, item)
self.metadata_task(child, **kwargs)

return items

def __getattr__(self, name):
return getattr(self.provider, name)
14 changes: 7 additions & 7 deletions waterbutler/core/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from waterbutler import settings as wb_settings
from waterbutler.core.metrics import MetricsRecord
from waterbutler.core import metadata as wb_metadata
from waterbutler.core.cache import CacheableMetadataProviderProxy
from waterbutler.core.utils import ZipStreamGenerator
from waterbutler.core.utils import RequestHandlerContext

Expand Down Expand Up @@ -678,18 +679,17 @@ async def revalidate_path(self,
"""
return base.child(path, folder=folder)

async def zip(self, path: wb_path.WaterButlerPath, **kwargs) -> asyncio.StreamReader:
async def zip(self, path: wb_path.WaterButlerPath, use_cache: bool = True, **kwargs) -> asyncio.StreamReader:
"""Streams a Zip archive of the given folder

:param path: ( :class:`.WaterButlerPath` ) The folder to compress
:param use_cache: ( `bool` ) Whether to prefetch metadata requests for nested folders during zip
"""
Comment thread
cslzchen marked this conversation as resolved.
provider = CacheableMetadataProviderProxy(self) if use_cache else self

meta_data = await self.metadata(path, **kwargs) # type: ignore
if path.is_file:
meta_data = [meta_data] # type: ignore
path = path.parent

return streams.ZipStreamReader(ZipStreamGenerator(self, path, *meta_data, **kwargs)) # type: ignore
return streams.ZipStreamReader(
await ZipStreamGenerator.create(provider, path, **kwargs)
) # type: ignore

def shares_storage_root(self, other: 'BaseProvider') -> bool:
"""Returns True if ``self`` and ``other`` both point to the same storage root. Used to
Expand Down
9 changes: 9 additions & 0 deletions waterbutler/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def make_disposition(filename):


class ZipStreamGenerator:

def __init__(self, provider, parent_path, *metadata_objs, **kwargs):
self.provider = provider
self.parent_path = parent_path
Expand All @@ -202,6 +203,14 @@ def __init__(self, provider, parent_path, *metadata_objs, **kwargs):
]
self.kwargs = kwargs

@classmethod
async def create(cls, provider, path, **kwargs):
meta_data = await provider.metadata(path, **kwargs)
if path.is_file:
meta_data = [meta_data]
path = path.parent
return cls(provider, path, *meta_data, **kwargs)

async def __aiter__(self):
return self

Expand Down
Loading