from __future__ import division
from __future__ import print_function
import hashlib
import os
import shutil
import tarfile
import tempfile
import zipfile
import filelock
from six.moves.urllib import request
import sys
import time
from chainer.dataset.download import get_dataset_directory
from chainer.dataset.download import get_dataset_root
def _reporthook(count, block_size, total_size):
global start_time
if count == 0:
start_time = time.time()
print(' % Total Recv Speed Time left')
return
duration = time.time() - start_time
progress_size = count * block_size
try:
speed = progress_size / duration
except ZeroDivisionError:
speed = float('inf')
percent = progress_size / total_size * 100
eta = int((total_size - progress_size) / speed)
sys.stdout.write(
'\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}'
.format(
percent, total_size / (1 << 20), progress_size / (1 << 20),
speed / (1 << 10), eta // 60 // 60, (eta // 60) % 60, eta % 60))
sys.stdout.flush()
[docs]def cached_download(url):
"""Downloads a file and caches it.
This is different from the original
:func:`~chainer.dataset.cached_download` in that the download
progress is reported.
It downloads a file from the URL if there is no corresponding cache. After
the download, this function stores a cache to the directory under the
dataset root (see :func:`set_dataset_root`). If there is already a cache
for the given URL, it just returns the path to the cache without
downloading the same file.
Args:
url (string): URL to download from.
Returns:
string: Path to the downloaded file.
"""
cache_root = os.path.join(get_dataset_root(), '_dl_cache')
try:
os.makedirs(cache_root)
except OSError:
if not os.path.exists(cache_root):
raise
lock_path = os.path.join(cache_root, '_dl_lock')
urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
cache_path = os.path.join(cache_root, urlhash)
with filelock.FileLock(lock_path):
if os.path.exists(cache_path):
return cache_path
temp_root = tempfile.mkdtemp(dir=cache_root)
try:
temp_path = os.path.join(temp_root, 'dl')
print('Downloading ...')
print('From: {:s}'.format(url))
print('To: {:s}'.format(cache_path))
request.urlretrieve(url, temp_path, _reporthook)
with filelock.FileLock(lock_path):
shutil.move(temp_path, cache_path)
finally:
shutil.rmtree(temp_root)
return cache_path
[docs]def download_model(url):
"""Downloads a model file and puts it under model directory.
It downloads a file from the URL and puts it under model directory.
For exmaple, if :obj:`url` is `http://example.com/subdir/model.npz`,
the pretrained weights file will be saved to
`$CHAINER_DATASET_ROOT/pfnet/chainercv/models/model.npz`.
If there is already a file at the destination path,
it just returns the path without downloading the same file.
Args:
url (string): URL to download from.
Returns:
string: Path to the downloaded file.
"""
root = get_dataset_directory(
os.path.join('pfnet', 'chainercv', 'models'))
basename = os.path.basename(url)
path = os.path.join(root, basename)
if not os.path.exists(path):
cache_path = cached_download(url)
os.rename(cache_path, path)
return path