From 4fb577e5d4d90eee9ffb46e2ddd1f8fe6ff33325 Mon Sep 17 00:00:00 2001 From: Cuong Duong Date: Sun, 3 Oct 2021 12:03:03 +1100 Subject: [PATCH] (Stage 1) Python Wheels for PyPi (#2010) --- .github/workflows/build-and-test.yml | 31 ++- .github/workflows/wheel.yml | 106 ++++++++++ python/prophet/models.py | 45 ++--- python/pyproject.toml | 8 + python/requirements.txt | 4 +- python/setup.py | 280 +++++++++++++++++++++------ 6 files changed, 380 insertions(+), 94 deletions(-) create mode 100644 .github/workflows/wheel.yml create mode 100644 python/pyproject.toml diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 2e300f5..f881d04 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [ master ] +env: + CMDSTAN_VERSION: "2.26.1" + jobs: build-and-test-python: @@ -21,16 +24,34 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: "Restore pip cache" + id: cache-pip + uses: actions/cache@v2 + with: + path: $HOME/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/python/requirements.txt') }}-v1 + restore-keys: | + ${{ runner.os }}-pip- + - name: "Restore cmdstan cache" + id: cache-cmdstan + uses: actions/cache@v2 + with: + path: $HOME/.cmdstan + key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v1 + - name: "Download cmdstan" + if: steps.cache-cmdstan.outputs.cache-hit != 'true' + run: | + wget https://github.com/stan-dev/cmdstan/releases/download/v${{ env.CMDSTAN_VERSION }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz -O /tmp/cmdstan.tar.gz &> /dev/null + mkdir $HOME/.cmdstan + tar -xf /tmp/cmdstan.tar.gz -C $HOME/.cmdstan &> /dev/null - name: Install and test run: | pip install -U -r python/requirements.txt dask[dataframe] distributed - cd python && python setup.py develop test + cd python + STAN_BACKEND=PYSTAN python setup.py develop test python setup.py clean rm -rf prophet/stan_model - wget https://github.com/stan-dev/cmdstan/releases/download/v2.26.1/cmdstan-2.26.1.tar.gz -O /tmp/cmdstan.tar.gz > /dev/null - tar -xvf /tmp/cmdstan.tar.gz -C /tmp > /dev/null - make -C /tmp/cmdstan-2.26.1/ build > /dev/null - CMDSTAN=/tmp/cmdstan-2.26.1 STAN_BACKEND=CMDSTANPY python setup.py develop test + STAN_BACKEND=CMDSTANPY python setup.py develop test build-and-test-r: diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 0000000..1f3ccb7 --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,106 @@ +name: "Create Python Wheels" + +on: + release: + types: [ created ] + workflow_dispatch: {} + +env: + STAN_BACKEND: "PYSTAN,CMDSTANPY" + CMDSTAN_VERSION: "2.26.1" + +jobs: + make-wheels-macos-linux: + name: ${{ matrix.python-version }}-${{ matrix.architecture }}-${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - "macos-latest" + - "ubuntu-latest" + python-version: + - "3.6" + - "3.7" + - "3.8" + architecture: + - x64 + + fail-fast: false + + steps: + - name: "Get OS version (Linux)" + if: startsWith(runner.os, 'Linux') + run: | + echo "OS_VERSION=`lsb_release -sr`" >> $GITHUB_ENV + echo "PIP_DEFAULT_CACHE=$HOME/.cache/pip" >> $GITHUB_ENV + echo "DEFAULT_HOME=$HOME" >> $GITHUB_ENV + + - name: "Get OS version (macOS)" + if: startsWith(runner.os, 'macOS') + run: | + echo "OS_VERSION=`sw_vers -productVersion`" >> $GITHUB_ENV + echo "PIP_DEFAULT_CACHE=$HOME/Library/Caches/pip" >> $GITHUB_ENV + echo "DEFAULT_HOME=$HOME" >> $GITHUB_ENV + + - name: "Checkout repo" + uses: actions/checkout@v2 + + - name: "Set up Python" + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + architecture: ${{ matrix.architecture }} + + - name: "Restore pip cache" + id: cache-pip + uses: actions/cache@v2 + with: + path: ${{ env.PIP_DEFAULT_CACHE }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/python/requirements.txt') }}-v1 + restore-keys: | + ${{ runner.os }}-pip- + + - name: "Install pip" + run: | + python -m pip install --upgrade pip + python -m pip install cibuildwheel build + + - name: "Restore cmdstan cache" + id: cache-cmdstan + uses: actions/cache@v2 + with: + path: ${{ env.DEFAULT_HOME }}/.cmdstan + key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v1 + + - name: "Download cmdstan" + if: steps.cache-cmdstan.outputs.cache-hit != 'true' + run: | + wget https://github.com/stan-dev/cmdstan/releases/download/v${{ env.CMDSTAN_VERSION }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz -O /tmp/cmdstan.tar.gz &> /dev/null + mkdir $HOME/.cmdstan + tar -xf /tmp/cmdstan.tar.gz -C $HOME/.cmdstan &> /dev/null + + - name: "Build wheel" + run: | + cd python && python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_ENVIRONMENT: > + STAN_BACKEND="${{ env.STAN_BACKEND }}" + CMDSTAN_VERSION=${{ env.CMDSTAN_VERSION }} + # Linux builds run in a Docker container, need to point the cache to the host machine. + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + CIBW_ENVIRONMENT_LINUX: > + STAN_BACKEND="${{ env.STAN_BACKEND }}" + CMDSTAN_VERSION=${{ env.CMDSTAN_VERSION }} + HOME="/host/${{ env.DEFAULT_HOME }}" + PIP_CACHE_DIR="/host/${{ env.PIP_DEFAULT_CACHE }}" + CIBW_BEFORE_ALL_LINUX: sudo chmod -R a+rwx ${{ env.PIP_DEFAULT_CACHE }} + CIBW_ARCHS: native + CIBW_BUILD_FRONTEND: build + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest --pyargs prophet + + - name: "Upload wheel as artifact" + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.os }}-wheel + path: "./**/*.whl" diff --git a/python/prophet/models.py b/python/prophet/models.py index 3cf4b93..a3d4f15 100644 --- a/python/prophet/models.py +++ b/python/prophet/models.py @@ -12,7 +12,6 @@ from enum import Enum from pathlib import Path import pickle import pkg_resources -import os import logging logger = logging.getLogger('prophet.models') @@ -53,30 +52,20 @@ class IStanBackend(ABC): def sampling(self, stan_init, stan_data, samples, **kwargs) -> dict: pass - @staticmethod - @abstractmethod - def build_model(target_dir, model_dir): - pass - class CmdStanPyBackend(IStanBackend): + CMDSTAN_VERSION = "2.26.1" + def __init__(self): + super().__init__() + import cmdstanpy + cmdstanpy.set_cmdstan_path( + pkg_resources.resource_filename("prophet", f"stan_model/cmdstan-{self.CMDSTAN_VERSION}") + ) @staticmethod def get_type(): return StanBackendEnum.CMDSTANPY.name - @staticmethod - def build_model(target_dir, model_dir): - from shutil import copy - import cmdstanpy - model_name = 'prophet.stan' - target_name = 'prophet_model.bin' - - sm = cmdstanpy.CmdStanModel( - stan_file=os.path.join(model_dir, model_name)) - sm.compile() - copy(sm.exe_file, os.path.join(target_dir, target_name)) - def load_model(self): import cmdstanpy model_file = pkg_resources.resource_filename( @@ -87,7 +76,7 @@ class CmdStanPyBackend(IStanBackend): def fit(self, stan_init, stan_data, **kwargs): (stan_init, stan_data) = self.prepare_data(stan_init, stan_data) - + if 'inits' not in kwargs and 'init' in kwargs: kwargs['inits'] = self.prepare_data(kwargs['init'], stan_data)[0] @@ -120,14 +109,13 @@ class CmdStanPyBackend(IStanBackend): def sampling(self, stan_init, stan_data, samples, **kwargs) -> dict: (stan_init, stan_data) = self.prepare_data(stan_init, stan_data) - + if 'inits' not in kwargs and 'init' in kwargs: kwargs['inits'] = self.prepare_data(kwargs['init'], stan_data)[0] args = dict( data=stan_data, inits=stan_init, - algorithm='Newton' if stan_data['T'] < 100 else 'LBFGS', ) if 'chains' not in kwargs: @@ -136,7 +124,7 @@ class CmdStanPyBackend(IStanBackend): kwargs['iter_sampling'] = iter_half if 'iter_warmup' not in kwargs: kwargs['iter_warmup'] = iter_half - + args.update(kwargs) self.stan_fit = self.model.sample(**args) @@ -181,7 +169,7 @@ class CmdStanPyBackend(IStanBackend): 'sigma_obs': init['sigma_obs'] } return (cmdstanpy_init, cmdstanpy_data) - + @staticmethod def stan_to_dict_numpy(column_names: Tuple[str, ...], data: 'np.array'): import numpy as np @@ -235,17 +223,6 @@ class PyStanBackend(IStanBackend): def get_type(): return StanBackendEnum.PYSTAN.name - @staticmethod - def build_model(target_dir, model_dir): - import pystan - model_name = 'prophet.stan' - target_name = 'prophet_model.pkl' - with open(os.path.join(model_dir, model_name)) as f: - model_code = f.read() - sm = pystan.StanModel(model_code=model_code) - with open(os.path.join(target_dir, target_name), 'wb') as f: - pickle.dump(sm, f, protocol=pickle.HIGHEST_PROTOCOL) - def sampling(self, stan_init, stan_data, samples, **kwargs) -> dict: args = dict( diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..fe3533a --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel", + "pystan~=2.19.1.1", + "cmdstanpy==0.9.77", +] +build-backend = "setuptools.build_meta" diff --git a/python/requirements.txt b/python/requirements.txt index badec6c..5dddf49 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,5 +1,5 @@ Cython>=0.22 -cmdstanpy==0.9.68 +cmdstanpy==0.9.77 pystan~=2.19.1.1 numpy>=1.15.4 pandas>=1.0.4 @@ -7,6 +7,8 @@ matplotlib>=2.0.0 LunarCalendar>=0.0.9 convertdate>=2.1.2 holidays>=0.11.3.1 +setuptools>=42 setuptools-git>=1.2 python-dateutil>=2.8.0 tqdm>=4.36.1 +wheel>=0.37.0 diff --git a/python/setup.py b/python/setup.py index 1812f13..4d1de88 100644 --- a/python/setup.py +++ b/python/setup.py @@ -3,39 +3,200 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import os.path -import platform -import sys import os -from pkg_resources import ( - normalize_path, - working_set, - add_activation_listener, - require, -) -from setuptools import setup, find_packages +import pickle +import platform +import subprocess +import sys +from collections import OrderedDict +from pathlib import Path +from shutil import copy, copytree, rmtree +from typing import List + +from pkg_resources import add_activation_listener, normalize_path, require, working_set +from setuptools import find_packages, setup, Extension +from setuptools.command.build_ext import build_ext from setuptools.command.build_py import build_py from setuptools.command.develop import develop from setuptools.command.test import test as test_command -from typing import List -PLATFORM = 'unix' -if platform.platform().startswith('Win'): - PLATFORM = 'win' +PLATFORM = "unix" +if platform.platform().startswith("Win"): + PLATFORM = "win" -MODEL_DIR = os.path.join('stan', PLATFORM) -MODEL_TARGET_DIR = os.path.join('prophet', 'stan_model') +MODEL_DIR = os.path.join("stan", PLATFORM) +MODEL_TARGET_DIR = os.path.join("prophet", "stan_model") +# TODO: Remove when upgrading to cmdstanpy 1.0, use cmdstanpy internals instead +# cmdstan utils +MAKE = os.getenv("MAKE", "make" if PLATFORM != "win" else "mingw32-make") +EXTENSION = ".exe" if PLATFORM == "win" else "" + +CMDSTAN_VERSION = "2.26.1" +BINARIES_DIR = "bin" +BINARIES = ["diagnose", "print", "stanc", "stansummary"] +TBB_PARENT = "stan/lib/stan_math/lib" +TBB_DIRS = ["tbb", "tbb_2019_U8"] + + +# TODO: Remove when upgrading to cmdstanpy 1.0, use cmdstanpy internals instead +def clean_all_cmdstan(verbose: bool = False) -> None: + """Run `make clean-all` in the current directory (must be a cmdstan library). + + Parameters + ---------- + verbose: when ``True``, print build msgs to stdout. + """ + cmd = [MAKE, "clean-all"] + proc = subprocess.Popen( + cmd, + cwd=None, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=os.environ, + ) + while proc.poll() is None: + if proc.stdout: + output = proc.stdout.readline().decode("utf-8").strip() + if verbose and output: + print(output, flush=True) + _, stderr = proc.communicate() + if proc.returncode: + msgs = ['Command "make clean-all" failed'] + if stderr: + msgs.append(stderr.decode("utf-8").strip()) + raise RuntimeError("\n".join(msgs)) + +# TODO: Remove when upgrading to cmdstanpy 1.0, use cmdstanpy internals instead +def build_cmdstan(verbose: bool = False) -> None: + """Run `make build` in the current directory (must be a cmdstan library). + + Parameters + ---------- + verbose: when ``True``, print build msgs to stdout. + """ + cmd = [MAKE, "build"] + proc = subprocess.Popen( + cmd, + cwd=None, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=os.environ, + ) + while proc.poll() is None: + if proc.stdout: + output = proc.stdout.readline().decode("utf-8").strip() + if verbose and output: + print(output, flush=True) + _, stderr = proc.communicate() + if proc.returncode: + msgs = ['Command "make build" failed'] + if stderr: + msgs.append(stderr.decode("utf-8").strip()) + raise RuntimeError("\n".join(msgs)) + # Add tbb to the $PATH on Windows + if PLATFORM == "win": + libtbb = os.path.join(os.getcwd(), "stan", "lib", "stan_math", "lib", "tbb") + os.environ["PATH"] = ";".join( + list(OrderedDict.fromkeys([libtbb] + os.environ.get("PATH", "").split(";"))) + ) + +def prune_cmdstan(cmdstan_dir: str) -> None: + """ + Keep only the cmdstan executables and tbb files (minimum required to run a cmdstanpy commands on a pre-compiled model). + """ + original_dir = Path(cmdstan_dir).resolve() + parent_dir = original_dir.parent + temp_dir = parent_dir / "temp" + if temp_dir.is_dir(): + rmtree(temp_dir) + temp_dir.mkdir() + + copytree(original_dir / BINARIES_DIR, temp_dir / BINARIES_DIR) + for f in (temp_dir / BINARIES_DIR).iterdir(): + if f.is_dir(): + rmtree(f) + elif f.is_file() and f.stem not in BINARIES: + os.remove(f) + for tbb_dir in TBB_DIRS: + copytree(original_dir / TBB_PARENT / tbb_dir, temp_dir / TBB_PARENT / tbb_dir) + + rmtree(original_dir) + temp_dir.rename(original_dir) + +def get_cmdstan_cache() -> str: + """Default directory for an existing cmdstan library. Prevents unnecessary re-downloads of cmdstan.""" + return Path.home().resolve() / ".cmdstan" / f"cmdstan-{CMDSTAN_VERSION}" + +def download_cmdstan(cache_dir: Path) -> None: + """Ensure the cmdstan library exists in the cache directory.""" + import cmdstanpy + if os.path.isdir(cache_dir): + print(f"Found existing cmdstan library at {cache_dir}") + else: + with cmdstanpy.utils.pushd(cache_dir.parent): + cmdstanpy.utils.retrieve_version(version=CMDSTAN_VERSION, progress=False) + +def build_cmdstan_model(target_dir): + """ + Rebuild cmdstan in the build environment, then use this installation to compile the stan model. + The stan model is copied to {target_dir}/prophet_model.bin + The cmdstan files required to run cmdstanpy commands are copied to {target_dir}/cmdstan-{version}. + + Parameters + ---------- + target_dir: Directory to copy the compiled model executable and core cmdstan files to. + """ + import cmdstanpy + + cmdstan_cache = get_cmdstan_cache() + download_cmdstan(cmdstan_cache) + + cmdstan_dir = os.path.join(target_dir, f"cmdstan-{CMDSTAN_VERSION}") + if os.path.isdir(cmdstan_dir): + rmtree(cmdstan_dir) + copytree(cmdstan_cache, cmdstan_dir) + with cmdstanpy.utils.pushd(cmdstan_dir): + clean_all_cmdstan() + build_cmdstan() + cmdstanpy.set_cmdstan_path(cmdstan_dir) + + model_name = "prophet.stan" + target_name = "prophet_model.bin" + sm = cmdstanpy.CmdStanModel(stan_file=os.path.join(MODEL_DIR, model_name)) + copy(sm.exe_file, os.path.join(target_dir, target_name)) + # Clean up + for f in Path(MODEL_DIR).iterdir(): + if f.is_file() and f.name != model_name: + os.remove(f) + prune_cmdstan(cmdstan_dir) + +def build_pystan_model(target_dir): + """ + Compile the stan model using pystan and pickle it. The pickle is copied to {target_dir}/prophet_model.pkl. + """ + import pystan + + model_name = "prophet.stan" + target_name = "prophet_model.pkl" + with open(os.path.join(MODEL_DIR, model_name)) as f: + model_code = f.read() + sm = pystan.StanModel(model_code=model_code) + with open(os.path.join(target_dir, target_name), "wb") as f: + pickle.dump(sm, f, protocol=pickle.HIGHEST_PROTOCOL) def get_backends_from_env() -> List[str]: - from prophet.models import StanBackendEnum - return os.environ.get("STAN_BACKEND", StanBackendEnum.PYSTAN.name).split(",") - + return os.environ.get("STAN_BACKEND", "PYSTAN").split(",") def build_models(target_dir): - from prophet.models import StanBackendEnum for backend in get_backends_from_env(): - StanBackendEnum.get_backend_class(backend).build_model(target_dir, MODEL_DIR) + print(f"Compiling {backend} model") + if backend == "CMDSTANPY": + build_cmdstan_model(target_dir) + elif backend == "PYSTAN": + build_pystan_model(target_dir) class BuildPyCommand(build_py): @@ -50,6 +211,13 @@ class BuildPyCommand(build_py): build_py.run(self) +class BuildExtCommand(build_ext): + """Ensure built extensions are added to the correct path in the wheel.""" + + def run(self): + pass + + class DevelopCommand(develop): """Custom develop command to pre-compile Stan models in-place.""" @@ -64,11 +232,14 @@ class DevelopCommand(develop): class TestCommand(test_command): user_options = [ - ('test-module=', 'm', "Run 'test_suite' in specified module"), - ('test-suite=', 's', - "Run single test, case or suite (e.g. 'module.test_suite')"), - ('test-runner=', 'r', "Test runner to use"), - ('test-slow', 'w', "Test slow suites (default off)"), + ("test-module=", "m", "Run 'test_suite' in specified module"), + ( + "test-suite=", + "s", + "Run single test, case or suite (e.g. 'module.test_suite')", + ), + ("test-runner=", "r", "Test runner to use"), + ("test-slow", "w", "Test slow suites (default off)"), ] test_slow = None @@ -79,23 +250,23 @@ class TestCommand(test_command): def finalize_options(self): super(TestCommand, self).finalize_options() if self.test_slow is None: - self.test_slow = getattr(self.distribution, 'test_slow', False) + self.test_slow = getattr(self.distribution, "test_slow", False) """We must run tests on the build directory, not source.""" def with_project_on_sys_path(self, func): # Ensure metadata is up-to-date - self.reinitialize_command('build_py', inplace=0) - self.run_command('build_py') + self.reinitialize_command("build_py", inplace=0) + self.run_command("build_py") bpy_cmd = self.get_finalized_command("build_py") build_path = normalize_path(bpy_cmd.build_lib) # Build extensions - self.reinitialize_command('egg_info', egg_base=build_path) - self.run_command('egg_info') + self.reinitialize_command("egg_info", egg_base=build_path) + self.run_command("egg_info") - self.reinitialize_command('build_ext', inplace=0) - self.run_command('build_ext') + self.reinitialize_command("build_ext", inplace=0) + self.run_command("build_ext") ei_cmd = self.get_finalized_command("egg_info") @@ -106,7 +277,7 @@ class TestCommand(test_command): sys.path.insert(0, normalize_path(ei_cmd.egg_base)) working_set.__init__() add_activation_listener(lambda dist: dist.activate()) - require('%s==%s' % (ei_cmd.egg_name, ei_cmd.egg_version)) + require("%s==%s" % (ei_cmd.egg_name, ei_cmd.egg_version)) func() finally: sys.path[:] = old_path @@ -114,38 +285,39 @@ class TestCommand(test_command): sys.modules.update(old_modules) working_set.__init__() -with open('README.md', 'r', encoding='utf-8') as f: + +with open("README.md", "r", encoding="utf-8") as f: long_description = f.read() -with open('requirements.txt', 'r') as f: +with open("requirements.txt", "r") as f: install_requires = f.read().splitlines() setup( - name='prophet', - version='1.0.1', - description='Automatic Forecasting Procedure', - url='https://facebook.github.io/prophet/', - author='Sean J. Taylor , Ben Letham ', - author_email='sjtz@pm.me', - license='MIT', + name="prophet", + version="1.0.1", + description="Automatic Forecasting Procedure", + url="https://facebook.github.io/prophet/", + author="Sean J. Taylor , Ben Letham ", + author_email="sjtz@pm.me", + license="MIT", packages=find_packages(), - setup_requires=[ - ], install_requires=install_requires, - python_requires='>=3', + python_requires=">=3", zip_safe=False, include_package_data=True, + ext_modules=[Extension("prophet.stan_model", [])], cmdclass={ - 'build_py': BuildPyCommand, - 'develop': DevelopCommand, - 'test': TestCommand, + "build_ext": BuildExtCommand, + "build_py": BuildPyCommand, + "develop": DevelopCommand, + "test": TestCommand, }, - test_suite='prophet.tests', + test_suite="prophet.tests", classifiers=[ - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", ], long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", )