diff --git a/README.md b/README.md index 07b56ac390c6dd1a755b042b84b8bf7fbe9ea5d6..66ad82c9d09e6750c398f59851cb241e108592ca 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ -# BiTIA CLI +# BioInformatics Tool for Infrastructure Automation (BiTIA) CLI utility + +This is the cli utility of the BiTIA project. The other component is +`bitia-runner`. Most users only need the cli client `bitia` to submit jobs. +Install it using pip. + +``` +pip install bitia +``` + +## BiTIA runner + +If you are self-hosting the BiTIA server, you need `bitia-runner` as well. See +the documents of `bitia-runner` for more details. diff --git a/bitia/__init__.py b/bitia/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c876830eb0b0e93213106260588310611448d1a6 --- /dev/null +++ b/bitia/__init__.py @@ -0,0 +1,20 @@ +from importlib.metadata import version as _version +import os +import logging +from rich.logging import RichHandler +import logging.handlers + +log_level = os.environ.get("BITIA_LOGLEVEL", "NOTSET").upper() +logging.basicConfig( + format="%(message)s", + level=log_level, + datefmt="[%X]", + handlers=[RichHandler(rich_tracebacks=True)], +) + +__version__ = _version("bitia") + + +def version() -> str: + """version""" + return __version__ diff --git a/bitia/__main__.py b/bitia/__main__.py index 6ee261bb134bdf94bae54fd384264e07112d1254..2555dc0cb9c2307d137b8fd68ecad22065c53d34 100644 --- a/bitia/__main__.py +++ b/bitia/__main__.py @@ -14,7 +14,8 @@ from rich.logging import RichHandler from pathlib import Path import tempfile -from checksumdir import dirhash + +from bitia.checksumdir import dirhash FORMAT = "%(message)s" logging.basicConfig( diff --git a/bitia/checksumdir.py b/bitia/checksumdir.py new file mode 100644 index 0000000000000000000000000000000000000000..5f0f9ab4d9503d2e6e077eda2a9f1bbe044b0c16 --- /dev/null +++ b/bitia/checksumdir.py @@ -0,0 +1,79 @@ +""" +Function for deterministically creating a single hash for a directory of files, +taking into account only file contents and not filenames. + +dirhash('/path/to/directory', 'md5') + +Orignally from +https://raw.githubusercontent.com/SubconsciousCompute/checksumdir/master/checksumdir/__init__.p +y + +""" + +import hashlib +import typing as T +import os +import re +from pathlib import Path + +HASH_FUNCS = { + "md5": hashlib.md5, + "sha1": hashlib.sha1, + "sha256": hashlib.sha256, + "sha512": hashlib.sha512, +} + + +def dirhash( + dirname: Path, + hashfunc: str = "md5", + excluded_files: T.List[Path] = [], + ignore_hidden: bool = False, + followlinks: bool = False, + excluded_extensions: T.List[str] = [], +): + hash_func = HASH_FUNCS[hashfunc] + assert dirname.is_dir(), "{} is not a directory.".format(dirname) + hashvalues = [] + for r_dir, dirs, files in os.walk(dirname, topdown=True, followlinks=followlinks): + if ignore_hidden and re.search(r"/\.", r_dir): + continue + root = Path(r_dir) + dirs.sort() + files.sort() + for fname in files: + if ignore_hidden and fname.startswith("."): + continue + + if Path(fname).suffix in excluded_extensions: + continue + + if fname in excluded_files: + continue + + hashvalues.append(filehash(root / fname, hashfunc)) + + return _reduce_hash(hashvalues, hash_func) + + +def filehash(filepath: Path, hashfunc: str = "md5"): + hasher = HASH_FUNCS[hashfunc]() + blocksize = 64 * 1024 + + if not filepath.is_file(): + return hasher.hexdigest() + + with filepath.open("rb") as fp: + while True: + data = fp.read(blocksize) + if not data: + break + hasher.update(data) + return hasher.hexdigest() + + +def _reduce_hash(hashlist, hashfunc): + hasher = hashfunc() + for hashvalue in sorted(hashlist): + hasher.update(hashvalue.encode("utf-8")) + return hasher.hexdigest() diff --git a/poetry.lock b/poetry.lock index 43931f2f8c8362c03a97e8aa5a373eb2b29800b6..102eaf777ce33427f9ad98148a8bf88fa6674342 100644 --- a/poetry.lock +++ b/poetry.lock @@ -17,14 +17,6 @@ python-versions = ">=3.6.0" [package.extras] unicode_backport = ["unicodedata2"] -[[package]] -name = "checksumdir" -version = "1.2.0" -description = "Compute a single hash of the file contents of a directory." -category = "main" -optional = false -python-versions = ">=3.6,<4.0" - [[package]] name = "click" version = "8.1.3" @@ -149,7 +141,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "d60f75638b8835d054e6d589945791f581eb9275aae90e255a58aff3bfab3b27" +content-hash = "876b8c11170f017ce20e5b481e80612ab81476f22aa411f669ad5edcc0fdd327" [metadata.files] certifi = [ @@ -160,10 +152,6 @@ charset-normalizer = [ {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] -checksumdir = [ - {file = "checksumdir-1.2.0-py3-none-any.whl", hash = "sha256:77687e16da95970c94061c74ef2e13666c4b6e0e8c90a5eaf0c8f7591332cf01"}, - {file = "checksumdir-1.2.0.tar.gz", hash = "sha256:10bfd7518da5a14b0e9ac03e9ad105f0e70f58bba52b6e9aa2f21a3f73c7b5a8"}, -] click = [ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, diff --git a/pyproject.toml b/pyproject.toml index 6a9c48673355b1ab48b039d7b648bfcb164b9ce3..897d76bb3a56bbf77f47be6d35684707961eebb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "bitia" version = "0.1.1" -description = "BioInformatics Tool for Infrastructure Automation (BiTIA) command line utility." +description = "BioInformatics Tool for Infrastructure Automation (BiTIA) CLI utility." authors = ["Dilawar Singh <dilawar@subcom.tech>"] readme = "README.md" @@ -9,7 +9,6 @@ readme = "README.md" python = "^3.8" requests = "^2.28.1" typer = "^0.6.1" -checksumdir = "^1.2.0" rich = "^12.5.1" [tool.poetry.scripts] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_sanity.py b/tests/test_sanity.py new file mode 100644 index 0000000000000000000000000000000000000000..77a2e2b344a69e1a61ea49a1ca6e61a03e88cb1e --- /dev/null +++ b/tests/test_sanity.py @@ -0,0 +1,5 @@ +import bitia + + +def test_sanity(): + assert bitia.version()