Understanding the Kedro codebase - A quick and dirty meta-analysis - (Part I)

Meta-analysis of the kedro codebase
kedro
Published

November 13, 2022

Inspired by this talk

How many lines of code in Kedro?

from pathlib import Path
import pandas as pd
from collections import Counter
# Root of the local kedro checkout that every later cell scans; change this
# path to point at your own clone.
REPO_PATH = Path("/Users/Nok_Lam_Chan/GitHub/kedro")
# Show the entries directly under the repository root (files and directories).
list(REPO_PATH.iterdir())
[PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test_requirements.txt'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/CODE_OF_CONDUCT.md'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/LICENSE.md'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tools'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro_technical_charter.pdf'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.DS_Store'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.pytest_cache'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/derby.log'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro.egg-info'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.pre-commit-config.yaml'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.coverage'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/Makefile'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/CITATION.cff'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/CODEOWNERS'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/pyproject.toml'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/trufflehog-ignore.txt'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/dependency'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/MANIFEST.in'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/docs'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.readthedocs.yml'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/dep_tree.log'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/README.md'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/RELEASE.md'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/setup.py'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/demo-project'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/logs'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.mypy_cache'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.gitignore'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/static'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/CONTRIBUTING.md'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/behave.ini'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.github'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.gitpod.yml'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/info.log'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/coverage.xml'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/errors.log'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.git'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/htmlcov'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.vscode'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/data'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/conf'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.circleci'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/import.log'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/notebooks'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.run'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/.idea'),
 PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/src')]
def count_effective_line(counter, fn):
    """Add the number of lines in the file ``fn`` to ``counter[fn]``.

    Despite the name, every physical line is counted, including blank lines
    and comments. An empty file adds nothing, so it gets no entry at all.

    Args:
        counter: Mapping from path to running line count, updated in place.
            It must default missing keys to 0 (e.g. ``collections.Counter``).
        fn: Path of the text file to read.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # and replace undecodable bytes so that one oddly-encoded file cannot
    # abort the whole scan with a UnicodeDecodeError.
    with open(fn, encoding="utf-8", errors="replace") as f:
        for _ in f:
            counter[fn] += 1
# Tally the line count of every Python file found under the repository.
# Note: rglob("*/*.py") only matches files that sit inside at least one
# sub-directory, so .py files directly in REPO_PATH are not included.
lines_count = Counter()
python_files = REPO_PATH.rglob("*/*.py")
for py_file in python_files:
    count_effective_line(lines_count, py_file)
print(lines_count)
            
Counter({PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/test_spark_dataset.py'): 984, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_pipeline.py'): 940, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/pipeline/pipeline.py'): 926, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/session/test_session.py'): 891, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/micropkg.py'): 854, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/micropkg/test_micropkg_pull.py'): 846, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/core.py'): 748, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_cli.py'): 730, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_data_catalog.py'): 685, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_starters.py'): 639, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/cli_steps.py'): 623, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/pipeline/node.py'): 612, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/docs/conf.py'): 598, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/data_catalog.py'): 594, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/docs/build/conf.py'): 587, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/micropkg/test_micropkg_package.py'): 581, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/session/test_session_extension_hooks.py'): 576, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_partitioned_dataset.py'): 565, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/starters.py'): 552, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/partitioned_dataset.py'): 551, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/pipeline/test_pipeline.py'): 522, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_incremental_dataset.py'): 503, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/context/test_context.py'): 485, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/config/test_templated_config.py'): 482, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_project.py'): 479, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_jupyter.py'): 470, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/utils.py'): 469, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/runner/runner.py'): 456, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py'): 441, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/sql_dataset.py'): 438, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/matplotlib/test_matplotlib_writer.py'): 436, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_node.py'): 434, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/session/session.py'): 423, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/spark/spark_dataset.py'): 422, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_modular_pipeline.py'): 418, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/runner/test_parallel_runner.py'): 401, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/project.py'): 392, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_pipeline_with_transcoding.py'): 391, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_generic_dataset.py'): 383, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/session/conftest.py'): 381, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_sql_dataset.py'): 374, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/project/__init__.py'): 369, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/config/test_config.py'): 354, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/runner/parallel_runner.py'): 353, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/context/context.py'): 345, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_parquet_dataset.py'): 344, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/pipeline.py'): 336, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_gbq_dataset.py'): 315, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/test_spark_hive_dataset.py'): 314, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/gbq_dataset.py'): 314, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_catalog.py'): 305, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/ipython/test_ipython.py'): 304, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_csv_dataset.py'): 300, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/hooks/specs.py'): 296, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/pipeline/modular_pipeline.py'): 290, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/jupyter.py'): 282, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_excel_dataset.py'): 281, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/config/templated_config.py'): 281, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/dagascii.py'): 275, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/runner/test_sequential_runner.py'): 273, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pickle/test_pickle_dataset.py'): 269, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/micropkg/test_micropkg_requirements.py'): 266, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/project/test_pipeline_discovery.py'): 260, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/excel_dataset.py'): 254, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/test_startup.py'): 250, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/config/common.py'): 248, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/generic_dataset.py'): 246, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_hdf_dataset.py'): 245, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/matplotlib/matplotlib_writer.py'): 243, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pickle/pickle_dataset.py'): 243, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_xml_dataset.py'): 241, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_json_dataset.py'): 241, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/geojson/test_geojson_dataset.py'): 232, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pillow/test_image_dataset.py'): 231, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/parquet_dataset.py'): 230, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_pipeline_from_missing.py'): 227, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_memory_dataset.py'): 226, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/networkx/test_json_dataset.py'): 226, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/email/test_message_dataset.py'): 226, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/docs/source/conf.py'): 225, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/docs/source/conf.py'): 225, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/docs/source/conf.py'): 225, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/docs/source/conf.py'): 224, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/docs/source/conf.py'): 224, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/docs/source/conf.py'): 222, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/holoviews/test_holoviews_writer.py'): 220, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/pandas/test_feather_dataset.py'): 220, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/spark/spark_hive_dataset.py'): 220, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/runner/test_thread_runner.py'): 213, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/cli.py'): 211, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/yaml/test_yaml_dataset.py'): 210, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/hdf_dataset.py'): 204, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/json/test_json_dataset.py'): 200, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_lambda_dataset.py'): 194, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/tracking/test_metrics_dataset.py'): 194, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/csv_dataset.py'): 194, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/feather_dataset.py'): 191, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/redis/redis_dataset.py'): 189, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/networkx/test_gml_dataset.py'): 188, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/networkx/test_graphml_dataset.py'): 188, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py'): 188, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/email/message_dataset.py'): 188, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/text/test_text_dataset.py'): 187, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/json_dataset.py'): 187, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/tracking/test_json_dataset.py'): 185, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/catalog.py'): 176, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/spark/spark_jdbc_dataset.py'): 175, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/xml_dataset.py'): 171, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/api/test_api_dataset.py'): 170, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/runner/conftest.py'): 168, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/redis/test_redis_dataset.py'): 165, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/ipython/__init__.py'): 164, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/dask/test_parquet_dataset.py'): 162, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/json/json_dataset.py'): 160, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/conftest.py'): 159, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/geopandas/geojson_dataset.py'): 157, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/startup.py'): 156, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/runner/thread_runner.py'): 156, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/yaml/yaml_dataset.py'): 155, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/tools/test_cli.py'): 154, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/plotly/json_dataset.py'): 154, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/networkx/json_dataset.py'): 150, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/networkx/gml_dataset.py'): 145, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/networkx/graphml_dataset.py'): 143, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_cached_dataset.py'): 142, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pillow/image_dataset.py'): 142, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/api/api_dataset.py'): 142, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_node_run.py'): 141, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/biosequence/biosequence_dataset.py'): 137, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/holoviews/holoviews_writer.py'): 137, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/dask/parquet_dataset.py'): 136, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/config/config.py'): 134, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/memory_dataset.py'): 132, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/text/text_dataset.py'): 131, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/environment.py'): 128, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_cli_hooks.py'): 128, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/session/test_session_hook_manager.py'): 126, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/test_spark_jdbc_dataset.py'): 121, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/plotly/plotly_dataset.py'): 117, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/cached_dataset.py'): 113, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/lambda_dataset.py'): 113, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/plotly/test_plotly_dataset.py'): 108, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/spark/deltatable_dataset.py'): 108, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/bioinformatics/test_biosequence_dataset.py'): 107, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/hooks/manager.py'): 106, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/sh_run.py'): 105, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/project/test_settings.py'): 102, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/plotly/test_json_dataset.py'): 101, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/io/test_core.py'): 96, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/logging/color_logger.py'): 95, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/conftest.py'): 89, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/session/test_store.py'): 89, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/test_deltatable_dataset.py'): 89, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/test_registry.py'): 88, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/runner/sequential_runner.py'): 87, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/util.py'): 84, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/pipeline/test_pipeline_integration.py'): 84, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/pipeline/conftest.py'): 84, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/nodes.py'): 80, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/project/test_pipeline_registry.py'): 79, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/micropkg/conftest.py'): 79, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/hooks/test_manager.py'): 75, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/nodes.py'): 74, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/nodes.py'): 74, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/nodes.py'): 74, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/nodes.py'): 74, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/tracking/metrics_dataset.py'): 68, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/test_memory_dataset.py'): 67, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tools/cli.py'): 62, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/settings.py'): 62, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/project/test_logging.py'): 58, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/settings.py'): 56, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/nodes.py'): 51, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/registry.py'): 50, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/hooks/manager.py'): 49, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/test/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/tracking/json_dataset.py'): 47, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/hooks/specs.py'): 46, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/session/shelvestore.py'): 43, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/tests/test_run.py'): 41, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/spark/conftest.py'): 41, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name 
}}/src/tests/test_run.py'): 41, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/tests/test_run.py'): 40, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/setup.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/tests/test_run.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/setup.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/tests/test_run.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/setup.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/tests/test_run.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/pipeline.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/config/abstract_config.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/session/store.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/src/setup.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pandas/__init__.py'): 39, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/pipeline.py'): 38, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/test/settings.py'): 37, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/setup.py'): 37, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/settings.py'): 37, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/setup.py'): 37, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/datasets/conftest.py'): 35, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/pipeline.py'): 33, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/pipeline.py'): 33, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/io/__init__.py'): 33, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/test_utils.py'): 30, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/utils.py'): 28, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_plugin/plugin.py'): 27, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py'): 27, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tools/circleci/github_scripts/kedro_version.py'): 26, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/pipeline.py'): 26, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/settings.py'): 24, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/framework/cli/hooks/test_manager.py'): 22, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/extensions/ipython.py'): 22, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py'): 19, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/config/__init__.py'): 19, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/pipeline_registry.py'): 18, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/pipeline_registry.py'): 18, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/pipeline_registry.py'): 18, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/pipeline_registry.py'): 18, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/tests/extras/logging/test_color_logger.py'): 16, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/runner/__init__.py'): 16, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py'): 16, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/logging/__init__.py'): 15, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/networkx/__init__.py'): 15, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/spark/__init__.py'): 14, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/test/pipeline_registry.py'): 13, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_plugin/setup.py'): 12, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/__init__.py'): 11, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/hooks/markers.py'): 11, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/plotly/__init__.py'): 11, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/api/__init__.py'): 11, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/tracking/__init__.py'): 11, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/__main__.py'): 10, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/hooks/markers.py'): 10, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/__init__.py'): 10, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py'): 10, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/tests/test_pipeline.py'): 9, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/tests/test_pipeline.py'): 9, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/tests/test_pipeline.py'): 9, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/pipeline/__init__.py'): 9, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/tests/test_pipeline.py'): 9, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/dask/__init__.py'): 8, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/redis/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/geopandas/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pillow/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/json/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/biosequence/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/tensorflow/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/matplotlib/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/yaml/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/pickle/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/text/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/holoviews/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/email/__init__.py'): 8, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/__init__.py'): 7, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/__init__.py'): 7, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/context/__init__.py'): 7, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/__init__.py'): 7, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/session/__init__.py'): 6, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/hooks/__init__.py'): 5, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/cli/hooks/__init__.py'): 5, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/test/src/test/__init__.py'): 4, 
PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris/src/iris/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/iris-demo/src/iris_demo/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/build/lib/resume_kedro/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/nodes.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/templates/project/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py'): 4, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/resume-kedro/src/resume_kedro/settings.py'): 3, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/datasets/__init__.py'): 3, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/extensions/__init__.py'): 3, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/extras/__init__.py'): 2, PosixPath('/Users/Nok_Lam_Chan/GitHub/kedro/kedro/framework/__init__.py'): 1})

Clean up the dictionary a little bit

# Turn the {path: line count} mapping into a two-column table.
line_counts_df = pd.DataFrame(lines_count.items(), columns=["fullpath", "line_of_code"])
# Store each path relative to REPO_PATH as a "/"-separated string. Deriving
# it from REPO_PATH (instead of stripping a hard-coded absolute prefix) keeps
# this cell working when the checkout lives somewhere else.
line_counts_df["fullpath"] = line_counts_df["fullpath"].apply(
    lambda path: path.relative_to(REPO_PATH).as_posix()
)
line_counts_df.head(2)
fullpath line_of_code
0 tools/cli.py 62
1 features/environment.py 128
# Break the relative path into at most four pieces. With n=3 the split stops
# after three separators, so "filename" holds whatever remains of the path
# (possibly still containing "/"), and shallow paths get None in the
# trailing columns.
path_columns = ["toplevel", "module", "submodule", "filename"]
path_parts = line_counts_df["fullpath"].str.split("/", n=3, expand=True)
line_counts_df[path_columns] = path_parts
line_counts_df
fullpath line_of_code toplevel module submodule filename
0 tools/cli.py 62 tools cli.py None None
1 features/environment.py 128 features environment.py None None
2 tests/test_utils.py 30 tests test_utils.py None None
3 tests/conftest.py 89 tests conftest.py None None
4 docs/conf.py 598 docs conf.py None None
... ... ... ... ... ... ...
276 kedro/extras/datasets/pandas/feather_dataset.py 191 kedro extras datasets pandas/feather_dataset.py
277 kedro/extras/datasets/pandas/hdf_dataset.py 204 kedro extras datasets pandas/hdf_dataset.py
278 kedro/extras/datasets/pandas/csv_dataset.py 194 kedro extras datasets pandas/csv_dataset.py
279 kedro/extras/datasets/pandas/excel_dataset.py 254 kedro extras datasets pandas/excel_dataset.py
280 kedro/extras/datasets/pandas/gbq_dataset.py 314 kedro extras datasets pandas/gbq_dataset.py

281 rows × 6 columns

## Sort by Top level module
# Lines of code per top-level directory, largest first. Only the numeric
# column is aggregated: with numeric_only defaulting to False (pandas >= 2.0)
# a bare .sum() would also try to add up the path-string columns.
(
    line_counts_df.groupby("toplevel")[["line_of_code"]]
    .sum()
    .sort_values(by="line_of_code", ascending=False)
)
line_of_code
toplevel
tests 25341
kedro 18683
features 1587
docs 1185
resume-kedro 1007
iris-demo 550
iris 547
test 405
tools 88

Interestingly, the tests and the kedro package itself are of comparable size: roughly 25k vs. 19k lines, i.e. about a 1.4:1 ratio between tests and kedro.

# Lines of code per (module, submodule) pair across all top-level
# directories, largest first. Rows whose module or submodule is missing
# (paths with fewer than three components) are left out, because groupby
# drops missing keys by default. Only the numeric column is aggregated so
# the path-string columns are never summed.
(
    line_counts_df.groupby(["module", "submodule"])[["line_of_code"]]
    .sum()
    .sort_values(by="line_of_code", ascending=False)
)
line_of_code
module submodule
extras datasets 15775
framework cli 8837
session 2574
pipeline test_pipeline.py 940
pipeline.py 926
... ... ...
config __init__.py 19
runner __init__.py 16
pipeline __init__.py 9
extras __init__.py 2
framework __init__.py 1

74 rows × 1 columns

## Sort by Sub-module
# Restrict the analysis to files inside the kedro package itself.
kedro_line_counts_df = line_counts_df[line_counts_df["toplevel"] == "kedro"]

# Total lines per module, to be attached to every (module, submodule) row.
# Only "line_of_code" is aggregated: summing the path-string columns would be
# meaningless and would leave extra columns that collide in the merge below.
tmp = (
    kedro_line_counts_df.groupby("module")[["line_of_code"]]
    .sum()
    .rename(columns={"line_of_code": "module_line_of_code"})
)

# Lines per (module, submodule), joined with the total of the parent module.
# "module" is a column on the left side and the index name on the right.
kedro_line_counts_df_group = (
    kedro_line_counts_df.groupby(["module", "submodule"])[["line_of_code"]]
    .sum()
    .reset_index()
    .merge(tmp, on="module")
)

# Lines of code per module, largest first.
(
    kedro_line_counts_df.groupby("module")[["line_of_code"]]
    .sum()
    .sort_values(by="line_of_code", ascending=False)
)
line_of_code
module
extras 6871
framework 5246
io 2284
pipeline 1837
runner 1068
config 721
templates 443
ipython 164
utils.py 28
__init__.py 11
__main__.py 10
# Order the (module, submodule) rows by the total of their module first and
# by their own line count second, both descending.
sort_keys = ["module_line_of_code", "line_of_code"]
kedro_line_counts_df_group.sort_values(by=sort_keys, ascending=False)
module submodule line_of_code module_line_of_code
6 extras datasets 6734 6871
8 extras logging 110 6871
7 extras extensions 25 6871
5 extras __init__.py 2 6871
10 framework cli 3439 5246
14 framework session 511 5246
12 framework hooks 418 5246
13 framework project 369 5246
11 framework context 352 5246
15 framework startup.py 156 5246
9 framework __init__.py 1 5246
18 io core.py 748 2284
19 io data_catalog.py 594 2284
22 io partitioned_dataset.py 551 2284
21 io memory_dataset.py 132 2284
17 io cached_dataset.py 113 2284
20 io lambda_dataset.py 113 2284
16 io __init__.py 33 2284
27 pipeline pipeline.py 926 1837
26 pipeline node.py 612 1837
25 pipeline modular_pipeline.py 290 1837
24 pipeline __init__.py 9 1837
30 runner runner.py 456 1068
29 runner parallel_runner.py 353 1068
32 runner thread_runner.py 156 1068
31 runner sequential_runner.py 87 1068
28 runner __init__.py 16 1068
4 config templated_config.py 281 721
2 config common.py 248 721
3 config config.py 134 721
1 config abstract_config.py 39 721
0 config __init__.py 19 721
34 templates project 410 443
33 templates pipeline 33 443
23 ipython __init__.py 164 164
# Total number of LOC in the kedro package (rows with toplevel == "kedro").
total_loc = kedro_line_counts_df["line_of_code"].sum()
total_loc
18683

Conclusion

The kedro codebase is not huge: roughly 20,000 lines of code, which is more than 10x smaller than pandas with its > 250,000 lines of code. The datasets and framework code are the largest modules, which isn’t a surprise to me. More surprising is how small config actually is, given the huge complexity it creates in a kedro project. The cli is also relatively large: it takes ~3,400 lines of code, which I didn’t expect.