!kedro -V
kedro, version 0.18.14
February 8, 2024
Default node names are problematic #3575
[02/08/24 15:59:00] WARNING Kedro extension was registered but couldn't find a Kedro project. Make sure you run '%reload_kedro <project_root>'. __init__.py:40
[02/08/24 15:59:00] INFO Kedro project default-node-name __init__.py:108
INFO Defined global variable 'context', 'session', 'catalog' and 'pipelines' __init__.py:109
[02/08/24 15:59:08] INFO Registered line magic 'run_viz' __init__.py:115
@property
def name(self) -> str:
"""Node's name.
Returns:
Node's name if provided or the name of its function.
"""
node_name = self._name or str(self)
if self.namespace:
return f"{self.namespace}.{node_name}"
return node_name
node.py, not used outside
In [8]: n._unique_key
Out[8]: ('preprocess_companies_node', 'companies', 'preprocessed_companies')
`__eq__` makes sense.
`__lt__` - Private Kedro PR
__str__
__repr__
short_name
`__str__` and `__repr__` will call `node._func_name`
def __str__(self) -> str:
def _set_to_str(xset: set | list[str]) -> str:
return f"[{';'.join(xset)}]"
out_str = _set_to_str(self.outputs) if self._outputs else "None"
in_str = _set_to_str(self.inputs) if self._inputs else "None"
prefix = self._name + ": " if self._name else ""
return prefix + f"{self._func_name}({in_str}) -> {out_str}"
def _set_to_str(xset: set | list[str]) -> str:
return f"[{';'.join(xset)}]"
self = n
out_str = _set_to_str(self.outputs) if self._outputs else "None"
in_str = _set_to_str(self.inputs) if self._inputs else "None"
prefix = self._name + ": " if self._name else ""
prefix + f"{self._func_name}({in_str}) -> {out_str}"
'split: split_data([example_iris_data;parameters]) -> [X_train;X_test;y_train;y_test]'
"Node(split_data, ['example_iris_data', 'parameters'], ['X_train', 'X_test', 'y_train', 'y_test'], 'split')"
nameless_node = node(dummy_func, inputs=["a"], outputs=["b"])
nameless_namespace_node = node(dummy_func, inputs=["a"], outputs=["b"], namespace="nok")
name_node = node(dummy_func, inputs=["a"], outputs=["b"], name="dummy_name")
format_name(nameless_node)
format_name(nameless_namespace_node)
format_name(name_node)
str(node)='dummy_func([a]) -> [b]'
repr(node)="Node(dummy_func, ['a'], ['b'], None)"
node.name='dummy_func([a]) -> [b]'
node.short_name='Dummy Func'
str(node)='dummy_func([a]) -> [b]'
repr(node)="Node(dummy_func, ['a'], ['b'], None)"
node.name='nok.dummy_func([a]) -> [b]'
node.short_name='Dummy Func'
str(node)='dummy_name: dummy_func([a]) -> [b]'
repr(node)="Node(dummy_func, ['a'], ['b'], 'dummy_name')"
node.name='dummy_name'
node.short_name='dummy_name'
`repr` for namespace is wrong because it will not reconstruct the same node, and `namespace` wasn’t included in the `__repr__` at all.
`short_name` feels very kedro-viz coupled and unnecessary to keep in kedro. Kedro does not use this property.
https://github.com/kedro-org/kedro/pull/568/files - can replace with `self._func_name` instead of `_get_readable_func_name`
def __str__(self) -> str:
def _set_to_str(xset: set | list[str]) -> str:
return f"[{';'.join(xset)}]"
out_str = _set_to_str(self.outputs) if self._outputs else "None"
in_str = _set_to_str(self.inputs) if self._inputs else "None"
prefix = self._name + ": " if self._name else ""
return prefix + f"{self._func_name}({in_str}) -> {out_str}"
@property
def name(self) -> str:
"""Node's name.
Returns:
Node's name if provided or the name of its function.
"""
node_name = self._name or str(self)
if self.namespace:
return f"{self.namespace}.{node_name}"
return node_name
This is an important property and must be kept unique; it’s used for filtering.
However, the implementation uses `__str__`, which is meant for “printing”, and this creates an obscure dependency. In any case, the dependency should be reversed so that `__str__` relies on `self.name` instead.