### Basic JSON Conversion Examples Source: https://google.github.io/koladata/api/kd/json.html Examples demonstrating the conversion of various Koda data types to JSON strings. ```python kd.to_json(None) -> kd.str(None) kd.to_json(kd.missing) -> kd.str(None) kd.to_json(kd.present) -> 'true' kd.to_json(True) -> 'true' kd.to_json(kd.slice([1, None, 3])) -> ['1', None, '3'] kd.to_json(kd.list([1, None, 3])) -> '[1, null, 3]' kd.to_json(kd.dict({'a': 1, 'b':'2'}) -> '{"a": 1, "b": "2"}' kd.to_json(kd.new(a=1, b='2')) -> '{"a": 1, "b": "2"}' kd.to_json(kd.new(x=None)) -> '{"x": null}' kd.to_json(kd.new(x=kd.missing)) -> '{"x": false}' kd.to_json(kd.new(a=1, b=None), include_missing_values=False) -> '{"a": 1}' ``` -------------------------------- ### Object Creation and Attribute Management Source: https://google.github.io/koladata/cheatsheet.html Examples of creating objects, accessing attributes, and updating object state. ```python # Entity objects > > > o = kd.obj(x=1, y=2) os = kd.obj(x=kd.slice([1, 2, None]), … y=kd.slice([4, None, 6])) > > > os = kd.slice([kd.obj(x=1), … kd.obj(y=2.0), … kd.obj(x=1.0, y=’a’)]) > > > os.get_schema() DataItem(OBJECT, schema: SCHEMA, …) os.get_obj_schema() DataSlice([ IMPLICIT_ENTITY(x=INT32), IMPLICIT_ENTITY(y=FLOAT32), IMPLICIT_ENTITY(x=FLOAT32, y=STRING), ], schema: SCHEMA, …) # Use provided itemids > > > itemid = kd.new_itemid() o1 = kd.obj(x=1, y=2, itemid=itemid) o2 = kd.obj(x=1, y=2, itemid=itemid) assert o1.get_itemid() == o2.get_itemid() # Get available attributes > > > os1 = kd.slice([kd.obj(x=1), kd.obj(x=1.0, y=’a’)]) # Attributes present in all objects > > > kd.dir(os1) Traceback (most recent call last): … ValueError: dir() cannot determine attribute names because objects have different attributes. Please specify intersection= explicitly. 
# Or > > > kd.dir(os1, intersection=True) [‘x’] > > > kd.dir(os1, intersection=False) [‘x’, ‘y’] # Access attribute > > > o.x DataItem(1, schema: INT32, …) o.get_attr(‘y’) DataItem(2, schema: INT32, …) o.maybe(‘z’) DataItem(None, schema: NONE, …) o.get_attr(‘z’, default=0) DataItem(0, schema: INT32, …) os.get_attr(‘x’, default=0) DataSlice([1.0, 0.0, 1.0], schema: FLOAT32, …) # Objects are immutable by default, modification is done # by creating a new object with the same ItemId and # updated attributes > > > o = kd.obj(x=1, y=2) # Update a single attribute > > > o1 = o.with_attr(‘x’, 3) o1 = o.with_attr(‘z’, 4) # Also override schema # no overwrite_schema=True is needed > > > o1 = o.with_attr(‘y’, ‘a’) # Remove a single attribute > > > o1 = o.with_attr(‘x’, None) # Update/remove multiple attributes > > > o2 = o.with_attrs(z=4, x=None) # Also override schema for ‘y’ > > > o2 = o.with_attrs(z=4, y=’a’) # Create an update and apply it separately > > > upd = kd.attrs(o, z=4, y=10) o3 = o.updated(upd) # Allows mixing multiple updates > > > o4 = o.updated(kd.attrs(o, z=4), kd.attrs(o, y=None)) # Update nested attributes > > > nested = kd.obj(a=kd.obj(c=kd.obj(e=1), d=2), b=3) nested = nested.updated(kd.attrs(nested.a.c, e=4), … kd.attrs(nested.a, d=5), … kd.attrs(nested, b=6)) # List and dict can be objects too # To convert a list/dict to an object, # use kd.obj() > > > l = kd.list([1, 2, 3]) l_obj = kd.obj(l) l_obj[:] DataSlice([1, 2, 3], schema: INT32, …) > > > d = kd.dict({‘a’: 1, ‘b’: 2}) d_obj = kd.obj(d) kd.sort(d_obj.get_keys()) DataSlice([‘a’, ‘b’], schema: STRING, …) d_obj[‘a’] DataItem(1, schema: INT32, …) # Convert an entity to an object > > > e = kd.new(x=1, y=2) e_obj = kd.obj(e) # Actually, we can pass primitive to kd.obj() > > > p_obj = kd.obj(1) p_obj = kd.obj(‘a’) # An OBJECT Dataslice with entity, list, # dict and primitive items > > > kd.slice([kd.obj(a=1), 1, kd.obj(kd.list([1, 2])), … kd.obj(kd.dict({‘a’: 1}))]) DataSlice([Obj(a=1), 1, List[1, 2], 
Dict{‘a’=1}], schema: OBJECT, …) ``` -------------------------------- ### Define Protocol Buffers Source: https://google.github.io/koladata/cheatsheet.html Example proto definitions including extensions and nested messages. ```protobuf message Query { string query_text = 1; float final_ir = 2; repeated Doc docs = 3; repeated int32 tags = 4; map term_weight = 5; proto2.bridge.MessageSet ms_extensions = 6; extensions 1000 to max } message QueryExtension { extend Query { QueryExtension query_extension = 1000; } extend proto2.bridge.MessageSet { QueryExtension ms_extension = 1000; } int32 extra = 1; } message Doc { string url = 1; string title = 2; float score = 3; int32 word_count = 4; bool spam = 5; enum Type { UNDEFINED = 0; WEB = 1; IMAGE = 2; } Type type = 6; } ``` -------------------------------- ### Formatting JSON Output Source: https://google.github.io/koladata/api/kd/json.html Examples demonstrating the use of indent and ensure_ascii parameters to control the output format. ```python kd.to_json(kd.list([1, 2, 3]), indent=-1) -> '[1,2,3]' kd.to_json(kd.list([1, 2, 3]), indent=2) -> '[\n 1,\n 2,\n 3\n]' kd.to_json('✨', ensure_ascii=True) -> '"\\u2728"' kd.to_json('✨', ensure_ascii=False) -> '"✨"' ``` -------------------------------- ### Dict Creation and Manipulation Source: https://google.github.io/koladata/cheatsheet.html Examples of creating dicts with specific item IDs, retrieving keys and values, and performing updates. ```python itemid = kd.new_dictid() d3 = kd.dict({'a': 1, 'b': 2}, itemid=itemid) d4 = kd.dict({'c': 3, 'd': 4}, itemid=itemid) assert d3.get_itemid() == d4.get_itemid() >>> k = d1.get_keys(); sorted(k.to_py()) # order of keys/values is arbitrary ['a', 'b'] >>> v = d1.get_values(); sorted(v.to_py()) # order of keys/values is arbitrary [1, 2] >>> d1['a'] DataItem(1, schema: INT32,...) 
>>> kd.testing.assert_equivalent(kd.get_item(d1, 'a'),d1['a']) # Same as above # Filter out keys/values >>> d1.select_keys(lambda k: k != 'b') DataSlice(['a']...) >>> d1.select_values(lambda v: v > 1) DataSlice([2]...) # Dicts are immutable by default, modification is done # by creating a new dict with the same ItemId and # updated key/values # Update a key/value >>> d4 = d1.with_dict_update('c', 5) # Update multiple key/values >>> another_dict = kd.dict({'a': 3, 'c': 5}) >>> d5 = d1.with_dict_update(another_dict) >>> d6 = d1.with_dict_update(kd.slice(['a', 'c']), ... kd.slice([3, 5])) # Same as above >>> kd.testing.assert_equivalent(d5, d6) # Note that dict update does not support # removing values for now >>> d2 = d1.with_dict_update('a', None) # Dict{'a': 1, 'b': 2} rather than Dict{'b': 2} >>> sorted(d2.get_keys().to_py()) ['a', 'b'] # Create an update and apply it separately >>> upd = kd.dict_update(d1, another_dict) >>> d6 = d1.updated(upd) # Allows mixing multiple updates >>> d7 = d1.updated(kd.dict_update(d1, 'c', 5), ... kd.dict_update(d1, another_dict)) ``` -------------------------------- ### Combining DataSlice Operations Source: https://google.github.io/koladata/overview.html Example demonstrating combined usage of DataSlice operations. ```python > > > a = kd.from_py([{‘x’: 1}, {‘x’: 3}], dict_as_obj=True) b = kd.from_py([{‘y’: 2}, {‘y’: 4}]) > > > a[:].x + b[:][‘y’] DataSlice([3, 7], schema: OBJECT, present: 2/2) > > > kd.zip(kd.agg_sum(a[:].x), kd.agg_sum(b[:][‘y’])) DataSlice([4, 6], schema: OBJECT, present: 2/2) ``` -------------------------------- ### Define and inspect primitive schemas Source: https://google.github.io/koladata/fundamentals.html Examples of specifying dtypes for DataSlices and retrieving schema information. 
```python kd.slice([1, 2, 3]) # INT32 is chosen by default when converting from Python kd.slice([1, 2, 3], schema=kd.INT32) # the same as above kd.slice([1, 2, 3], schema=kd.INT64) # can specify INT64 schema kd.int64([1, 2, 3]) # the same as above kd.slice([1., 2., 3.], schema=kd.FLOAT64) # can specify FLOAT64 schema kd.float64([1., 2., 3.]) # the same as above kd.slice([1, 2, 3]).get_dtype() # kd.INT32 kd.slice([1, 2, 3]).get_schema() # kd.INT32 kd.slice([1., 2, 3]).get_dtype() # kd.FLOAT32: because of the float 1., the ints 2 and 3 are cast to floats ``` -------------------------------- ### Aggregation Using Keyword Arguments Source: https://google.github.io/koladata/cheatsheet.html Performs the same aggregation as the previous example but uses keyword arguments for clarity and to avoid potential argument order issues. ```python kd.apply_py(f3, a=a, b=b) ``` -------------------------------- ### Create Koda Dictionaries Source: https://google.github.io/koladata/api/kd/dicts.html Examples of creating new Koda dicts from Python dictionaries or DataSlices. ```python dict() -> returns a single new dict dict({1: 2, 3: 4}) -> returns a single new dict dict({1: [1, 2]}) -> returns a single dict, mapping 1->List[1, 2] dict({1: kd.slice([1, 2])}) -> returns a single dict, mapping 1->List[1, 2] dict({db.uuobj(x=1, y=2): 3}) -> returns a single dict, mapping uuid->3 dict(kd.slice([1, 2]), kd.slice([3, 4])) -> returns a dict ({1: 3, 2: 4}) dict(kd.slice([[1], [2]]), kd.slice([3, 4])) -> returns a 1-D DataSlice that holds two dicts ({1: 3} and {2: 4}) dict('key', 12) -> returns a single dict mapping 'key'->12 ``` -------------------------------- ### Schemas Cannot Be Objects Source: https://google.github.io/koladata/common_pitfalls.html Provides examples demonstrating that Koda schemas themselves cannot be created as objects using `kd.obj()`.
```python >>> # They fail >>> # kd.obj(kd.INT32) >>> # kd.obj(kd.list_schema(kd.INT32)) >>> ``` -------------------------------- ### Controlling JSON Key Order Source: https://google.github.io/koladata/api/kd/json.html Examples showing how to control the order of keys in JSON objects using keys_attr and values_attr. ```python kd.to_json(kd.new(x=1, y=2)) -> '{"x": 1, "y": 2}' kd.to_json(kd.new(x=1, y=2, json_object_keys=kd.list(['y', 'x']))) -> '{"y": 2, "x": 1}' kd.to_json(kd.new(x=1, y=2, foo=kd.list(['y', 'x'])), keys_attr='foo') -> '{"y": 2, "x": 1}' kd.to_json(kd.new(x=1, y=2, z=3, json_object_keys=kd.list(['x', 'z', 'x']))) -> '{"x": 1, "z": 3, "x": 1}' kd.to_json(kd.new(json_object_keys=kd.list(['x', 'z', 'x']), json_object_values=kd.list([1, 2, 3]))) -> '{"x": 1, "z": 2, "x": 3}' kd.to_json(kd.new(a=kd.list(['x', 'z', 'x']), b=kd.list([1, 2, 3])), keys_attr='a', values_attr='b') -> '{"x": 1, "z": 2, "x": 3}' ``` -------------------------------- ### kd.cond Example with Function (Fails) Source: https://google.github.io/koladata/common_pitfalls.html Illustrates a potential failure case with `kd.cond` when one branch is not applicable to the input type. This highlights that both branches must be evaluable. ```python @kd.fn def explode_or_default(x, y): return kd.cond(kd.is_list(x), x[:], y) explode_or_default(kd.list([1, 2, 3]), 2) ``` -------------------------------- ### Create and Use Entities Source: https://google.github.io/koladata/cheatsheet.html Shows how to create entities with named schemas and verify their structure. ```python # Entity creation with named schema >>> e = kd.new(x=1, y=2, schema='Point') >>> es = kd.new(x=kd.slice([1, 2, None]), ... y=kd.slice([4, None, 6]), ...
schema='Point') >>> assert e.get_schema() == es.get_schema() >>> assert e.is_entity() # Use an existing schema >>> s = kd.named_schema('Point', x=kd.INT32, y=kd.INT32) >>> e = kd.new(x=1, y=2, schema=s) ``` -------------------------------- ### Parallel Tuple/NamedTuple Field Evaluation Source: https://google.github.io/koladata/cheatsheet.html Illustrates how parts of a computation can start evaluating even before a sub-functor's tuple/namedtuple result is fully completed, if they only depend on specific fields. This example shows parallel execution of `outer1` and `outer2` which depend on different elements of the tuple returned by `f`. ```python @kd.trace_as_fn(functor_factory=kd.py_fn) def step(x, msg, pause): print('Start', msg) time.sleep(pause.to_py()) print('Finish', msg) return x + 1 @kd.trace_as_fn(return_type_as=( kd.types.DataSlice, kd.types.DataSlice)) def f(x): return ( step(x, 'inner1', 0.1), step(x, 'inner2', 0.3)) @kd.trace_as_fn() def g(): inner = f(1) return ( step(inner[0], 'outer1', 0.1) + step(inner[1], 'outer2', 0.1)) kd.parallel.call_multithreaded(g) ``` -------------------------------- ### Generate Integer Range DataSlice Source: https://google.github.io/koladata/api/kd/slices.html The `kd.slices.range` function creates a DataSlice of INT64s within a specified range `[start, end)`. `start` and `end` must be broadcastable. If `end` is omitted, `start` is used as `end` and 0 as `start`. 
```python kd.slices.range(start, end=unspecified) ``` ```python kd.range(5) -> kd.slice([0, 1, 2, 3, 4]) ``` ```python kd.range(2, 5) -> kd.slice([2, 3, 4]) ``` ```python kd.range(5, 2) -> kd.slice([]) # empty range ``` ```python kd.range(kd.slice([2, 4])) -> kd.slice([[0, 1], [0, 1, 2, 3]]) ``` ```python kd.range(kd.slice([2, 4]), 6) -> kd.slice([[2, 3, 4, 5], [4, 5]]) ``` -------------------------------- ### Creating and managing entities and schemas Source: https://google.github.io/koladata/overview.html Shows how to create entities, define schemas, and convert from Python objects. ```python # kd.new creates new entities and assigns schemas to them >>> kd.new(x=1, y=2, schema='Point') DataItem(Entity(x=1, y=2), schema: Point(x=INT32, y=INT32),…) # Can also explicitly create schema with attributes before using. >>> my_schema = kd.named_schema('Point', x=kd.INT32, y=kd.INT32) >>> x = kd.new(x=1, y=2, schema=my_schema) >>> x.get_schema() == my_schema # Yes, i.e. a present mask. DataItem(present, schema: MASK) # When converting from py, can specify schema >>> kd.from_py({'x': 1, 'y': 2}, schema=my_schema) DataItem(Entity(x=1, y=2), schema: Point(x=INT32, y=INT32),…) # It’s possible to create nested entities >>> x = kd.new(a=1, b=kd.new(c=3, schema='Inner'), schema='Outer') >>> x DataItem(Entity(a=1, b=Entity(c=3)), schema: Outer(a=INT32, b=Inner(c=INT32)),…) ``` -------------------------------- ### Serial Execution Example Source: https://google.github.io/koladata/cheatsheet.html This example shows a basic function call where all operations are executed serially, without any parallelization. ```python kd.parallel.call_multithreaded( lambda x, y: x ** 2 + y ** 2, x=1, y=2) ``` -------------------------------- ### Entity Creation and Attribute Access Source: https://google.github.io/koladata/cheatsheet.html Demonstrates creating entities, managing schemas, and accessing attributes.
```python >>> e2 = s.new(x=1, y=2) >>> kd.testing.assert_equivalent(e, e2) # When `schema=` is not provided, a new # schema is created for each invocation >>> e1 = kd.new(x=1, y=2) >>> e2 = kd.new(x=1, y=2) >>> assert e1.get_schema() != e2.get_schema() # Use provided itemids >>> itemid = kd.new_itemid() >>> e3 = kd.new(x=1, y=2, itemid=itemid) >>> e4 = kd.new(x=1, y=2, itemid=itemid) >>> assert e3.get_itemid() == e4.get_itemid() # Get available attributes # As all entities share the same schema, # intersection= argument does not matter for them. >>> kd.dir(e) ['x', 'y'] # Access attribute >>> e.x DataItem(1,...) >>> e.get_attr('y') DataItem(2,...) >>> e.maybe('z') DataItem(None,...) >>> e.get_attr('z', default=0) DataItem(0,...) >>> es.get_attr('x', default=0) DataSlice([1, 2, 0],...) ``` -------------------------------- ### Get Dimension Sizes in JaggedShape Source: https://google.github.io/koladata/api/kd/shapes.html Use `kd.shapes.dim_sizes` to get the sizes of rows at a specific dimension within a JaggedShape. This helps in understanding the structure and distribution of data. ```python shape = kd.shapes.new([2], [2, 1]) kd.shapes.dim_sizes(shape, 0) # -> kd.slice([2]) kd.shapes.dim_sizes(shape, 1) # -> kd.slice([2, 1]) ``` -------------------------------- ### Creating Entity Objects Source: https://google.github.io/koladata/common_pitfalls.html Demonstrates how to create entity objects using `kd.obj()` with variadic keyword arguments or by wrapping an existing Koda entity. ```python >>> # Entity objects >>> kd.obj(a=1, b=2) DataItem(Obj(a=1, b=2), schema: OBJECT,...) >>> kd.obj(kd.new(a=1, b=2)) DataItem(Obj(a=1, b=2), schema: OBJECT,...) ``` -------------------------------- ### Get Extension Class from QType Source: https://google.github.io/koladata/api/kd/extension_types.html Converts a Koda QType into its corresponding extension type class. This allows you to get the Python class definition from its QType representation. 
```python kd.extension_types.get_extension_cls(qtype: QType) -> type[Any] ``` -------------------------------- ### kd.expr.get_name Source: https://google.github.io/koladata/api/kd/expr.html Gets the name of an Expr if it has one. ```APIDOC ## kd.expr.get_name ### Description Returns the name of the given Expr, or None if it does not have one. ### Method N/A (Function) ### Endpoint N/A ### Parameters #### Path Parameters N/A #### Query Parameters N/A #### Request Body N/A ### Request Example N/A ### Response #### Success Response (200) - **str | None** - The name of the Expr, or None. #### Response Example N/A ``` -------------------------------- ### Navigate and Index DataSlices Source: https://google.github.io/koladata/fundamentals.html Demonstrates indexing and size aggregation on DataSlices. ```python kd.index(ds, dim=2) # the same as above, as there are 3 dimensions kd.index(ds, dim=0) # [[[0, 0], [0, 0, 0]], [[1], [], [1, 1, 1, 1]]] kd.agg_size(ds) # [[2, 3], [1, 0, 4]] - last dimension sizes ``` -------------------------------- ### Get Shape of DataSlice Source: https://google.github.io/koladata/api/data_slice.html Returns the shape of the DataSlice. ```python DataSlice.get_shape() ``` -------------------------------- ### Importing Koda Source: https://google.github.io/koladata/cheatsheet.html How to import the Koda library and its extensions. ```APIDOC ## Import Koda ### Description Import the main Koda library and optional extension libraries. ### Code Example ```python from koladata import kd # Optional extension libraries # from koladata import kd_ext ``` ``` -------------------------------- ### kd.shapes.ndim Source: https://google.github.io/koladata/api/kd/shapes.html Gets the rank (number of dimensions) of a jagged shape. ```APIDOC ## kd.shapes.ndim(shape) ### Description Returns the rank of the jagged shape. 
### Method GET (assumed, as it's a retrieval function) ### Endpoint /kd/shapes/ndim ### Parameters #### Path Parameters None #### Query Parameters None #### Request Body None ### Request Example ```python # Example usage (conceptual, actual API call might differ) # Assuming kd.shapes.new is defined # shape = kd.shapes.new(2, 3, 1) # print(kd.shapes.ndim(shape)) ``` ### Response #### Success Response (200) - **rank** (integer) - The number of dimensions in the shape. #### Response Example ```json { "rank": 3 } ``` ``` -------------------------------- ### Create Entities with Derived Schemas Source: https://google.github.io/koladata/cheatsheet.html Demonstrates creating entities using kd.new() where schemas are automatically derived or specified via uu schema names. ```python # kd.new() creates entities with derived schema i1 = kd.new(x=1, y=2.0, z='3') # The result DataItem has an auto-derived schema assert i1.get_schema().x == kd.INT32 assert i1.get_schema().y == kd.FLOAT32 assert i1.get_schema().z == kd.STRING i2 = kd.new(x=1, y=2.0, z='3') # Schemas are different because two schemas # with different ItemIds are created assert i1.get_schema() != i2.get_schema() i3 = kd.new(x=1, y=2.0, schema='Point') i4 = kd.new(x=2, y=3.0, schema='Point') # Schemas are the same because two uu schemas # with the same ItemIds are created assert i3.get_schema() == i4.get_schema() ``` -------------------------------- ### Vectorized Entity Creation with kd.new Source: https://google.github.io/koladata/common_pitfalls.html Creates entities in a vectorized manner. Input arguments are aligned to have the same shape before entity creation. Note that string literals are automatically wrapped.
```python # Note 'x' is first wrapped into kd.str('x') then broadcast to kd.str(['x', 'x', 'x']) >>> kd.new(a=kd.slice([1, 2, 3]), b='x', c=kd.new(d=kd.slice([4, 5, 6]))) # DataSlice([ Entity(a=1, b='x', c=Entity(d=4)), Entity(a=2, b='x', c=Entity(d=5)), Entity(a=3, b='x', c=Entity(d=6)), ], schema: ENTITY(a=INT32, b=STRING, c=ENTITY(d=INT32)),…) ``` -------------------------------- ### kd.strings.substr Source: https://google.github.io/koladata/api/kd/strings.html Returns a DataSlice of substrings based on start and end indices. ```APIDOC ## kd.strings.substr(x, start=0, end=None) ### Description Returns a DataSlice of substrings with indices [start, end). Python slicing rules apply, including negative indexing. ### Parameters #### Arguments - **x** (STRING or BYTES) - Required - Text or Bytes DataSlice. - **start** (INT) - Optional - The start index (inclusive). Defaults to 0. - **end** (INT) - Optional - The end index (exclusive). Defaults to string length. ### Response - **Returns** (STRING or BYTES) - A DataSlice of substrings. ``` -------------------------------- ### Initialize Primitive Types Source: https://google.github.io/koladata/overview.html Demonstrates creation of DataItems and DataSlices for various primitive types. ```python >>> kd.int32(1) DataItem(1, schema: INT32) kd.int64([2, 3]) DataSlice([2, 3], schema: INT64…) kd.float32([[1., 2.], [3.]]) DataSlice([[1.0, 2.0], [3.0]], schema: FLOAT32,…) kd.str('string') DataItem('string', schema: STRING) kd.bytes(b'bytes') DataItem(b'bytes', schema: BYTES) kd.bool(True) DataItem(True, schema: BOOLEAN) ``` -------------------------------- ### kd.core.maybe Source: https://google.github.io/koladata/api/kd/core.html Provides a shortcut for getting an attribute with a default None value. ```APIDOC ## `kd.core.maybe(x, attr_name)` ### Description A shortcut for kd.get_attr(x, attr_name, default=None).
``` -------------------------------- ### Create Primitive Items Source: https://google.github.io/koladata/fundamentals.html Demonstrates creating primitive Koda items using `kd.item`, `kd.int32`, `kd.str`, `kd.new`, and `kd.from_py`. Covers conversion to Python types and basic comparisons. ```python kd.item(123) # kd.INT32 kd.int32(123) # the same as above kd.new(123) # the same as above kd.item("hello world") # kd.STRING kd.str("hello world") # the same as above kd.new("hello world") # the same as above ``` ```python kd.from_py("hello world") # kd.STRING kd.to_py(kd.item(123)) # python's int kd.item(123).to_py() # the same as above int(kd.item(123)) # the same as above str(kd.item("hello")) # python's string ``` ```python kd.present # Koda's "True", or mask value indicating a 'present' item kd.missing # Koda's "False", or mask value indicating a 'missing' item kd.item(5) > 3 # kd.present - Koda's True ~(kd.item(5) > 3) # kd.missing - Koda's False kd.item(None) # missing item with none schema/dtype ``` ```python kd.is_primitive(kd.item(123)) # yes ``` -------------------------------- ### Perform Lazy Operator Source: https://google.github.io/koladata/glossary.html Example of a lazy operator using input nodes. ```python I.a + I.b ``` -------------------------------- ### kd.obj vs kd.obj(kd.new) Source: https://google.github.io/koladata/common_pitfalls.html Illustrates the difference between `kd.obj(**kwargs)` and `kd.obj(kd.new(**kwargs))`. The former creates objects with potentially different schemas, while the latter embeds a common schema into each entity. 
```python kwargs_in_ds = dict(a=kd.slice([1, 2, 3]), b=’x’) entities = kd.new(**kwargs_in_ds) objs_1 = kd.obj(entities) # Schemas are the same: [Schema:$7IzNK3toe5Fiq3uZOg396n, Schema:$7IzNK3toe5Fiq3uZOg396n, Schema:$7IzNK3toe5Fiq3uZOg396n] # objs_1.get_obj_schema().get_itemid() # DataSlice(…, schema: ITEMID,…) # Modification of the schema of one item affects the other items upd = kd.attrs(objs_1.S[1], c=4.0) # objs_1.updated(upd) # DataSlice([Obj(a=1, b=’x’, c=None), Obj(a=2, b=’x’, c=4.0), Obj(a=3, b=’x’, c=None)], schema: OBJECT,…) ``` ```python kwargs_in_ds = dict(a=kd.slice([1, 2, 3]), b=’x’) # objs_2 = kd.obj(**kwargs_in_ds) # Schemas are different: [Schema:#6wYkMBuiRTtW7jaIQfyPEy, Schema:#6wYkMBuiRTtW7jaIQfyPEz, Schema:#6wYkMBuiRTtW7jaIQfyPF0] # objs_2.get_obj_schema().get_itemid() # DataSlice(…, schema: ITEMID,…) # Modification of the schema for one item does not affect the other items upd = kd.attrs(objs_2.S[1], c=4.0) # objs_2.updated(upd) # DataSlice([Obj(a=1, b=’x’), Obj(a=2, b=’x’, c=4.0), Obj(a=3, b=’x’)], schema: OBJECT,…) ``` -------------------------------- ### Retrieve view content with get Source: https://google.github.io/koladata/api/kd_ext/kv/view.html Extracts the underlying object represented by the view. ```python view('foo').get() # 'foo' view([[1,2],[3]])[:].get() # ([1,2],[3]). view([[1,2],[3]])[:][:].get() # ((1,2),(3,)). ``` -------------------------------- ### Create and Manage DataItems Source: https://google.github.io/koladata/cheatsheet.html Demonstrates creating DataItems with specific dtypes and verifying their properties. 
```python # Primitive dtypes > > > kd.INT32 DataItem(INT32, schema: SCHEMA) kd.INT64 DataItem(INT64, schema: SCHEMA) kd.FLOAT32 DataItem(FLOAT32, schema: SCHEMA) kd.FLOAT64 DataItem(FLOAT64, schema: SCHEMA) kd.STRING DataItem(STRING, schema: SCHEMA) kd.BYTES DataItem(BYTES, schema: SCHEMA) kd.BOOLEAN DataItem(BOOLEAN, schema: SCHEMA) kd.MASK DataItem(MASK, schema: SCHEMA) # MASK type values > > > kd.present DataItem(present, schema: MASK) kd.missing DataItem(missing, schema: MASK) # DataItem creation > > > i = kd.item(1) > > > assert kd.is_item(i) assert kd.is_primitive(i) kd.get_dtype(i) DataItem(INT32, schema: SCHEMA) # DataItem creation with explicit dtypes > > > kd.int32(1) DataItem(1, schema: INT32) kd.int64(2) DataItem(2, schema: INT64) kd.float32(1.1) DataItem(1.1, schema: FLOAT32) kd.float64(2.2) DataItem(2.2, schema: FLOAT64) kd.str(‘a’) DataItem(‘a’, schema: STRING) kd.bytes(b’a’) DataItem(b’a’, schema: BYTES) kd.bool(True) DataItem(True, schema: BOOLEAN) kd.mask(None) DataItem(missing, schema: MASK) # Or use kd.item with explicit dtype > > > kd.item(1, kd.INT32) DataItem(1, schema: INT32) kd.item(2, kd.INT64) DataItem(2, schema: INT64) # kd.from_py is a universal converter # Same as kd.item > > > kd.from_py(1) DataItem(1, schema: OBJECT) ``` -------------------------------- ### Get Number of Dimensions of DataSlice Source: https://google.github.io/koladata/api/kd/slices.html Returns the number of dimensions of the input DataSlice `x`. ```python kd.get_ndim(x) ``` -------------------------------- ### kd.follow Source: https://google.github.io/koladata/api/kd.html Alias for kd.core.follow. 
```APIDOC ## kd.follow(x) ### Description Alias for kd.core.follow ### Method N/A (Function) ### Endpoint N/A (Function) ### Parameters N/A ### Request Example N/A ### Response N/A ### Response Example N/A ``` -------------------------------- ### Iterate with kd.for_ Source: https://google.github.io/koladata/cheatsheet.html Demonstrates basic iteration over an iterable using kd.for_ compared to a standard Python loop. ```python inputs = [kd.slice([1, 2]), kd.slice([3, 4])] # Python version def foo(iterable): returns = 0 for i in iterable: returns = i + returns return returns foo(inputs) # Koda version def foo(iterable): return kd.for_( iterable, lambda i, returns: kd.namedtuple(returns=(i + returns)), returns=0) foo(kd.iterables.make(*inputs)) ``` -------------------------------- ### Get Schema for DataSlice Source: https://google.github.io/koladata/api/data_slice.html Returns a schema DataItem detailing the type information of this DataSlice. ```python DataSlice.get_schema() ``` -------------------------------- ### Get DataBag size Source: https://google.github.io/koladata/api/data_bag.html Methods to retrieve the approximate size of the DataBag in bytes or triples. ```python DataBag.get_approx_byte_size() ``` ```python DataBag.get_approx_size() ``` -------------------------------- ### Get Attribute QType Source: https://google.github.io/koladata/api/kd/extension_types.html Determines the QType of a specified attribute within an extension type. ```APIDOC ## `kd.extension_types.get_attr_qtype(ext, attr)` ### Description Returns the qtype of the `attr`, or NOTHING if the `attr` is missing. ### Args - **ext** - The extension type instance. - **attr** - The attribute name or QValue. ### Returns - The QType of the attribute, or a representation of NOTHING if not found. 
``` -------------------------------- ### Create Entities and Objects with Deterministic IDs Source: https://google.github.io/koladata/fundamentals.html Utilize `uu` and `uuobj` as shortcuts to create entities and objects with deterministically computed IDs and set their attributes. `kd.new` provides equivalent functionality. ```python kd.uu(x=1, y=2) kd.new(itemid=kd.uuid(x=1, y=2), x=1, y=2) # the same as above kd.uu(x=1, y=2).get_itemid() == kd.uuid(x=1, y=2) # yes ``` ```python # nested uuobj a = kd.uuobj(x=1, y=2, z=kd.uuobj(a=3, b=4)) a.z == kd.uuobj(a=3, b=4) # yes ``` -------------------------------- ### Manage object versions and cloning Source: https://google.github.io/koladata/fundamentals.html Demonstrates cloning and deep cloning to manage distinct object versions. ```python x = kd.obj(a=1, b=2) x1 = x.with_attrs(c=3) x2 = x.with_attrs(c=4) x1.a + x1.c # 4 x2.a + x2.c # 5 x1.c + x2.c # 7 # x1.get_itemid() == x2.get_itemid() # yes x1.enriched(x2.get_bag()).c # 3 - keep x1.c x1.updated(x2.get_bag()).c # 4 - overwrite with x2.c # y = kd.obj(x1=x1, x2=x2) # x2.c overwrites x1.c # y.x.get_itemid() == y.x.get_itemid() # yes # y.x1.c # 4 was overwritten # y.x2.c # 4 # Use clone to create a different copy that can be modified. x = kd.obj(a=1, b=2) x1 = x.clone(c=3) x2 = x.clone(c=4) y = kd.obj(x1=x1, x2=x2) # y.x.get_itemid() != y.x.get_itemid() # yes y.x1.c # 3 y.x2.c # 4 # Note, clone is not recursive: # z = kd.obj(x1a=y.clone().x1.with_attrs(c=6), # x1b=y.clone().x1.with_attrs(c=7)) # z.x1a.c # 7 - overwritten # z.x1b.c # 7 # Use deep_clone to clone everything recursively. z = kd.obj(x1a=y.deep_clone().x1.with_attrs(c=6), x1b=y.deep_clone().x1.with_attrs(c=7)) z.x1a.c # 6 z.x1b.c # 7 ``` -------------------------------- ### Manipulate DataSlices Source: https://google.github.io/koladata/fundamentals.html Examples of querying DataSlice properties such as size, dimensions, shape, and generating indices. 
```python # Root # ├── dim_1:0 # │ ├── dim_2:0 # │ │ ├── dim_3:0 -> 1 # │ │ └── dim_3:1 -> 2 # │ └── dim_2:1 # │ ├── dim_3:0 -> 3 # │ ├── dim_3:1 -> 4 # │ └── dim_3:2 -> 5 # └── dim_1:1 # ├── dim_2:0 # │ └── dim_3:0 -> 6 # ├── dim_2:1 (Empty) # └── dim_2:2 # ├── dim_3:0 -> 7 # ├── dim_3:1 -> 8 # ├── dim_3:2 -> 9 # └── dim_3:3 -> 10 ds = kd.slice([[[1, 2], [3, 4, 5]], [[6], [], [7, 8, 9, 10]]]) ds.get_size() # 10 - total array size: number of items (leaves) ds.get_ndim() # 3 # JaggedShape: number of items (leaves) at each dimension (level) ds.get_shape() # JaggedShape(2, [2, 3], [2, 3, 1, 0, 4]) # kd.index returns the index based on the last dimension kd.index(ds) # [[[0, 1], [0, 1, 2]], [[0], [], [0, 1, 2, 3]]] ``` -------------------------------- ### Creating List Objects Source: https://google.github.io/koladata/common_pitfalls.html Shows the correct way to create list objects using `kd.obj()` by wrapping a Koda list. Direct use of Python lists is not supported. ```python >>> # List objects >>> kd.obj([1, 2, 3]) Traceback (most recent call last): ... ValueError: object with unsupported type: list >>> kd.obj(kd.list([1, 2, 3])) # do this instead DataItem(List[1, 2, 3], schema: OBJECT,...) ``` -------------------------------- ### kd.freeze Source: https://google.github.io/koladata/api/kd.html Alias for kd.core.freeze. ```APIDOC ## kd.freeze(x) ### Description Alias for kd.core.freeze ### Method N/A (Function) ### Endpoint N/A (Function) ### Parameters N/A ### Request Example N/A ### Response N/A ### Response Example N/A ``` -------------------------------- ### Create UUID for list Source: https://google.github.io/koladata/api/kd/ids.html Usage example for generating a UUID specifically for keying list items. 
```python kd.list([1, 2, 3], itemid=kd.uuid_for_list(seed='seed', a=ds(1))) ``` -------------------------------- ### Create UUID for dictionary Source: https://google.github.io/koladata/api/kd/ids.html Usage example for generating a UUID specifically for keying dictionary items. ```python kd.dict(['a', 'b'], [1, 2], itemid=kd.uuid_for_dict(seed='seed', a=ds(1))) ``` -------------------------------- ### Dict Operations Source: https://google.github.io/koladata/cheatsheet.html Demonstrates creating and inspecting dictionaries. ```python # Create a dict from a Python dict >>> d1 = kd.dict({'a': 1, 'b': 2}) >>> d2 = kd.dict(kd.slice(['a', 'b']), ... kd.slice([1, 2])) # Same as above >>> kd.testing.assert_equivalent(d1, d2) # Create multiple dicts >>> d2 = kd.dict(kd.slice([['a', 'b'], ['c']]), ... kd.slice([[1, 2], [3]])) >>> assert kd.is_dict(d1) >>> d1.dict_size() DataItem(2, schema: INT64) ``` -------------------------------- ### GET kd_ext.kv.get_item Source: https://google.github.io/koladata/api/kd_ext/kv.html Retrieves an item or items from a view containing containers, supporting slices and alignment. ```APIDOC ## GET kd_ext.kv.get_item ### Description Returns an item or items from the given view containing containers. It handles slices by adding a new dimension and aligns keys with the view when necessary. If a key or index is missing, it returns None. ### Parameters #### Request Body - **v** (View | int | float | str | bytes | bool | _Present | None) - Required - The view containing the collections to get items from. - **key_or_index** (View | int | float | str | bytes | bool | _Present | None | slice) - Required - The key, index, slice, or indices to retrieve. ``` -------------------------------- ### Entity Creation and Schemas Source: https://google.github.io/koladata/fundamentals.html Create entities with specific schemas and manage schema allocation. 
```python kd.new(x=1, y=2, schema='Point') # Entity(x=1, y=2) r = kd.new(x=1, y=2, z=kd.new(a=3, b=4, schema='Data'), schema='PointWithData') # nested Entity r.z.a # 3 # kd.new can also auto-allocate schemas kd.new(x=1, y=2, schema='Point') == kd.new(x=1, y=2, schema='Point') # yes kd.new(x=1, y=2).get_schema() == kd.new(x=1, y=2).get_schema() # no # Schemas can also be created explicitly. # kd.named_schema('Point') creates exactly the same schema as the one created # by schema='Point' my_schema = kd.named_schema('Point') kd.new(x=1, y=2, schema=my_schema) # set explicit schema kd.new(x=1, y=2, schema='Point').get_schema() == kd.new(x=1, y=2, schema=my_schema).get_schema() # yes # kd.slice([kd.new(x=1, y=2), kd.new(x=2, y=3)]) # fails, as entities have different schemas kd.slice([kd.new(x=1, y=2, schema=my_schema), kd.new(x=2, y=3, schema=my_schema)]) # works kd.slice([kd.new(x=1, y=2, schema='Point'), kd.new(x=2, y=3, schema='Point')]) # works a, b = kd.new(x=1, y=2), kd.new(x=2, y=3) # kd.slice([a, b]) # fails, as entities have different schemas kd.slice([a.with_schema(my_schema), b.with_schema(my_schema)]) # works kd.slice([a, b.with_schema(a.get_schema())]) # works ``` -------------------------------- ### Get Rank of a Shape in KolaData (Alias) Source: https://google.github.io/koladata/api/kd/shapes.html Alias for `kd.shapes.ndim`, returns the rank of the jagged shape. ```python kd.shapes.rank(shape) ```