### Basic JSON Conversion Examples

Source: https://google.github.io/koladata/api/kd/json.html

Examples demonstrating the conversion of various Koda data types to JSON strings.

```python
kd.to_json(None) -> kd.str(None)
kd.to_json(kd.missing) -> kd.str(None)
kd.to_json(kd.present) -> 'true'
kd.to_json(True) -> 'true'
kd.to_json(kd.slice([1, None, 3])) -> ['1', None, '3']
kd.to_json(kd.list([1, None, 3])) -> '[1, null, 3]'
kd.to_json(kd.dict({'a': 1, 'b':'2'})) -> '{"a": 1, "b": "2"}'
kd.to_json(kd.new(a=1, b='2')) -> '{"a": 1, "b": "2"}'
kd.to_json(kd.new(x=None)) -> '{"x": null}'
kd.to_json(kd.new(x=kd.missing)) -> '{"x": false}'
kd.to_json(kd.new(a=1, b=None), include_missing_values=False)
  -> '{"a": 1}'
```

--------------------------------

### Object Creation and Attribute Management

Source: https://google.github.io/koladata/cheatsheet.html

Examples of creating objects, accessing attributes, and updating object state.

```python
# Entity objects
>>> o = kd.obj(x=1, y=2)
>>> os = kd.obj(x=kd.slice([1, 2, None]),
...             y=kd.slice([4, None, 6]))
>>> os = kd.slice([kd.obj(x=1),
...                kd.obj(y=2.0),
...                kd.obj(x=1.0, y='a')])
>>> os.get_schema()
DataItem(OBJECT, schema: SCHEMA, ...)
>>> os.get_obj_schema()
DataSlice([
  IMPLICIT_ENTITY(x=INT32),
  IMPLICIT_ENTITY(y=FLOAT32),
  IMPLICIT_ENTITY(x=FLOAT32, y=STRING),
], schema: SCHEMA, ...)
# Use provided itemids
>>> itemid = kd.new_itemid()
>>> o1 = kd.obj(x=1, y=2, itemid=itemid)
>>> o2 = kd.obj(x=1, y=2, itemid=itemid)
>>> assert o1.get_itemid() == o2.get_itemid()
# Get available attributes
>>> os1 = kd.slice([kd.obj(x=1), kd.obj(x=1.0, y='a')])
# Attributes present in all objects
>>> kd.dir(os1)
Traceback (most recent call last):
...
ValueError: dir() cannot determine attribute names because objects have different attributes. Please specify intersection= explicitly.
# Or
>>> kd.dir(os1, intersection=True)
['x']
>>> kd.dir(os1, intersection=False)
['x', 'y']
# Access attribute
>>> o.x
DataItem(1, schema: INT32, ...)
>>> o.get_attr('y')
DataItem(2, schema: INT32, ...)
>>> o.maybe('z')
DataItem(None, schema: NONE, ...)
>>> o.get_attr('z', default=0)
DataItem(0, schema: INT32, ...)
>>> os.get_attr('x', default=0)
DataSlice([1.0, 0.0, 1.0], schema: FLOAT32, ...)
# Objects are immutable by default, modification is done
# by creating a new object with the same ItemId and
# updated attributes
>>> o = kd.obj(x=1, y=2)
# Update a single attribute
>>> o1 = o.with_attr('x', 3)
>>> o1 = o.with_attr('z', 4)
# Also override schema
# no overwrite_schema=True is needed
>>> o1 = o.with_attr('y', 'a')
# Remove a single attribute
>>> o1 = o.with_attr('x', None)
# Update/remove multiple attributes
>>> o2 = o.with_attrs(z=4, x=None)
# Also override schema for 'y'
>>> o2 = o.with_attrs(z=4, y='a')
# Create an update and apply it separately
>>> upd = kd.attrs(o, z=4, y=10)
>>> o3 = o.updated(upd)
# Allows mixing multiple updates
>>> o4 = o.updated(kd.attrs(o, z=4), kd.attrs(o, y=None))
# Update nested attributes
>>> nested = kd.obj(a=kd.obj(c=kd.obj(e=1), d=2), b=3)
>>> nested = nested.updated(kd.attrs(nested.a.c, e=4),
...                         kd.attrs(nested.a, d=5),
...                         kd.attrs(nested, b=6))
# List and dict can be objects too
# To convert a list/dict to an object,
# use kd.obj()
>>> l = kd.list([1, 2, 3])
>>> l_obj = kd.obj(l)
>>> l_obj[:]
DataSlice([1, 2, 3], schema: INT32, ...)
>>> d = kd.dict({'a': 1, 'b': 2})
>>> d_obj = kd.obj(d)
>>> kd.sort(d_obj.get_keys())
DataSlice(['a', 'b'], schema: STRING, ...)
>>> d_obj['a']
DataItem(1, schema: INT32, ...)
# Convert an entity to an object
>>> e = kd.new(x=1, y=2)
>>> e_obj = kd.obj(e)
# Actually, we can pass primitive to kd.obj()
>>> p_obj = kd.obj(1)
>>> p_obj = kd.obj('a')
# An OBJECT Dataslice with entity, list,
# dict and primitive items
>>> kd.slice([kd.obj(a=1), 1, kd.obj(kd.list([1, 2])),
...           kd.obj(kd.dict({'a': 1}))])
DataSlice([Obj(a=1), 1, List[1, 2], Dict{'a'=1}], schema: OBJECT, ...)
```

--------------------------------

### Define Protocol Buffers

Source: https://google.github.io/koladata/cheatsheet.html

Example proto definitions including extensions and nested messages.

```protobuf
message Query {
  string query_text = 1;
  float final_ir = 2;
  repeated Doc docs = 3;
  repeated int32 tags = 4;
  map<string, float> term_weight = 5;
  proto2.bridge.MessageSet ms_extensions = 6;

  extensions 1000 to max
}

message QueryExtension {
  extend Query {
    QueryExtension query_extension = 1000;
  }

  extend proto2.bridge.MessageSet {
    QueryExtension ms_extension = 1000;
  }

  int32 extra = 1;
}

message Doc {
  string url = 1;
  string title = 2;
  float score = 3;
  int32 word_count = 4;
  bool spam = 5;

  enum Type {
    UNDEFINED = 0;
    WEB = 1;
    IMAGE = 2;
  }

  Type type = 6;
}
```

--------------------------------

### Formatting JSON Output

Source: https://google.github.io/koladata/api/kd/json.html

Examples demonstrating the use of indent and ensure_ascii parameters to control the output format.

```python
kd.to_json(kd.list([1, 2, 3]), indent=-1) -> '[1,2,3]'
kd.to_json(kd.list([1, 2, 3]), indent=2) -> '[\n  1,\n  2,\n  3\n]'

kd.to_json('✨', ensure_ascii=True) -> '"\\u2728"'
kd.to_json('✨', ensure_ascii=False) -> '"✨"'
```

--------------------------------

### Dict Creation and Manipulation

Source: https://google.github.io/koladata/cheatsheet.html

Examples of creating dicts with specific item IDs, retrieving keys and values, and performing updates.

```python
itemid = kd.new_dictid()
d3 = kd.dict({'a': 1, 'b': 2}, itemid=itemid)
d4 = kd.dict({'c': 3, 'd': 4}, itemid=itemid)
assert d3.get_itemid() == d4.get_itemid()

>>> k = d1.get_keys(); sorted(k.to_py()) # order of keys/values is arbitrary
['a', 'b']

>>> v = d1.get_values(); sorted(v.to_py()) # order of keys/values is arbitrary
[1, 2]

>>> d1['a']
DataItem(1, schema: INT32,...)

>>> kd.testing.assert_equivalent(kd.get_item(d1, 'a'),d1['a']) # Same as above

# Filter out keys/values
>>> d1.select_keys(lambda k: k != 'b')
DataSlice(['a']...)

>>> d1.select_values(lambda v: v > 1)
DataSlice([2]...)

# Dicts are immutable by default, modification is done
# by creating a new dict with the same ItemId and
# updated key/values

# Update a key/value
>>> d4 = d1.with_dict_update('c', 5)

# Update multiple key/values
>>> another_dict = kd.dict({'a': 3, 'c': 5})
>>> d5 = d1.with_dict_update(another_dict)
>>> d6 = d1.with_dict_update(kd.slice(['a', 'c']),
...                          kd.slice([3, 5])) # Same as above
>>> kd.testing.assert_equivalent(d5, d6)

# Note that dict update does not support
# removing values for now
>>> d2 = d1.with_dict_update('a', None) # Dict{'a': 1, 'b': 2} rather than Dict{'b': 2}
>>> sorted(d2.get_keys().to_py())
['a', 'b']


# Create an update and apply it separately
>>> upd = kd.dict_update(d1, another_dict)
>>> d6 = d1.updated(upd)

# Allows mixing multiple updates
>>> d7 = d1.updated(kd.dict_update(d1, 'c', 5),
...                 kd.dict_update(d1, another_dict))
```

--------------------------------

### Combining DataSlice Operations

Source: https://google.github.io/koladata/overview.html

Example demonstrating combined usage of DataSlice operations.

```python
>>> a = kd.from_py([{'x': 1}, {'x': 3}], dict_as_obj=True)
>>> b = kd.from_py([{'y': 2}, {'y': 4}])
>>> a[:].x + b[:]['y']
DataSlice([3, 7], schema: OBJECT, present: 2/2)
>>> kd.zip(kd.agg_sum(a[:].x), kd.agg_sum(b[:]['y']))
DataSlice([4, 6], schema: OBJECT, present: 2/2)
```

--------------------------------

### Define and inspect primitive schemas

Source: https://google.github.io/koladata/fundamentals.html

Examples of specifying dtypes for DataSlices and retrieving schema information.

```python
kd.slice([1, 2, 3]) # INT32 is chosen by default when converting from Python
kd.slice([1, 2, 3], schema=kd.INT32) # the same as above
kd.slice([1, 2, 3], schema=kd.INT64) # can specify INT64 schema
kd.int64([1, 2, 3])  # the same as above

kd.slice([1., 2., 3.], schema=kd.FLOAT64) # can specify FLOAT64 schema
kd.float64([1., 2., 3.])  # the same as above

kd.slice([1, 2, 3]).get_dtype()  # kd.INT32
kd.slice([1, 2, 3]).get_schema()  # kd.INT32
kd.slice([1., 2, 3]).get_dtype()  # kd.FLOAT32, because 1. is a float, so 2 and 3 are cast to floats
```

--------------------------------

### Aggregation Using Keyword Arguments

Source: https://google.github.io/koladata/cheatsheet.html

Performs the same aggregation as the previous example but uses keyword arguments for clarity and to avoid potential argument order issues.

```python
kd.apply_py(f3, a=a, b=b)
```

--------------------------------

### Create Koda Dictionaries

Source: https://google.github.io/koladata/api/kd/dicts.html

Examples of creating new Koda dicts from Python dictionaries or DataSlices.

```python
dict() -> returns a single new dict
dict({1: 2, 3: 4}) -> returns a single new dict
dict({1: [1, 2]}) -> returns a single dict, mapping 1->List[1, 2]
dict({1: kd.slice([1, 2])}) -> returns a single dict, mapping 1->List[1, 2]
dict({db.uuobj(x=1, y=2): 3}) -> returns a single dict, mapping uuid->3
dict(kd.slice([1, 2]), kd.slice([3, 4]))
  -> returns a dict ({1: 3, 2: 4})
dict(kd.slice([[1], [2]]), kd.slice([3, 4]))
  -> returns a 1-D DataSlice that holds two dicts ({1: 3} and {2: 4})
dict('key', 12) -> returns a single dict mapping 'key'->12
```

--------------------------------

### Schemas Cannot Be Objects

Source: https://google.github.io/koladata/common_pitfalls.html

Provides examples demonstrating that Koda schemas themselves cannot be created as objects using `kd.obj()`.

```python
>>> # They fail
>>> # kd.obj(kd.INT32)
>>> # kd.obj(kd.list_schema(kd.INT32))
>>> 
```

--------------------------------

### Controlling JSON Key Order

Source: https://google.github.io/koladata/api/kd/json.html

Examples showing how to control the order of keys in JSON objects using keys_attr and values_attr.

```python
kd.to_json(kd.new(x=1, y=2)) -> '{"x": 1, "y": 2}'
kd.to_json(kd.new(x=1, y=2, json_object_keys=kd.list(['y', 'x'])))
  -> '{"y": 2, "x": 1}'
kd.to_json(kd.new(x=1, y=2, foo=kd.list(['y', 'x'])), keys_attr='foo')
  -> '{"y": 2, "x": 1}'
kd.to_json(kd.new(x=1, y=2, z=3, json_object_keys=kd.list(['x', 'z', 'x'])))
  -> '{"x": 1, "z": 3, "x": 1}'

kd.to_json(kd.new(json_object_keys=kd.list(['x', 'z', 'x']),
                  json_object_values=kd.list([1, 2, 3])))
  -> '{"x": 1, "z": 2, "x": 3}'
kd.to_json(kd.new(a=kd.list(['x', 'z', 'x']), b=kd.list([1, 2, 3])),
           keys_attr='a', values_attr='b')
  -> '{"x": 1, "z": 2, "x": 3}'
```

--------------------------------

### kd.cond Example with Function (Fails)

Source: https://google.github.io/koladata/common_pitfalls.html

Illustrates a potential failure case with `kd.cond` when one branch is not applicable to the input type. This highlights that both branches must be evaluable.

```python
@kd.fn
def explode_or_default(x, y):
    return kd.cond(kd.is_list(x), x[:], y)

explode_or_default(kd.list([1, 2, 3]), 2)
```

--------------------------------

### Create and Use Entities

Source: https://google.github.io/koladata/cheatsheet.html

Shows how to create entities with named schemas and verify their structure.

```python
# Entity creation with named schema
>>> e = kd.new(x=1, y=2, schema='Point')
>>> es = kd.new(x=kd.slice([1, 2, None]),
...             y=kd.slice([4, None, 6]),
...             schema='Point')

>>> assert e.get_schema() == es.get_schema()

>>> assert e.is_entity()

# Use an existing schema
>>> s = kd.named_schema('Point', x=kd.INT32, y=kd.INT32)
>>> e = kd.new(x=1, y=2, schema=s)
```

--------------------------------

### Parallel Tuple/NamedTuple Field Evaluation

Source: https://google.github.io/koladata/cheatsheet.html

Illustrates how parts of a computation can start evaluating even before a sub-functor's tuple/namedtuple result is fully completed, if they only depend on specific fields. This example shows parallel execution of `outer1` and `outer2` which depend on different elements of the tuple returned by `f`.

```python
@kd.trace_as_fn(functor_factory=kd.py_fn)
def step(x, msg, pause):
  print('Start', msg)
  time.sleep(pause.to_py())
  print('Finish', msg)
  return x + 1

@kd.trace_as_fn(return_type_as=(
    kd.types.DataSlice, kd.types.DataSlice))
def f(x):
  return (
      step(x, 'inner1', 0.1),
      step(x, 'inner2', 0.3))

@kd.trace_as_fn()
def g():
  inner = f(1)
  return (
      step(inner[0], 'outer1', 0.1)
      + step(inner[1], 'outer2', 0.1))

kd.parallel.call_multithreaded(g)
```

--------------------------------

### Generate Integer Range DataSlice

Source: https://google.github.io/koladata/api/kd/slices.html

The `kd.slices.range` function creates a DataSlice of INT64s within a specified range `[start, end)`. `start` and `end` must be broadcastable. If `end` is omitted, `start` is used as `end` and 0 as `start`.

```python
kd.slices.range(start, end=unspecified)
```

```python
kd.range(5) -> kd.slice([0, 1, 2, 3, 4])
```

```python
kd.range(2, 5) -> kd.slice([2, 3, 4])
```

```python
kd.range(5, 2) -> kd.slice([])  # empty range
```

```python
kd.range(kd.slice([2, 4])) -> kd.slice([[0, 1], [0, 1, 2, 3]])
```

```python
kd.range(kd.slice([2, 4]), 6) -> kd.slice([[2, 3, 4, 5], [4, 5]])
```

--------------------------------

### Creating and managing entities and schemas

Source: https://google.github.io/koladata/overview.html

Shows how to create entities, define schemas, and convert from Python objects.

```python
# kd.new creates new entities and assigns schemas to them
>>> kd.new(x=1, y=2, schema='Point')
DataItem(Entity(x=1, y=2), schema: Point(x=INT32, y=INT32), ...)
# Can also explicitly create schema with attributes before using.
>>> my_schema = kd.named_schema('Point', x=kd.INT32, y=kd.INT32)
>>> x = kd.new(x=1, y=2, schema=my_schema)
>>> x.get_schema() == my_schema
# Yes, i.e. a present mask.
DataItem(present, schema: MASK)
# When converting from py, can specify schema
>>> kd.from_py({'x': 1, 'y': 2}, schema=my_schema)
DataItem(Entity(x=1, y=2), schema: Point(x=INT32, y=INT32), ...)
# It's possible to create nested entities
>>> x = kd.new(a=1, b=kd.new(c=3, schema='Inner'), schema='Outer')
>>> x
DataItem(Entity(a=1, b=Entity(c=3)), schema: Outer(a=INT32, b=Inner(c=INT32)), ...)
```

--------------------------------

### Serial Execution Example

Source: https://google.github.io/koladata/cheatsheet.html

This example shows a basic function call where all operations are executed serially, without any parallelization.

```python
kd.parallel.call_multithreaded(
  lambda x, y: x ** 2 + y ** 2,
  x=1, y=2)
```

--------------------------------

### Entity Creation and Attribute Access

Source: https://google.github.io/koladata/cheatsheet.html

Demonstrates creating entities, managing schemas, and accessing attributes.

```python
>>> e2 = s.new(x=1, y=2)
>>> kd.testing.assert_equivalent(e, e2)

# When `schema=` is not provided, a new
# schema is created for each invocation
>>> e1 = kd.new(x=1, y=2)
>>> e2 = kd.new(x=1, y=2)
>>> assert e1.get_schema() != e2.get_schema()

# Use provided itemids
>>> itemid = kd.new_itemid()
>>> e3 = kd.new(x=1, y=2, itemid=itemid)
>>> e4 = kd.new(x=1, y=2, itemid=itemid)
>>> assert e3.get_itemid() == e4.get_itemid()

# Get available attributes
# As all entities share the same schema,
# intersection= argument does not matter for them.
>>> kd.dir(e)
['x', 'y']

# Access attribute
>>> e.x
DataItem(1,...)
>>> e.get_attr('y')
DataItem(2,...)
>>> e.maybe('z')
DataItem(None,...)
>>> e.get_attr('z', default=0)
DataItem(0,...)
>>> es.get_attr('x', default=0)
DataSlice([1, 2, 0],...)
```

--------------------------------

### Get Dimension Sizes in JaggedShape

Source: https://google.github.io/koladata/api/kd/shapes.html

Use `kd.shapes.dim_sizes` to get the sizes of rows at a specific dimension within a JaggedShape. This helps in understanding the structure and distribution of data.

```python
shape = kd.shapes.new([2], [2, 1])
kd.shapes.dim_sizes(shape, 0)  # -> kd.slice([2])
kd.shapes.dim_sizes(shape, 1)  # -> kd.slice([2, 1])
```

--------------------------------

### Creating Entity Objects

Source: https://google.github.io/koladata/common_pitfalls.html

Demonstrates how to create entity objects using `kd.obj()` with variadic keyword arguments or by wrapping an existing Koda entity.

```python
>>> # Entity objects
>>> kd.obj(a=1, b=2)
DataItem(Obj(a=1, b=2), schema: OBJECT,...)
>>> kd.obj(kd.new(a=1, b=2))
DataItem(Obj(a=1, b=2), schema: OBJECT,...)
```

--------------------------------

### Get Extension Class from QType

Source: https://google.github.io/koladata/api/kd/extension_types.html

Converts a Koda QType into its corresponding extension type class. This allows you to get the Python class definition from its QType representation.

```python
kd.extension_types.get_extension_cls(qtype: QType) -> type[Any]
```

--------------------------------

### kd.expr.get_name

Source: https://google.github.io/koladata/api/kd/expr.html

Gets the name of an Expr if it has one.

```APIDOC
## kd.expr.get_name

### Description
Returns the name of the given Expr, or None if it does not have one.

### Method
N/A (Function)

### Endpoint
N/A

### Parameters
#### Path Parameters
N/A

#### Query Parameters
N/A

#### Request Body
N/A

### Request Example
N/A

### Response
#### Success Response (200)
- **str | None** - The name of the Expr, or None.

#### Response Example
N/A
```

--------------------------------

### Navigate and Index DataSlices

Source: https://google.github.io/koladata/fundamentals.html

Demonstrates indexing and size aggregation on DataSlices.

```python
kd.index(ds, dim=2)  # the same as above, as there are 3 dimensions
kd.index(ds, dim=0)  # [[[0, 0], [0, 0, 0]], [[1], [], [1, 1, 1, 1]]]

kd.agg_size(ds)  # [[2, 3], [1, 0, 4]] - last dimension sizes
```

--------------------------------

### Get Shape of DataSlice

Source: https://google.github.io/koladata/api/data_slice.html

Returns the shape of the DataSlice.

```python
DataSlice.get_shape()
```

--------------------------------

### Importing Koda

Source: https://google.github.io/koladata/cheatsheet.html

How to import the Koda library and its extensions.

````APIDOC
## Import Koda

### Description
Import the main Koda library and optional extension libraries.

### Code Example
```python
from koladata import kd
# Optional extension libraries
# from koladata import kd_ext
```
````

--------------------------------

### kd.shapes.ndim

Source: https://google.github.io/koladata/api/kd/shapes.html

Gets the rank (number of dimensions) of a jagged shape.

````APIDOC
## kd.shapes.ndim(shape)

### Description
Returns the rank of the jagged shape.

### Method
N/A (Function)

### Endpoint
N/A (Function)

### Parameters
#### Path Parameters
None

#### Query Parameters
None

#### Request Body
None

### Request Example
```python
# Example usage (conceptual, actual API call might differ)
# Assuming kd.shapes.new is defined
# shape = kd.shapes.new(2, 3, 1)
# print(kd.shapes.ndim(shape))
```

### Response
#### Success Response (200)
- **rank** (integer) - The number of dimensions in the shape.

#### Response Example
```json
{
  "rank": 3
}
```
````

--------------------------------

### Create Entities with Derived Schemas

Source: https://google.github.io/koladata/cheatsheet.html

Demonstrates creating entities using kd.new() where schemas are automatically derived or specified via uu schema names.

```python
# kd.new() creates entities with derived schema
i1 = kd.new(x=1, y=2.0, z='3')

# The result DataItem has an auto-derived schema
assert i1.get_schema().x == kd.INT32
assert i1.get_schema().y == kd.FLOAT32
assert i1.get_schema().z == kd.STRING

i2 = kd.new(x=1, y=2.0, z='3')
# Schemas are different because two schemas
# with different ItemIds are created
assert i1.get_schema() != i2.get_schema()

i3 = kd.new(x=1, y=2.0, schema='Point')
i4 = kd.new(x=2, y=3.0, schema='Point')
# Schemas are the same because two uu schemas
# with the same ItemIds are created
assert i3.get_schema() == i4.get_schema()
```

--------------------------------

### Vectorized Entity Creation with kd.new

Source: https://google.github.io/koladata/common_pitfalls.html

Creates entities in a vectorized manner. Input arguments are aligned to have the same shape before entity creation. Note that string literals are automatically wrapped.

```python
# Note 'x' is first wrapped into kd.str('x') then broadcasted to kd.str(['x', 'x', 'x'])
>>> kd.new(a=kd.slice([1, 2, 3]), b='x', c=kd.new(d=kd.slice([4, 5, 6])))
# DataSlice([ Entity(a=1, b='x', c=Entity(d=4)), Entity(a=2, b='x', c=Entity(d=5)), Entity(a=3, b='x', c=Entity(d=6)), ], schema: ENTITY(a=INT32, b=STRING, c=ENTITY(d=INT32)), ...)
```

--------------------------------

### kd.strings.substr

Source: https://google.github.io/koladata/api/kd/strings.html

Returns a DataSlice of substrings based on start and end indices.

```APIDOC
## kd.strings.substr(x, start=0, end=None)

### Description
Returns a DataSlice of substrings with indices [start, end). Python slicing rules apply, including negative indexing.

### Parameters
#### Arguments
- **x** (STRING or BYTES) - Required - Text or Bytes DataSlice.
- **start** (INT) - Optional - The start index (inclusive). Defaults to 0.
- **end** (INT) - Optional - The end index (exclusive). Defaults to string length.

### Response
- **Returns** (STRING or BYTES) - A DataSlice of substrings.
```

--------------------------------

### Initialize Primitive Types

Source: https://google.github.io/koladata/overview.html

Demonstrates creation of DataItems and DataSlices for various primitive types.

```python
>>> kd.int32(1)
DataItem(1, schema: INT32)
>>> kd.int64([2, 3])
DataSlice([2, 3], schema: INT64, ...)
>>> kd.float32([[1., 2.], [3.]])
DataSlice([[1.0, 2.0], [3.0]], schema: FLOAT32, ...)
>>> kd.str('string')
DataItem('string', schema: STRING)
>>> kd.bytes(b'bytes')
DataItem(b'bytes', schema: BYTES)
>>> kd.bool(True)
DataItem(True, schema: BOOLEAN)
```

--------------------------------

### kd.core.maybe

Source: https://google.github.io/koladata/api/kd/core.html

Provides a shortcut for getting an attribute with a default None value.

```APIDOC
## `kd.core.maybe(x, attr_name)`

### Description
A shortcut for kd.get_attr(x, attr_name, default=None).
```

--------------------------------

### Create Primitive Items

Source: https://google.github.io/koladata/fundamentals.html

Demonstrates creating primitive Koda items using `kd.item`, `kd.int32`, `kd.str`, `kd.new`, and `kd.from_py`. Covers conversion to Python types and basic comparisons.

```python
kd.item(123)  # kd.INT32
kd.int32(123) # the same as above
kd.new(123)  # the same as above
kd.item("hello world")  # kd.STRING
kd.str("hello world") # the same as above
kd.new("hello world")  # the same as above
```

```python
kd.from_py("hello world")  # kd.STRING
kd.to_py(kd.item(123))  # python's int
kd.item(123).to_py()  # the same as above
int(kd.item(123))  # the same as above
str(kd.item("hello"))  # python's string
```

```python
kd.present  # Koda's "True", or mask value indicating a 'present' item
kd.missing  # Koda's "False", or mask value indicating a 'missing' item
kd.item(5) > 3  # kd.present - Koda's True
~(kd.item(5) > 3)  # kd.missing - Koda's False
kd.item(None)  # missing item with none schema/dtype
```

```python
kd.is_primitive(kd.item(123))  # yes
```

--------------------------------

### Perform Lazy Operator

Source: https://google.github.io/koladata/glossary.html

Example of a lazy operator using input nodes.

```python
I.a + I.b
```

--------------------------------

### kd.obj vs kd.obj(kd.new)

Source: https://google.github.io/koladata/common_pitfalls.html

Illustrates the difference between `kd.obj(**kwargs)` and `kd.obj(kd.new(**kwargs))`. The former creates objects with potentially different schemas, while the latter embeds a common schema into each entity.

```python
kwargs_in_ds = dict(a=kd.slice([1, 2, 3]), b='x')
entities = kd.new(**kwargs_in_ds)
objs_1 = kd.obj(entities)
# Schemas are the same: [Schema:$7IzNK3toe5Fiq3uZOg396n, Schema:$7IzNK3toe5Fiq3uZOg396n, Schema:$7IzNK3toe5Fiq3uZOg396n]
# objs_1.get_obj_schema().get_itemid()
# DataSlice(..., schema: ITEMID, ...)
# Modification of the schema of one item affects the other items
upd = kd.attrs(objs_1.S[1], c=4.0)
# objs_1.updated(upd)
# DataSlice([Obj(a=1, b='x', c=None), Obj(a=2, b='x', c=4.0), Obj(a=3, b='x', c=None)], schema: OBJECT, ...)
```

```python
kwargs_in_ds = dict(a=kd.slice([1, 2, 3]), b='x')
objs_2 = kd.obj(**kwargs_in_ds)
# Schemas are different: [Schema:#6wYkMBuiRTtW7jaIQfyPEy, Schema:#6wYkMBuiRTtW7jaIQfyPEz, Schema:#6wYkMBuiRTtW7jaIQfyPF0]
# objs_2.get_obj_schema().get_itemid()
# DataSlice(..., schema: ITEMID, ...)
# Modification of the schema for one item does not affect the other items
upd = kd.attrs(objs_2.S[1], c=4.0)
# objs_2.updated(upd)
# DataSlice([Obj(a=1, b='x'), Obj(a=2, b='x', c=4.0), Obj(a=3, b='x')], schema: OBJECT, ...)
```

--------------------------------

### Retrieve view content with get

Source: https://google.github.io/koladata/api/kd_ext/kv/view.html

Extracts the underlying object represented by the view.

```python
view('foo').get()
# 'foo'
view([[1,2],[3]])[:].get()
# ([1,2],[3])
view([[1,2],[3]])[:][:].get()
# ((1,2),(3,))
```

--------------------------------

### Create and Manage DataItems

Source: https://google.github.io/koladata/cheatsheet.html

Demonstrates creating DataItems with specific dtypes and verifying their properties.

```python
# Primitive dtypes
>>> kd.INT32
DataItem(INT32, schema: SCHEMA)
>>> kd.INT64
DataItem(INT64, schema: SCHEMA)
>>> kd.FLOAT32
DataItem(FLOAT32, schema: SCHEMA)
>>> kd.FLOAT64
DataItem(FLOAT64, schema: SCHEMA)
>>> kd.STRING
DataItem(STRING, schema: SCHEMA)
>>> kd.BYTES
DataItem(BYTES, schema: SCHEMA)
>>> kd.BOOLEAN
DataItem(BOOLEAN, schema: SCHEMA)
>>> kd.MASK
DataItem(MASK, schema: SCHEMA)
# MASK type values
>>> kd.present
DataItem(present, schema: MASK)
>>> kd.missing
DataItem(missing, schema: MASK)
# DataItem creation
>>> i = kd.item(1)
>>> assert kd.is_item(i)
>>> assert kd.is_primitive(i)
>>> kd.get_dtype(i)
DataItem(INT32, schema: SCHEMA)
# DataItem creation with explicit dtypes
>>> kd.int32(1)
DataItem(1, schema: INT32)
>>> kd.int64(2)
DataItem(2, schema: INT64)
>>> kd.float32(1.1)
DataItem(1.1, schema: FLOAT32)
>>> kd.float64(2.2)
DataItem(2.2, schema: FLOAT64)
>>> kd.str('a')
DataItem('a', schema: STRING)
>>> kd.bytes(b'a')
DataItem(b'a', schema: BYTES)
>>> kd.bool(True)
DataItem(True, schema: BOOLEAN)
>>> kd.mask(None)
DataItem(missing, schema: MASK)
# Or use kd.item with explicit dtype
>>> kd.item(1, kd.INT32)
DataItem(1, schema: INT32)
>>> kd.item(2, kd.INT64)
DataItem(2, schema: INT64)
# kd.from_py is a universal converter
# Same as kd.item
>>> kd.from_py(1)
DataItem(1, schema: OBJECT)
```

--------------------------------

### Get Number of Dimensions of DataSlice

Source: https://google.github.io/koladata/api/kd/slices.html

Returns the number of dimensions of the input DataSlice `x`.

```python
kd.get_ndim(x)
```

--------------------------------

### kd.follow

Source: https://google.github.io/koladata/api/kd.html

Alias for kd.core.follow.

```APIDOC
## kd.follow(x)

### Description
Alias for kd.core.follow

### Method
N/A (Function)

### Endpoint
N/A (Function)

### Parameters
N/A

### Request Example
N/A

### Response
N/A

### Response Example
N/A
```

--------------------------------

### Iterate with kd.for_

Source: https://google.github.io/koladata/cheatsheet.html

Demonstrates basic iteration over an iterable using kd.for_ compared to a standard Python loop.

```python
inputs = [kd.slice([1, 2]), kd.slice([3, 4])]

# Python version
def foo(iterable):
  returns = 0
  for i in iterable:
    returns = i + returns
  return returns

foo(inputs)

# Koda version
def foo(iterable):
  return kd.for_(
      iterable,
      lambda i, returns:
          kd.namedtuple(returns=(i + returns)),
      returns=0)

foo(kd.iterables.make(*inputs))
```

--------------------------------

### Get Schema for DataSlice

Source: https://google.github.io/koladata/api/data_slice.html

Returns a schema DataItem detailing the type information of this DataSlice.

```python
DataSlice.get_schema()
```

--------------------------------

### Get DataBag size

Source: https://google.github.io/koladata/api/data_bag.html

Methods to retrieve the approximate size of the DataBag in bytes or triples.

```python
DataBag.get_approx_byte_size()
```

```python
DataBag.get_approx_size()
```

--------------------------------

### Get Attribute QType

Source: https://google.github.io/koladata/api/kd/extension_types.html

Determines the QType of a specified attribute within an extension type.

```APIDOC
## `kd.extension_types.get_attr_qtype(ext, attr)`

### Description
Returns the qtype of the `attr`, or NOTHING if the `attr` is missing.

### Args
- **ext** - The extension type instance.
- **attr** - The attribute name or QValue.

### Returns
- The QType of the attribute, or a representation of NOTHING if not found.
```

--------------------------------

### Create Entities and Objects with Deterministic IDs

Source: https://google.github.io/koladata/fundamentals.html

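Use `kd.uu` and `kd.uuobj` as shortcuts to create entities and objects whose IDs are computed deterministically from their attributes, setting those attributes at the same time. Calling `kd.new` with `itemid=kd.uuid(...)` built from the same attributes is equivalent to `kd.uu`.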

```python
kd.uu(x=1, y=2)
kd.new(itemid=kd.uuid(x=1, y=2), x=1, y=2)  # the same as above
kd.uu(x=1, y=2).get_itemid() == kd.uuid(x=1, y=2)  # yes
```

```python
# nested uuobj
a = kd.uuobj(x=1, y=2, z=kd.uuobj(a=3, b=4))
a.z == kd.uuobj(a=3, b=4)  # yes
```

--------------------------------

### Manage object versions and cloning

Source: https://google.github.io/koladata/fundamentals.html

Demonstrates cloning and deep cloning to manage distinct object versions.

```python
x = kd.obj(a=1, b=2)
x1 = x.with_attrs(c=3)
x2 = x.with_attrs(c=4)
x1.a + x1.c  # 4
x2.a + x2.c  # 5
x1.c + x2.c  # 7
# x1.get_itemid() == x2.get_itemid()  # yes

x1.enriched(x2.get_bag()).c  # 3 - keep x1.c
x1.updated(x2.get_bag()).c  # 4 - overwrite with x2.c

# y = kd.obj(x1=x1, x2=x2) # x2.c overwrites x1.c
# y.x1.get_itemid() == y.x2.get_itemid()  # yes
# y.x1.c  # 4 was overwritten
# y.x2.c  # 4

# Use clone to create a different copy that can be modified.
x = kd.obj(a=1, b=2)
x1 = x.clone(c=3)
x2 = x.clone(c=4)
y = kd.obj(x1=x1, x2=x2)
# y.x1.get_itemid() != y.x2.get_itemid()  # yes
y.x1.c  # 3
y.x2.c  # 4

# Note, clone is not recursive:
# z = kd.obj(x1a=y.clone().x1.with_attrs(c=6),
#            x1b=y.clone().x1.with_attrs(c=7))
# z.x1a.c  # 7 - overwritten
# z.x1b.c  # 7

# Use deep_clone to clone everything recursively.

z = kd.obj(x1a=y.deep_clone().x1.with_attrs(c=6),
           x1b=y.deep_clone().x1.with_attrs(c=7))
z.x1a.c  # 6
z.x1b.c  # 7
```

--------------------------------

### Manipulate DataSlices

Source: https://google.github.io/koladata/fundamentals.html

Examples of querying DataSlice properties such as size, dimensions, shape, and generating indices.

```python
# Root
# ├── dim_1:0
# │   ├── dim_2:0
# │   │   ├── dim_3:0 -> 1
# │   │   └── dim_3:1 -> 2
# │   └── dim_2:1
# │       ├── dim_3:0 -> 3
# │       ├── dim_3:1 -> 4
# │       └── dim_3:2 -> 5
# └── dim_1:1
#     ├── dim_2:0
#     │   └── dim_3:0 -> 6
#     ├── dim_2:1 (Empty)
#     └── dim_2:2
#         ├── dim_3:0 -> 7
#         ├── dim_3:1 -> 8
#         ├── dim_3:2 -> 9
#         └── dim_3:3 -> 10
ds = kd.slice([[[1, 2], [3, 4, 5]], [[6], [], [7, 8, 9, 10]]])

ds.get_size() # 10 - total array size: number of items (leaves)
ds.get_ndim() # 3
# JaggedShape: number of items (leaves) at each dimension (level)
ds.get_shape() # JaggedShape(2, [2, 3], [2, 3, 1, 0, 4])

# kd.index returns the index based on the last dimension
kd.index(ds)  # [[[0, 1], [0, 1, 2]], [[0], [], [0, 1, 2, 3]]]
```

--------------------------------

### Creating List Objects

Source: https://google.github.io/koladata/common_pitfalls.html

Shows the correct way to create list objects using `kd.obj()` by wrapping a Koda list. Direct use of Python lists is not supported.

```python
>>> # List objects
>>> kd.obj([1, 2, 3])
Traceback (most recent call last):
   ...
ValueError: object with unsupported type: list
>>> kd.obj(kd.list([1, 2, 3])) # do this instead
DataItem(List[1, 2, 3], schema: OBJECT,...)
```

--------------------------------

### kd.freeze

Source: https://google.github.io/koladata/api/kd.html

Alias for kd.core.freeze.

```APIDOC
## kd.freeze(x)

### Description
Alias for kd.core.freeze

### Method
N/A (Function)

### Endpoint
N/A (Function)

### Parameters
N/A

### Request Example
N/A

### Response
N/A

### Response Example
N/A
```

--------------------------------

### Create UUID for list

Source: https://google.github.io/koladata/api/kd/ids.html

Usage example: create a list whose ItemId is generated by `kd.uuid_for_list` from a seed and keyword arguments, passed to `kd.list` via `itemid=`.

```python
kd.list([1, 2, 3], itemid=kd.uuid_for_list(seed='seed', a=ds(1)))
```

--------------------------------

### Create UUID for dictionary

Source: https://google.github.io/koladata/api/kd/ids.html

Usage example: create a dict whose ItemId is generated by `kd.uuid_for_dict` from a seed and keyword arguments, passed to `kd.dict` via `itemid=`.

```python
kd.dict(['a', 'b'], [1, 2], itemid=kd.uuid_for_dict(seed='seed', a=ds(1)))
```

--------------------------------

### Dict Operations

Source: https://google.github.io/koladata/cheatsheet.html

Demonstrates creating and inspecting dictionaries.

```python
# Create a dict from a Python dict
>>> d1 = kd.dict({'a': 1, 'b': 2})
>>> d2 = kd.dict(kd.slice(['a', 'b']),
...              kd.slice([1, 2])) # Same as above
>>> kd.testing.assert_equivalent(d1, d2)

# Create multiple dicts
>>> d2 = kd.dict(kd.slice([['a', 'b'], ['c']]),
...              kd.slice([[1, 2], [3]]))

>>> assert kd.is_dict(d1)

>>> d1.dict_size()
DataItem(2, schema: INT64)
```

--------------------------------

### GET kd_ext.kv.get_item

Source: https://google.github.io/koladata/api/kd_ext/kv.html

Retrieves an item or items from a view containing containers, supporting slices and alignment.

```APIDOC
## GET kd_ext.kv.get_item

### Description
Returns an item or items from the given view containing containers. It handles slices by adding a new dimension and aligns keys with the view when necessary. If a key or index is missing, it returns None.

### Parameters
#### Request Body
- **v** (View | int | float | str | bytes | bool | _Present | None) - Required - The view containing the collections to get items from.
- **key_or_index** (View | int | float | str | bytes | bool | _Present | None | slice) - Required - The key, index, slice, or indices to retrieve.
```

--------------------------------

### Entity Creation and Schemas

Source: https://google.github.io/koladata/fundamentals.html

Create entities with specific schemas and manage schema allocation.

```python
kd.new(x=1, y=2, schema='Point')  # Entity(x=1, y=2)
r = kd.new(x=1, y=2, z=kd.new(a=3, b=4, schema='Data'), schema='PointWithData')  # nested Entity
r.z.a  # 3

# kd.new can also auto-allocate schemas
kd.new(x=1, y=2, schema='Point') == kd.new(x=1, y=2, schema='Point')  # yes
kd.new(x=1, y=2).get_schema() == kd.new(x=1, y=2).get_schema()  # no

# Schemas can also be created explicitly.
# kd.named_schema('Point') creates exactly the same schemas as the one created
# by schema='Point'
my_schema = kd.named_schema('Point')
kd.new(x=1, y=2, schema=my_schema)  # set explicit schema
kd.new(x=1, y=2, schema='Point').get_schema() == kd.new(x=1, y=2, schema=my_schema).get_schema()  # yes

# kd.slice([kd.new(x=1, y=2), kd.new(x=2, y=3)]) # fails, as entities have different schemas
kd.slice([kd.new(x=1, y=2, schema=my_schema), kd.new(x=2, y=3, schema=my_schema)]) # works
kd.slice([kd.new(x=1, y=2, schema='Point'), kd.new(x=2, y=3, schema='Point')]) # works

a, b = kd.new(x=1, y=2), kd.new(x=2, y=3)
# kd.slice([a, b]) # fails, as entities have different schemas
kd.slice([a.with_schema(my_schema), b.with_schema(my_schema)])  # works
kd.slice([a, b.with_schema(a.get_schema())])  # works
```

--------------------------------

### Get Rank of a Shape in Koladata (Alias)

Source: https://google.github.io/koladata/api/kd/shapes.html

Alias for `kd.shapes.ndim`, returns the rank of the jagged shape.

```python
kd.shapes.rank(shape)
```