- DType enum (F32, F16, BF16) with TensorDType trait
- Shape utilities: contiguous_strides, broadcast_shape, broadcast_strides
- Storage with Arc reference counting (CPU Vec<u8> or GPU GpuBuffer)
- Device enum (Cpu, Cuda(id)) with to_device transfer
- Tensor type with strided layout: reshape, transpose, squeeze, unsqueeze
- contiguous() copies non-contiguous views to contiguous layout
- from_slice, zeros, ones constructors
- as_slice<T> for typed CPU read access, data_ptr for GPU kernel launch
- CPU↔GPU roundtrip verified
- All 27 tests pass (12 cuda + 4 shape + 11 tensor)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
16 lines
217 B
TOML
16 lines
217 B
TOML
# Cargo workspace manifest: lists the member crates and the settings they share.
[workspace]
# Feature resolver version, set explicitly at the workspace level.
resolver = "2"
members = [
    "crates/xserv-cuda",
    "crates/xserv-tensor",
]

# Package metadata shared across the workspace. A member crate opts in per key,
# e.g. `version.workspace = true` in its own [package] table.
[workspace.package]
version = "0.1.0"
edition = "2024"
license = "MIT"

# Dependency versions shared across the workspace. A member crate opts in with
# `<name> = { workspace = true }`. Bare version strings are caret requirements.
[workspace.dependencies]
half = "2"
smallvec = "1"