def build(
    inputs: Union[schedule.Schedule, PrimFunc, IRModule, Mapping[str, IRModule]],
    args: Optional[List[Union[Buffer, tensor.Tensor, Var]]] = None,
    target: Optional[Union[str, Target]] = None,
    target_host: Optional[Union[str, Target]] = None,
    name: Optional[str] = "default_function",
    binds: Optional[Mapping[tensor.Tensor, Buffer]] = None,
):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : Union[tvm.te.schedule.Schedule, tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]]
        The input to be built.

    args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]]
        The argument lists to the function.

    target : Optional[Union[str, Target]]
        The target and option of the compilation.

    target_host : Optional[Union[str, Target]]
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host (CPU) side code to interact with the driver
        to set up the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    name : Optional[str]
        The name of the result function.

    binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]]
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    --------
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:

    1. it is an IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.te.create_schedule(C.op)
        m = tvm.lower(s, [A, B, C], name="test_add")
        rt_mod = tvm.build(m, target="llvm")

    2. it is a dict of compilation target to IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.te.create_schedule(C.op)
        with tvm.target.cuda() as cuda_tgt:
            s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
        m1 = tvm.lower(s1, [A, B, C], name="test_add1")
        m2 = tvm.lower(s2, [A, B, C], name="test_add2")
        rt_mod = tvm.build({"llvm": m1, "cuda": m2}, target_host="llvm")

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    # Lower non-dict inputs down to a single IRModule; a dict input is assumed
    # to already map compilation targets to lowered IRModules.
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        input_mod = lower(inputs, args, name=name, binds=binds)
    elif isinstance(inputs, (list, tuple, container.Array)):
        # A sequence of lowerable objects: lower each and merge into one module.
        merged_mod = tvm.IRModule({})
        for x in inputs:
            merged_mod.update(lower(x))
        input_mod = merged_mod
    elif isinstance(inputs, (tvm.IRModule, PrimFunc)):
        input_mod = lower(inputs)
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError(
            f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
            f"but got {type(inputs)}."
        )

    # Build the {target: IRModule} mapping that drives per-target codegen.
    if not isinstance(inputs, (dict, container.Map)):
        target = Target.current() if target is None else target
        # Fall back to llvm when no target is given and none is active.
        target = target if target else "llvm"
        target_input_mod = {target: input_mod}
    else:
        target_input_mod = inputs

    for tar, mod in target_input_mod.items():
        if not isinstance(tar, (str, Target)):
            raise ValueError("The key of inputs must be str or " "Target when inputs is dict.")
        if not isinstance(mod, tvm.IRModule):
            raise ValueError("inputs must be Schedule, IRModule," "or dict of str to IRModule.")

    # Fold any host target embedded in the device targets into target_host.
    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host
    )

    # Infer a host target if none was given: prefer a CPU target among the
    # given targets, otherwise llvm (or stackvm when llvm is not enabled).
    if not target_host:
        for tar, mod in target_input_mod.items():
            tar = Target(tar)
            device_type = ndarray.device(tar.kind.name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm"

    # Re-run the consistency update now that target_host is definitely set.
    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host
    )

    # Compile per target: host-side code accumulates into one IRModule while
    # device code becomes separate runtime modules imported afterwards.
    mod_host_all = tvm.IRModule({})

    device_modules = []
    for tar, input_mod in target_input_mod.items():
        mod_host, mdev = _build_for_device(input_mod, tar, target_host)
        mod_host_all.update(mod_host)
        device_modules.append(mdev)

    # Generate a unified host module.
    rt_mod_host = codegen.build_module(mod_host_all, target_host)

    # Import all modules.
    for mdev in device_modules:
        if mdev:
            rt_mod_host.import_module(mdev)

    if not isinstance(target_host, Target):
        target_host = Target(target_host)

    # When targeting the C runtime as a system library, wrap the host module
    # with CRT metadata so it can link into a standalone system library.
    if (
        target_host.attrs.get("runtime", tvm.runtime.String("c++")) == "c"
        and target_host.attrs.get("system-lib", 0) == 1
    ):
        if target_host.kind.name == "c":
            create_csource_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateCSourceCrtMetadataModule"
            )
            to_return = create_csource_crt_metadata_module([rt_mod_host], target_host)
        elif target_host.kind.name == "llvm":
            create_llvm_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateLLVMCrtMetadataModule"
            )
            to_return = create_llvm_crt_metadata_module([rt_mod_host], target_host)
    else:
        to_return = rt_mod_host

    return OperatorModule.from_module(to_return, ir_module_by_target=target_input_mod, name=name)
wasm_memory_init_with_pool(void *mem, unsigned int bytes)
{
mem_allocator_t _allocator = mem_allocator_create(mem, bytes);
if (_allocator) {
memory_mode = MEMORY_MODE_POOL;
pool_allocator = _allocator;
global_pool_size = bytes;
return true;
}
LOG_ERROR("Init memory with pool (%p, %u) failed.\n", mem, bytes);
return false;
}
mem_allocator_t mem_allocator_create(void *mem, uint32_t size)
{
    /* The pool allocator is simply a GC heap built over the given buffer. */
    char *pool_base = (char *) mem;
    return gc_init_with_pool(pool_base, size);
}
`mem_allocator_create` in turn invokes `gc_init_with_pool`:
gc_handle_t
gc_init_with_pool(char *buf, gc_size_t buf_size)
{
    /* Reject pools too small to hold the heap bookkeeping plus a usable
       data area. */
    if (buf_size < APP_HEAP_SIZE_MIN) {
        os_printf("[GC_ERROR]heap init buf size (%u) < %u\n",
                  buf_size, APP_HEAP_SIZE_MIN);
        return NULL;
    }

    char *end = buf + buf_size;
    /* The heap header lives at the first 8-byte-aligned address in buf. */
    char *aligned_start = (char *)(((uintptr_t) buf + 7) & (uintptr_t)~7);
    gc_heap_t *heap = (gc_heap_t *) aligned_start;
    /* The managed data area follows the header, re-aligned to 8 bytes,
       plus the configured head padding. */
    char *data_start = aligned_start + sizeof(gc_heap_t);
    data_start = (char *)(((uintptr_t) data_start + 7) & (uintptr_t)~7)
                 + GC_HEAD_PADDING;
    /* Usable size is whatever remains, rounded down to a multiple of 8. */
    gc_size_t usable_size = (uint32)(end - data_start) & (uint32)~7;

    return gc_init_internal(heap, data_start, usable_size);
}
`gc_init_with_pool` then invokes the internal implementation:
/* Initialize the GC heap bookkeeping inside a caller-provided region.
 *
 * heap          - 8-byte-aligned location for the gc_heap_t header
 * base_addr     - start of the aligned data area the heap will manage
 * heap_max_size - size in bytes of the data area (multiple of 8)
 *
 * Returns the heap handle, or NULL if the heap lock cannot be created.
 */
static gc_handle_t
gc_init_internal(gc_heap_t *heap, char *base_addr, gc_size_t heap_max_size)
{
    hmu_tree_node_t *root = NULL, *q = NULL;
    int ret;

    memset(heap, 0, sizeof *heap);
    memset(base_addr, 0, heap_max_size);

    ret = os_mutex_init(&heap->lock);
    if (ret != BHT_OK) {
        os_printf("[GC_ERROR]failed to init lock\n");
        return NULL;
    }

    /* init all data structures*/
    heap->current_size = heap_max_size;
    heap->base_addr = (gc_uint8*)base_addr;
    heap->heap_id = (gc_handle_t)heap;
    heap->total_free_size = heap->current_size;
    heap->highmark_size = 0;

    /* Sentinel root of the free-chunk tree, embedded in the heap header. */
    root = &heap->kfc_tree_root;
    memset(root, 0, sizeof *root);
    root->size = sizeof *root;
    hmu_set_ut(&root->hmu_header, HMU_FC);
    hmu_set_size(&root->hmu_header, sizeof *root);

    /* The entire data area starts out as one free chunk hung off the root. */
    q = (hmu_tree_node_t *) heap->base_addr;
    memset(q, 0, sizeof *q);
    hmu_set_ut(&q->hmu_header, HMU_FC);
    hmu_set_size(&q->hmu_header, heap->current_size);
    hmu_mark_pinuse(&q->hmu_header);
    root->right = q;
    q->parent = root;
    q->size = heap->current_size;

    bh_assert(root->size <= HMU_FC_NORMAL_MAX_SIZE);

#if WASM_ENABLE_MEMORY_TRACING != 0
    /* BUGFIX: the original trace referenced `buf_size`, which is not in scope
       in this function (it is a parameter of gc_init_with_pool), so enabling
       memory tracing broke the build. Report only in-scope values, and cast
       the size_t result of sizeof to match the %u conversion. */
    os_printf("Heap created, heap struct size: %u\n",
              (uint32)sizeof(gc_heap_t));
    os_printf("  actual heap size: %u\n", heap_max_size);
#endif
    return heap;
}
PLDI'16
Hardware support for isolated execution (such as Intel SGX) enables development of applications that keep their code and data confidential even while running in a hostile or compromised host. However, automatically verifying that such applications satisfy confidentiality remains challenging. We present a methodology for designing such applications in a way that enables certifying their confidentiality. Our methodology consists of forcing the application to communicate with the external world through a narrow interface, compiling it with runtime checks that aid verification, and linking it with a small runtime that implements the narrow interface. The runtime includes services such as secure communication channels and memory management. We formalize this restriction on the application as Information Release Confinement (IRC), and we show that it allows us to decompose the task of proving confidentiality into (a) one-time, human-assisted functional verification of the runtime to ensure that it does not leak secrets, (b) automatic verification of the application's machine code to ensure that it satisfies IRC and does not directly read or corrupt the runtime's internal state. We present /CONFIDENTIAL: a verifier for IRC that is modular, automatic, and keeps our compiler out of the trusted computing base. Our evaluation suggests that the methodology scales to real-world applications.
The enclave may only `send` and `recv` data across the boundary of the SIR (Secure Isolated Region).

Recent proposals for trusted hardware platforms, such as Intel SGX and the MIT Sanctum processor, offer compelling security features but lack formal guarantees. We introduce a verification methodology based on a trusted abstract platform (TAP), a formalization of idealized enclave platforms along with a parameterized adversary. We also formalize the notion of secure remote execution and present machine-checked proofs showing that the TAP satisfies the three key security properties that entail secure remote execution: integrity, confidentiality and secure measurement. We then present machine-checked proofs showing that SGX and Sanctum are refinements of the TAP under certain parameterizations of the adversary, demonstrating that these systems implement secure enclaves for the stated adversary models.
The enclave is formally modeled, including the program, state (e.g., memory, cache, registers), I/O, execution, and adversaries. The adversaries with privilege, cache access, and memory access are modeled differently.
A trusted abstract platform (TAP) is built for modeling secure remote execution (SRE), which is defined formally and can be met by satisfying three properties simultaneously: Secure Measurement, Integrity, and Confidentiality.
Then, to prove that Sanctum and SGX satisfy SRE, the authors prove in Boogie that these architectures refine the TAP, and therefore meet SRE.
EuroSys'16 Best Paper
This paper underpins the authors' follow-up work, Graphene(-SGX).