These functions perform essential operations on protected files. The implementation is essentially a port of the official Intel SGX SDK C++ code to C on the Graphene side.
- An LRU cache is used for data/MHT node iteration.
ipf_open
- Creates and initializes (ipf_init_fields) a pf (pf_context_t)
- Checks validity of the key, path, handle (fd), and size
- Checks whether it needs to create a new file or the file already exists
- Returns the pf
/* Open (or create) a protected file and build its in-memory context.
 *
 * \param path       Canonical full path to the file (may be NULL; only used for
 *                   length validation and metadata binding).
 * \param mode       Access mode bits (PF_FILE_MODE_READ/WRITE).
 * \param create     true to initialize a brand-new file, false to open an existing one.
 * \param file       Underlying Graphene file handle; must be non-NULL.
 * \param real_size  Size of the file on disk; must be a multiple of PF_NODE_SIZE.
 * \param kdk_key    Key derivation key; must be non-NULL.
 * \param status     [out] Receives the resulting status code.
 *
 * \return Newly allocated pf_context_t on success, NULL on failure (context is freed).
 */
static pf_context_t* ipf_open(const char* path, pf_file_mode_t mode, bool create, pf_handle_t file,
                              uint64_t real_size, const pf_key_t* kdk_key, pf_status_t* status) {
    *status = PF_STATUS_NO_MEMORY;
    pf_context_t* pf = calloc(1, sizeof(*pf));

    if (!pf)
        goto out;

    if (!ipf_init_fields(pf))
        goto out;

    // Validate the handle *before* dereferencing it: the original code logged
    // `*(int*)file` first and only checked for NULL afterwards, which crashes
    // on a NULL handle in debug builds.
    if (!file) {
        DEBUG_PF("invalid handle\n");
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        goto out;
    }

    DEBUG_PF("handle: %d, path: '%s', real size: %lu, mode: 0x%x\n", *(int*)file, path, real_size,
             mode);

    if (kdk_key == NULL) {
        DEBUG_PF("no key specified\n");
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        goto out;
    }

    if (path && strlen(path) > PATH_MAX_SIZE - 1) {
        pf->last_error = PF_STATUS_PATH_TOO_LONG;
        goto out;
    }

    // for new file, this value will later be saved in the meta data plain part (init_new_file)
    // for existing file, we will later compare this value with the value from the file
    // (init_existing_file)
    COPY_ARRAY(pf->user_kdk_key, *kdk_key);

    // omeg: we require a canonical full path to file, so no stripping path to filename only
    // omeg: Intel's implementation opens the file, we get the fd and size from the Graphene handler

    // a protected file always consists of whole encrypted nodes
    if (real_size % PF_NODE_SIZE != 0) {
        pf->last_error = PF_STATUS_INVALID_HEADER;
        goto out;
    }

    pf->file = file;
    pf->real_file_size = real_size;
    pf->mode = mode;

    if (!create) {
        // existing file: verify the on-disk metadata against the supplied key/path
        if (!ipf_init_existing_file(pf, path))
            goto out;
    } else {
        // new file: write fresh metadata
        if (!ipf_init_new_file(pf, path))
            goto out;
    }

    pf->last_error = pf->file_status = PF_STATUS_SUCCESS;
    DEBUG_PF("OK (data size %lu)\n", pf->encrypted_part_plain.size);

out:
    if (pf && PF_FAILURE(pf->last_error)) {
        DEBUG_PF("failed: %d\n", pf->last_error);
        free(pf);
        pf = NULL;
    }

    if (pf)
        *status = pf->last_error;

    return pf;
}
ipf_read
- Checks whether an error occurred beforehand
- Gets data out of the metadata block (1st block) if needed
- Repeatedly retrieves the node to read from (ipf_get_data_node) and copies data out of it
/* Read up to `size` bytes of plaintext from the current offset into `ptr`.
 *
 * The first MD_USER_DATA_SIZE bytes of user data live in the decrypted metadata
 * node; everything beyond that is read node-by-node via ipf_get_data_node().
 *
 * \return Number of bytes actually read; 0 on error (pf->last_error is set).
 *         A short read at end-of-file sets pf->end_of_file.
 */
static size_t ipf_read(pf_context_t* pf, void* ptr, size_t size) {
    if (ptr == NULL) {
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        return 0;
    }

    // refuse to operate on a file that has already entered an error state
    if (PF_FAILURE(pf->file_status)) {
        pf->last_error = pf->file_status;
        return 0;
    }

    if (!(pf->mode & PF_FILE_MODE_READ)) {
        pf->last_error = PF_STATUS_INVALID_MODE;
        return 0;
    }

    size_t data_left_to_read = size;

    // clamp the request to the bytes remaining between offset and plaintext size
    // NOTE(review): assumes pf->offset <= pf->encrypted_part_plain.size, otherwise the
    // subtraction wraps around — confirm callers maintain this invariant
    if (((uint64_t)data_left_to_read) > (uint64_t)(pf->encrypted_part_plain.size - pf->offset)) {
        // the request is bigger than what's left in the file
        data_left_to_read = (size_t)(pf->encrypted_part_plain.size - pf->offset);
    }

    // used at the end to return how much we actually read
    size_t data_attempted_to_read = data_left_to_read;

    unsigned char* out_buffer = (unsigned char*)ptr;

    // the first block of user data is read from the meta-data encrypted part
    if (pf->offset < MD_USER_DATA_SIZE) {
        // offset is smaller than MD_USER_DATA_SIZE
        size_t data_left_in_md = MD_USER_DATA_SIZE - (size_t)pf->offset;
        size_t size_to_read = MIN(data_left_to_read, data_left_in_md);

        memcpy(out_buffer, &pf->encrypted_part_plain.data[pf->offset], size_to_read);
        pf->offset += size_to_read;
        out_buffer += size_to_read;
        data_left_to_read -= size_to_read;
    }

    // copy out the rest, one data node at a time
    while (data_left_to_read > 0) {
        file_node_t* file_data_node = NULL;

        // return the data node of the current offset, will read it from disk if needed
        // (and also the mht node if needed)
        file_data_node = ipf_get_data_node(pf);
        if (file_data_node == NULL)
            break; // pf->last_error was set by ipf_get_data_node

        // position of the current offset inside this PF_NODE_SIZE-byte node
        size_t offset_in_node = (pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE;
        size_t data_left_in_node = PF_NODE_SIZE - offset_in_node;
        size_t size_to_read = MIN(data_left_to_read, data_left_in_node);

        memcpy(out_buffer, &file_data_node->decrypted.data.data[offset_in_node], size_to_read);
        pf->offset += size_to_read;
        out_buffer += size_to_read;
        data_left_to_read -= size_to_read;
    }

    if (data_left_to_read == 0 && data_attempted_to_read != size) {
        // user wanted to read more and we had to shrink the request
        assert(pf->offset == pf->encrypted_part_plain.size);
        pf->end_of_file = true;
    }

    return data_attempted_to_read - data_left_to_read;
}
/* Read one node of `node_size` bytes (node index `node_number`) from the
 * underlying file into `buffer` via the registered read callback.
 *
 * \return true on success; false on failure with pf->last_error set.
 */
static bool ipf_read_node(pf_context_t* pf, pf_handle_t handle, uint64_t node_number, void* buffer,
                          uint32_t node_size) {
    // nodes are laid out back-to-back, so the byte offset is just index * size
    uint64_t node_offset = node_number * (uint64_t)node_size;

    pf_status_t ret = g_cb_read(handle, buffer, node_offset, node_size);
    if (!PF_FAILURE(ret))
        return true;

    pf->last_error = ret;
    return false;
}
ipf_write
- Checks whether an error occurred beforehand
- Writes to the metadata block if there is empty space left in it
- Gets a data node (ipf_get_data_node) and writes data into it iteratively. Besides, sets need_writing on all parent MHT nodes in this pf.
// write zeros if `ptr` is NULL
/* Write `size` bytes at the current offset, growing the file as needed.
 *
 * The first MD_USER_DATA_SIZE bytes of user data are stored inside the metadata
 * node; the rest goes into data nodes obtained via ipf_get_data_node(). Every
 * touched node (and all of its MHT ancestors) is marked need_writing so that a
 * later flush re-encrypts and persists it.
 *
 * \return Number of bytes actually written; 0 on error (pf->last_error is set).
 */
static size_t ipf_write(pf_context_t* pf, const void* ptr, size_t size) {
    if (size == 0) {
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        return 0;
    }

    size_t data_left_to_write = size;

    // refuse to operate on a file that has already entered an error state
    if (PF_FAILURE(pf->file_status)) {
        pf->last_error = pf->file_status;
        DEBUG_PF("bad file status %d\n", pf->last_error);
        return 0;
    }

    if (!(pf->mode & PF_FILE_MODE_WRITE)) {
        pf->last_error = PF_STATUS_INVALID_MODE;
        DEBUG_PF("File is read-only\n");
        return 0;
    }

    const unsigned char* data_to_write = (const unsigned char*)ptr;

    // the first block of user data is written in the meta-data encrypted part
    if (pf->offset < MD_USER_DATA_SIZE) {
        // offset is smaller than MD_USER_DATA_SIZE
        size_t empty_place_left_in_md = MD_USER_DATA_SIZE - (size_t)pf->offset;
        size_t size_to_write = MIN(data_left_to_write, empty_place_left_in_md);

        // zero-fills the destination when data_to_write is NULL
        memcpy_or_zero_initialize(&pf->encrypted_part_plain.data[pf->offset], data_to_write,
                                  size_to_write);
        pf->offset += size_to_write;
        if (data_to_write)
            data_to_write += size_to_write;
        data_left_to_write -= size_to_write;

        if (pf->offset > pf->encrypted_part_plain.size)
            pf->encrypted_part_plain.size = pf->offset; // file grew, update the new file size

        pf->need_writing = true;
    }

    // write the rest, one data node at a time
    while (data_left_to_write > 0) {
        file_node_t* file_data_node = NULL;
        // return the data node of the current offset, will read it from disk or create new one
        // if needed (and also the mht node if needed)
        file_data_node = ipf_get_data_node(pf);
        if (file_data_node == NULL) {
            DEBUG_PF("failed to get data node\n");
            break; // pf->last_error was set by ipf_get_data_node
        }

        // position of the current offset inside this PF_NODE_SIZE-byte node
        size_t offset_in_node = (size_t)((pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE);
        size_t empty_place_left_in_node = PF_NODE_SIZE - offset_in_node;
        size_t size_to_write = MIN(data_left_to_write, empty_place_left_in_node);

        memcpy_or_zero_initialize(&file_data_node->decrypted.data.data[offset_in_node],
                                  data_to_write, size_to_write);
        pf->offset += size_to_write;
        if (data_to_write)
            data_to_write += size_to_write;
        data_left_to_write -= size_to_write;

        if (pf->offset > pf->encrypted_part_plain.size) {
            pf->encrypted_part_plain.size = pf->offset; // file grew, update the new file size
        }

        if (!file_data_node->need_writing) {
            // first modification of this node: mark it and its whole MHT ancestor
            // chain dirty so the flush re-encrypts them bottom-up
            file_data_node->need_writing = true;
            file_node_t* file_mht_node = file_data_node->parent;
            while (file_mht_node->node_number != 0) {
                // set all the mht parent nodes as 'need writing'
                file_mht_node->need_writing = true;
                file_mht_node = file_mht_node->parent;
            }
            pf->root_mht.need_writing = true;
            pf->need_writing = true;
        }
    }

    return size - data_left_to_write;
}
ipf_get_data_node
- Checks that the offset is at the edge of a node and not inside the metadata node
- If offset == size, creates a new node via ipf_append_data_node; otherwise reads the existing data node via ipf_read_data_node
- "Bumps all the parent MHT nodes to reside before the data node in the cache"
- Performs other LRU-cache related operations (eviction/flush) TODO
/* Return the data node covering the current file offset, loading or creating
 * it as needed, then enforce the cache size limit.
 *
 * If the offset sits exactly at the end of the file on a node boundary, a new
 * node is appended; otherwise the existing node is read (and decrypted) from
 * disk. Parent MHT nodes are "bumped" in the LRU so they are evicted after
 * their children. Finally, the cache is trimmed to MAX_PAGES_IN_CACHE pages,
 * flushing dirty pages if necessary.
 *
 * \return The data node, or NULL on error (pf->last_error is set).
 */
static file_node_t* ipf_get_data_node(pf_context_t* pf) {
    file_node_t* file_data_node = NULL;

    // offsets inside the metadata user-data area must never reach here
    if (pf->offset < MD_USER_DATA_SIZE) {
        pf->last_error = PF_STATUS_UNKNOWN_ERROR;
        return NULL;
    }

    if ((pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE == 0
            && pf->offset == pf->encrypted_part_plain.size) {
        // new node
        file_data_node = ipf_append_data_node(pf);
    } else {
        // existing node
        file_data_node = ipf_read_data_node(pf);
    }

    // bump all the parents mht to reside before the data node in the cache
    if (file_data_node != NULL) {
        file_node_t* file_mht_node = file_data_node->parent;
        while (file_mht_node->node_number != 0) {
            // bump the mht node to the head of the lru
            lruc_get(pf->cache, file_mht_node->physical_node_number);
            file_mht_node = file_mht_node->parent;
        }
    }

    // even if we didn't get the required data_node, we might have read other nodes in the process
    while (lruc_size(pf->cache) > MAX_PAGES_IN_CACHE) {
        void* data = lruc_get_last(pf->cache);
        assert(data != NULL);
        // for production -
        if (data == NULL) {
            pf->last_error = PF_STATUS_UNKNOWN_ERROR;
            return NULL;
        }

        if (!((file_node_t*)data)->need_writing) {
            // clean page: safe to drop from the cache outright
            lruc_remove_last(pf->cache);

            // before deleting the memory, need to scrub the plain secrets
            file_node_t* file_node = (file_node_t*)data;
            erase_memory(&file_node->decrypted, sizeof(file_node->decrypted));
            free(file_node);
        } else {
            // dirty page: flush everything so it becomes clean and evictable
            if (!ipf_internal_flush(pf)) {
                // error, can't flush cache, file status changed to error
                assert(pf->file_status != PF_STATUS_SUCCESS);
                if (pf->file_status == PF_STATUS_SUCCESS)
                    pf->file_status = PF_STATUS_FLUSH_ERROR; // for release set this anyway
                return NULL; // even if we got the data_node!
            }
        }
    }

    return file_data_node;
}
ipf_read_data_node
This function first reads the ciphertext of a node, identified by its node number within a pf, then decrypts the ciphertext using the callback function to obtain the plaintext. It also adds what has been read to the LRU cache.
ipf_close
- Called by pf_close
- Invokes ipf_internal_flush if the status is not PF_STATUS_SUCCESS
- Also does some cleanup work on the LRU cache
ipf_internal_flush
- Checks whether writing is needed (for MHT and other nodes)
- Updates data/MHT/metadata nodes
- Writes them to disk
/* Flush all pending changes: re-encrypt dirty data/MHT nodes, refresh the
 * metadata node, then write everything to disk.
 *
 * \return true on success (or if nothing was dirty); false on failure with
 *         pf->file_status switched to the corresponding error state.
 */
static bool ipf_internal_flush(pf_context_t* pf) {
    if (!pf->need_writing) {
        // no changes at all
        DEBUG_PF("no need to write\n");
        return true;
    }

    // re-encrypt the node tree only when user data extends past the metadata
    // node AND the root MHT is actually dirty; otherwise the metadata node is
    // the single thing to rewrite
    bool have_node_tree = pf->encrypted_part_plain.size > MD_USER_DATA_SIZE;
    if (have_node_tree && pf->root_mht.need_writing) {
        // otherwise it's just one write - the meta-data node
        if (!ipf_update_all_data_and_mht_nodes(pf)) {
            // this is something that shouldn't happen, can't fix this...
            pf->file_status = PF_STATUS_CRYPTO_ERROR;
            DEBUG_PF("failed to update data nodes\n");
            return false;
        }
    }

    if (!ipf_update_metadata_node(pf)) {
        // this is something that shouldn't happen, can't fix this...
        pf->file_status = PF_STATUS_CRYPTO_ERROR;
        DEBUG_PF("failed to update metadata nodes\n");
        return false;
    }

    if (!ipf_write_all_changes_to_disk(pf)) {
        pf->file_status = PF_STATUS_WRITE_TO_DISK_FAILED;
        DEBUG_PF("failed to write changes to disk\n");
        return false;
    }

    pf->need_writing = false;
    return true;
}
ipf_update_all_data_and_mht_nodes
This function is very complex. It iterates over all nodes with the need_writing flag set and performs the corresponding change/encryption/update:
- Iterates over all data nodes needing writing: generates a key for each node, stores the key in its parent MHT node, encrypts the data, then stores the GMAC in the MHT node
- Iterates over all MHT nodes needing writing: puts these MHT nodes into an array and sorts them
- Iterates over this array from the last node to the first: like what has been done to the data nodes, MHT nodes also need to be maintained in a similar way.
/* Re-encrypt every dirty data node and MHT node in the cache, bottom-up.
 *
 * For each dirty data node: generate a fresh random key, encrypt the node with
 * it, and store key+GMAC in the crypto slot of its parent MHT node. Then do
 * the same for every dirty MHT node, processed deepest-first (the sorted array
 * is walked from the end) so a parent is always encrypted after all of its
 * children have updated its contents. Finally the root MHT is encrypted and
 * its key/GMAC are stored in the metadata plaintext.
 *
 * \return true on success; false on failure with pf->last_error set.
 */
static bool ipf_update_all_data_and_mht_nodes(pf_context_t* pf) {
    bool ret = false;
    file_node_t** mht_array = NULL;
    file_node_t* file_mht_node;
    pf_status_t status;
    void* data = lruc_get_first(pf->cache);

    // 1. encrypt the changed data
    // 2. set the IV+GMAC in the parent MHT
    // [3. set the need_writing flag for all the parents]
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_DATA_NODE_TYPE) {
            file_node_t* data_node = (file_node_t*)data;

            if (data_node->need_writing) {
                // crypto slot for this data node inside its parent MHT node
                gcm_crypto_data_t* gcm_crypto_data =
                    &data_node->parent->decrypted.mht
                         .data_nodes_crypto[data_node->node_number % ATTACHED_DATA_NODES_COUNT];

                // a fresh key per flush; never reuse a key with the same (empty) IV
                if (!ipf_generate_random_key(pf, &gcm_crypto_data->key))
                    goto out;

                // encrypt the data, this also saves the gmac of the operation in the mht crypto
                // node
                status = g_cb_aes_gcm_encrypt(&gcm_crypto_data->key, &g_empty_iv, NULL, 0, // aad
                                              data_node->decrypted.data.data, PF_NODE_SIZE,
                                              data_node->encrypted.cipher, &gcm_crypto_data->gmac);
                if (PF_FAILURE(status)) {
                    pf->last_error = status;
                    goto out;
                }

                file_mht_node = data_node->parent;
#ifdef DEBUG
                // this loop should do nothing, add it here just to be safe
                while (file_mht_node->node_number != 0) {
                    assert(file_mht_node->need_writing == true);
                    file_mht_node = file_mht_node->parent;
                }
#endif
            }
        }
        data = lruc_get_next(pf->cache);
    }

    size_t dirty_count = 0;

    // count dirty mht nodes
    data = lruc_get_first(pf->cache);
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_MHT_NODE_TYPE) {
            if (((file_node_t*)data)->need_writing)
                dirty_count++;
        }
        data = lruc_get_next(pf->cache);
    }

    // add all the mht nodes that needs writing to a list
    mht_array = malloc(dirty_count * sizeof(*mht_array));
    if (!mht_array) {
        pf->last_error = PF_STATUS_NO_MEMORY;
        goto out;
    }

    data = lruc_get_first(pf->cache);
    uint64_t dirty_idx = 0;
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_MHT_NODE_TYPE) {
            file_mht_node = (file_node_t*)data;

            if (file_mht_node->need_writing)
                mht_array[dirty_idx++] = file_mht_node;
        }
        data = lruc_get_next(pf->cache);
    }

    // sort so that walking the array backwards processes deeper nodes first
    if (dirty_count > 0)
        sort_nodes(mht_array, 0, dirty_count - 1);

    // update the gmacs in the parents from last node to first (bottom layers first)
    for (dirty_idx = dirty_count; dirty_idx > 0; dirty_idx--) {
        file_mht_node = mht_array[dirty_idx - 1];

        // crypto slot for this MHT node inside its parent MHT node
        // (node_number - 1 because child slot 0 belongs to MHT node number 1)
        gcm_crypto_data_t* gcm_crypto_data =
            &file_mht_node->parent->decrypted.mht
                 .mht_nodes_crypto[(file_mht_node->node_number - 1) % CHILD_MHT_NODES_COUNT];

        if (!ipf_generate_random_key(pf, &gcm_crypto_data->key)) {
            goto out;
        }

        status = g_cb_aes_gcm_encrypt(&gcm_crypto_data->key, &g_empty_iv, NULL, 0,
                                      &file_mht_node->decrypted.mht, PF_NODE_SIZE,
                                      &file_mht_node->encrypted.cipher, &gcm_crypto_data->gmac);
        if (PF_FAILURE(status)) {
            pf->last_error = status;
            goto out;
        }
    }

    // update mht root gmac in the meta data node
    if (!ipf_generate_random_key(pf, &pf->encrypted_part_plain.mht_key))
        goto out;

    status = g_cb_aes_gcm_encrypt(&pf->encrypted_part_plain.mht_key, &g_empty_iv,
                                  NULL, 0,
                                  &pf->root_mht.decrypted.mht, PF_NODE_SIZE,
                                  &pf->root_mht.encrypted.cipher,
                                  &pf->encrypted_part_plain.mht_gmac);
    if (PF_FAILURE(status)) {
        pf->last_error = status;
        goto out;
    }

    ret = true;

out:
    free(mht_array);
    return ret;
}
!! `ipf_update_metadata_node`
This function first generates a random key (derived from the user's KDK) and encrypts the metadata node using this key.
!! `ipf_write_all_changes_to_disk`
Writes all nodes in `pf` back to disk. It first writes data and child MHT nodes, then the root MHT node, and last, the metadata node.