Graphene-SGX PF Internal

 April 16, 2021 at 10:34 am

These functions performs essential operations on protected files. The implementations seem like transporting the official Intel SGX SDK cpp code to c at graphene side.

  • LRU cache is used for data/mht nodes iteration

ipf_open

  1. Create and initialize(ipf_init_fields) a pf(pf_context_t)
  2. Check validity of key, path, handle(fd), size
  3. Check if it needs to create a new file or the file exits
  4. Return pf
static pf_context_t* ipf_open(const char* path, pf_file_mode_t mode, bool create, pf_handle_t file,
                              uint64_t real_size, const pf_key_t* kdk_key, pf_status_t* status) {
    *status = PF_STATUS_NO_MEMORY;
    pf_context_t* pf = calloc(1, sizeof(*pf));

    if (!pf)
        goto out;

    if (!ipf_init_fields(pf))
        goto out;

    DEBUG_PF("handle: %d, path: '%s', real size: %lu, mode: 0x%x\n", *(int*)file, path, real_size,
             mode);

    if (kdk_key == NULL) {
        DEBUG_PF("no key specified\n");
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        goto out;
    }

    if (path && strlen(path) > PATH_MAX_SIZE - 1) {
        pf->last_error = PF_STATUS_PATH_TOO_LONG;
        goto out;
    }

    // for new file, this value will later be saved in the meta data plain part (init_new_file)
    // for existing file, we will later compare this value with the value from the file
    // (init_existing_file)
    COPY_ARRAY(pf->user_kdk_key, *kdk_key);

    // omeg: we require a canonical full path to file, so no stripping path to filename only
    // omeg: Intel's implementation opens the file, we get the fd and size from the Graphene handler

    if (!file) {
        DEBUG_PF("invalid handle\n");
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        goto out;
    }

    if (real_size % PF_NODE_SIZE != 0) {
        pf->last_error = PF_STATUS_INVALID_HEADER;
        goto out;
    }

    pf->file = file;
    pf->real_file_size = real_size;
    pf->mode = mode;

    if (!create) {
        // existing file
        if (!ipf_init_existing_file(pf, path))
            goto out;

    } else {
        // new file
        if (!ipf_init_new_file(pf, path))
            goto out;
    }

    pf->last_error = pf->file_status = PF_STATUS_SUCCESS;
    DEBUG_PF("OK (data size %lu)\n", pf->encrypted_part_plain.size);

out:
    if (pf && PF_FAILURE(pf->last_error)) {
        DEBUG_PF("failed: %d\n", pf->last_error);
        free(pf);
        pf = NULL;
    }

    if (pf)
        *status = pf->last_error;

    return pf;
}

ipf_read

  1. Check if error occurs beforehand
  2. Get data out from metadata block (1st blk) if needed
  3. Continuously retrieve a node to read and read this node (ipf_get_data_node
static size_t ipf_read(pf_context_t* pf, void* ptr, size_t size) {
    if (ptr == NULL) {
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        return 0;
    }

    if (PF_FAILURE(pf->file_status)) {
        pf->last_error = pf->file_status;
        return 0;
    }

    if (!(pf->mode & PF_FILE_MODE_READ)) {
        pf->last_error = PF_STATUS_INVALID_MODE;
        return 0;
    }

    size_t data_left_to_read = size;

    if (((uint64_t)data_left_to_read) > (uint64_t)(pf->encrypted_part_plain.size - pf->offset)) {
        // the request is bigger than what's left in the file
        data_left_to_read = (size_t)(pf->encrypted_part_plain.size - pf->offset);
    }

    // used at the end to return how much we actually read
    size_t data_attempted_to_read = data_left_to_read;

    unsigned char* out_buffer = (unsigned char*)ptr;

    // the first block of user data is read from the meta-data encrypted part
    if (pf->offset < MD_USER_DATA_SIZE) {
        // offset is smaller than MD_USER_DATA_SIZE
        size_t data_left_in_md = MD_USER_DATA_SIZE - (size_t)pf->offset;
        size_t size_to_read = MIN(data_left_to_read, data_left_in_md);

        memcpy(out_buffer, &pf->encrypted_part_plain.data[pf->offset], size_to_read);
        pf->offset += size_to_read;
        out_buffer += size_to_read;
        data_left_to_read -= size_to_read;
    }

    while (data_left_to_read > 0) {
        file_node_t* file_data_node = NULL;
        // return the data node of the current offset, will read it from disk if needed
        // (and also the mht node if needed)
        file_data_node = ipf_get_data_node(pf);
        if (file_data_node == NULL)
            break;

        size_t offset_in_node = (pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE;
        size_t data_left_in_node = PF_NODE_SIZE - offset_in_node;
        size_t size_to_read = MIN(data_left_to_read, data_left_in_node);

        memcpy(out_buffer, &file_data_node->decrypted.data.data[offset_in_node], size_to_read);
        pf->offset += size_to_read;
        out_buffer += size_to_read;
        data_left_to_read -= size_to_read;
    }

    if (data_left_to_read == 0 && data_attempted_to_read != size) {
        // user wanted to read more and we had to shrink the request
        assert(pf->offset == pf->encrypted_part_plain.size);
        pf->end_of_file = true;
    }

    return data_attempted_to_read - data_left_to_read;
}
static bool ipf_read_node(pf_context_t* pf, pf_handle_t handle, uint64_t node_number, void* buffer,
                          uint32_t node_size) {
    uint64_t offset = node_number * node_size;

    pf_status_t status = g_cb_read(handle, buffer, offset, node_size);
    if (PF_FAILURE(status)) {
        pf->last_error = status;
        return false;
    }

    return true;
}

ipf_write

  1. Check if error occurred
  2. Write to metadata block if there is empty space left
  3. Get data node (ipf_get_data_node) and write data into the node iteratively. Besides, set need_writing to all mht nodes in this pf.
// write zeros if `ptr` is NULL
static size_t ipf_write(pf_context_t* pf, const void* ptr, size_t size) {
    if (size == 0) {
        pf->last_error = PF_STATUS_INVALID_PARAMETER;
        return 0;
    }

    size_t data_left_to_write = size;

    if (PF_FAILURE(pf->file_status)) {
        pf->last_error = pf->file_status;
        DEBUG_PF("bad file status %d\n", pf->last_error);
        return 0;
    }

    if (!(pf->mode & PF_FILE_MODE_WRITE)) {
        pf->last_error = PF_STATUS_INVALID_MODE;
        DEBUG_PF("File is read-only\n");
        return 0;
    }

    const unsigned char* data_to_write = (const unsigned char*)ptr;

    // the first block of user data is written in the meta-data encrypted part
    if (pf->offset < MD_USER_DATA_SIZE) {
        // offset is smaller than MD_USER_DATA_SIZE
        size_t empty_place_left_in_md = MD_USER_DATA_SIZE - (size_t)pf->offset;
        size_t size_to_write = MIN(data_left_to_write, empty_place_left_in_md);

        memcpy_or_zero_initialize(&pf->encrypted_part_plain.data[pf->offset], data_to_write,
                                  size_to_write);
        pf->offset += size_to_write;
        if (data_to_write)
            data_to_write += size_to_write;
        data_left_to_write -= size_to_write;

        if (pf->offset > pf->encrypted_part_plain.size)
            pf->encrypted_part_plain.size = pf->offset; // file grew, update the new file size

        pf->need_writing = true;
    }

    while (data_left_to_write > 0) {
        file_node_t* file_data_node = NULL;
        // return the data node of the current offset, will read it from disk or create new one
        // if needed (and also the mht node if needed)
        file_data_node = ipf_get_data_node(pf);
        if (file_data_node == NULL) {
            DEBUG_PF("failed to get data node\n");
            break;
        }

        size_t offset_in_node = (size_t)((pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE);
        size_t empty_place_left_in_node = PF_NODE_SIZE - offset_in_node;
        size_t size_to_write = MIN(data_left_to_write, empty_place_left_in_node);

        memcpy_or_zero_initialize(&file_data_node->decrypted.data.data[offset_in_node],
                                  data_to_write, size_to_write);
        pf->offset += size_to_write;
        if (data_to_write)
            data_to_write += size_to_write;
        data_left_to_write -= size_to_write;

        if (pf->offset > pf->encrypted_part_plain.size) {
            pf->encrypted_part_plain.size = pf->offset; // file grew, update the new file size
        }

        if (!file_data_node->need_writing) {
            file_data_node->need_writing = true;
            file_node_t* file_mht_node = file_data_node->parent;
            while (file_mht_node->node_number != 0) {
                // set all the mht parent nodes as 'need writing'
                file_mht_node->need_writing = true;
                file_mht_node = file_mht_node->parent;
            }
            pf->root_mht.need_writing = true;
            pf->need_writing = true;
        }
    }

    return size - data_left_to_write;
}

ipf_get_data_node

  1. check the offset is at the edge of nodes and not in metanode
  2. If offset==size, create a new node by ipf_append_data_node; otherwise read the next data node ipf_read_data_node.
  3. "bump all the parents mht to reside before the data node in the cache"
  4. Do other LRU cache related operations TODO
static file_node_t* ipf_get_data_node(pf_context_t* pf) {
    file_node_t* file_data_node = NULL;

    if (pf->offset < MD_USER_DATA_SIZE) {
        pf->last_error = PF_STATUS_UNKNOWN_ERROR;
        return NULL;
    }

    if ((pf->offset - MD_USER_DATA_SIZE) % PF_NODE_SIZE == 0
        && pf->offset == pf->encrypted_part_plain.size) {
        // new node
        file_data_node = ipf_append_data_node(pf);
    } else {
        // existing node
        file_data_node = ipf_read_data_node(pf);
    }

    // bump all the parents mht to reside before the data node in the cache
    if (file_data_node != NULL) {
        file_node_t* file_mht_node = file_data_node->parent;
        while (file_mht_node->node_number != 0) {
            // bump the mht node to the head of the lru
            lruc_get(pf->cache, file_mht_node->physical_node_number);
            file_mht_node = file_mht_node->parent;
        }
    }

    // even if we didn't get the required data_node, we might have read other nodes in the process
    while (lruc_size(pf->cache) > MAX_PAGES_IN_CACHE) {
        void* data = lruc_get_last(pf->cache);
        assert(data != NULL);
        // for production -
        if (data == NULL) {
            pf->last_error = PF_STATUS_UNKNOWN_ERROR;
            return NULL;
        }

        if (!((file_node_t*)data)->need_writing) {
            lruc_remove_last(pf->cache);

            // before deleting the memory, need to scrub the plain secrets
            file_node_t* file_node = (file_node_t*)data;
            erase_memory(&file_node->decrypted, sizeof(file_node->decrypted));
            free(file_node);
        } else {
            if (!ipf_internal_flush(pf)) {
                // error, can't flush cache, file status changed to error
                assert(pf->file_status != PF_STATUS_SUCCESS);
                if (pf->file_status == PF_STATUS_SUCCESS)
                    pf->file_status = PF_STATUS_FLUSH_ERROR; // for release set this anyway
                return NULL;                                 // even if we got the data_node!
            }
        }
    }

    return file_data_node;
}

ipf_read_data_node

This function first reads the ciphertext of a node by specifying a node number and a pf, then decrypt the ciphertext using the callback function to get the plaintext. Besides, add what have been read to LRU cache.

ipf_close

  • called by pf_close
  • invokes ipf_internal_flush if the status is not PF_STATUS_SUCCESS
  • also do some cleaning jobs on the LRU cache

ipf_internal_flush

  1. Check if writing is needed (for mht and other nodes)
  2. Update data/mht/meta nodes
  3. Write to disk
static bool ipf_internal_flush(pf_context_t* pf) {
    if (!pf->need_writing) {
        // no changes at all
        DEBUG_PF("no need to write\n");
        return true;
    }

    if (pf->encrypted_part_plain.size > MD_USER_DATA_SIZE && pf->root_mht.need_writing) {
        // otherwise it's just one write - the meta-data node
        if (!ipf_update_all_data_and_mht_nodes(pf)) {
            // this is something that shouldn't happen, can't fix this...
            pf->file_status = PF_STATUS_CRYPTO_ERROR;
            DEBUG_PF("failed to update data nodes\n");
            return false;
        }
    }

    if (!ipf_update_metadata_node(pf)) {
        // this is something that shouldn't happen, can't fix this...
        pf->file_status = PF_STATUS_CRYPTO_ERROR;
        DEBUG_PF("failed to update metadata nodes\n");
        return false;
    }

    if (!ipf_write_all_changes_to_disk(pf)) {
        pf->file_status = PF_STATUS_WRITE_TO_DISK_FAILED;

        DEBUG_PF("failed to write changes to disk\n");
        return false;
    }

    pf->need_writing = false;

    return true;
}

ipf_update_all_data_and_mht_nodes

This function is very complex. It iterates all nodes with need_writing flag set and do corresponding change/encryption/update

  1. Iterate all data nodes needing writing: generate a key for the node, store the key in its parent mht node and encrypt data, then store the gmac in the mht node
  2. Iterate all mht nodes needing writing: put these mht nodes into an array and sort them
  3. Iterate this array from the last node to the first: like what have been done to the data nodes, mht nodes also need to be maintained in a similar way.
static bool ipf_update_all_data_and_mht_nodes(pf_context_t* pf) {
    bool ret = false;
    file_node_t** mht_array = NULL;
    file_node_t* file_mht_node;
    pf_status_t status;
    void* data = lruc_get_first(pf->cache);

    // 1. encrypt the changed data
    // 2. set the IV+GMAC in the parent MHT
    // [3. set the need_writing flag for all the parents]
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_DATA_NODE_TYPE) {
            file_node_t* data_node = (file_node_t*)data;

            if (data_node->need_writing) {
                gcm_crypto_data_t* gcm_crypto_data =
                    &data_node->parent->decrypted.mht
                         .data_nodes_crypto[data_node->node_number % ATTACHED_DATA_NODES_COUNT];

                if (!ipf_generate_random_key(pf, &gcm_crypto_data->key))
                    goto out;

                // encrypt the data, this also saves the gmac of the operation in the mht crypto
                // node
                status = g_cb_aes_gcm_encrypt(&gcm_crypto_data->key, &g_empty_iv, NULL, 0,  // aad
                                              data_node->decrypted.data.data, PF_NODE_SIZE,
                                              data_node->encrypted.cipher, &gcm_crypto_data->gmac);
                if (PF_FAILURE(status)) {
                    pf->last_error = status;
                    goto out;
                }

                file_mht_node = data_node->parent;
#ifdef DEBUG
                // this loop should do nothing, add it here just to be safe
                while (file_mht_node->node_number != 0) {
                    assert(file_mht_node->need_writing == true);
                    file_mht_node = file_mht_node->parent;
                }
#endif
            }
        }
        data = lruc_get_next(pf->cache);
    }

    size_t dirty_count = 0;

    // count dirty mht nodes
    data = lruc_get_first(pf->cache);
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_MHT_NODE_TYPE) {
            if (((file_node_t*)data)->need_writing)
                dirty_count++;
        }
        data = lruc_get_next(pf->cache);
    }

    // add all the mht nodes that needs writing to a list
    mht_array = malloc(dirty_count * sizeof(*mht_array));
    if (!mht_array) {
        pf->last_error = PF_STATUS_NO_MEMORY;
        goto out;
    }

    data = lruc_get_first(pf->cache);
    uint64_t dirty_idx = 0;
    while (data != NULL) {
        if (((file_node_t*)data)->type == FILE_MHT_NODE_TYPE) {
            file_mht_node = (file_node_t*)data;

            if (file_mht_node->need_writing)
                mht_array[dirty_idx++] = file_mht_node;
        }

        data = lruc_get_next(pf->cache);
    }

    if (dirty_count > 0)
        sort_nodes(mht_array, 0, dirty_count - 1);

    // update the gmacs in the parents from last node to first (bottom layers first)
    for (dirty_idx = dirty_count; dirty_idx > 0; dirty_idx--) {
        file_mht_node = mht_array[dirty_idx - 1];

        gcm_crypto_data_t* gcm_crypto_data =
            &file_mht_node->parent->decrypted.mht
                 .mht_nodes_crypto[(file_mht_node->node_number - 1) % CHILD_MHT_NODES_COUNT];

        if (!ipf_generate_random_key(pf, &gcm_crypto_data->key)) {
            goto out;
        }

        status = g_cb_aes_gcm_encrypt(&gcm_crypto_data->key, &g_empty_iv, NULL, 0,
                                      &file_mht_node->decrypted.mht, PF_NODE_SIZE,
                                      &file_mht_node->encrypted.cipher, &gcm_crypto_data->gmac);
        if (PF_FAILURE(status)) {
            pf->last_error = status;
            goto out;
        }
    }

    // update mht root gmac in the meta data node
    if (!ipf_generate_random_key(pf, &pf->encrypted_part_plain.mht_key))
        goto out;

    status = g_cb_aes_gcm_encrypt(&pf->encrypted_part_plain.mht_key, &g_empty_iv,
                                  NULL, 0,
                                  &pf->root_mht.decrypted.mht, PF_NODE_SIZE,
                                  &pf->root_mht.encrypted.cipher,
                                  &pf->encrypted_part_plain.mht_gmac);
    if (PF_FAILURE(status)) {
        pf->last_error = status;
        goto out;
    }

    ret = true;

out:
    free(mht_array);
    return ret;
}
''''

!! `ipf_update_metadata_node`

This function first generate a random key (derived from user's KDK) and encrypt the metadata node using this key. 

!! `ipf_write_all_changes_to_disk`

Write all nodes in `pf` back to the disk. It will first write data and child mht nodes, then root mht node, and last, write the metadata node.