This is not the current version of the class.

Data representation 7: Arenas and code representation

This lecture, we discuss arena allocators and the representation of code. Code representation serves as a bridge to our next unit, which is assembly programming.

Performance consequences of memory allocation

// allocate and free nodes `noperations` times
long counter = 0;
for (unsigned i = 0; i != noperations; ++i) {
    int pos = random_node_index(randomness);
    if (n[pos] == nullptr) {
        n[pos] = new node;
        n[pos]->contents = counter;
        ++counter;
    } else {
        delete n[pos];
        n[pos] = nullptr;
    }
}

Memory arenas

Example

struct node_arena {
    std::vector<node*> scratch_;

    node* allocate() {
        node* n;
        if (this->scratch_.empty()) {
            n = new node;
        } else {
            n = this->scratch_.back();
            this->scratch_.pop_back();
        }
        return n;
    }

    void deallocate(node* n) {
        this->scratch_.push_back(n);
    }
};

1.96x faster on Mac OS X desktop (0.90s → 0.46s)

Libraries and abstraction boundaries

struct node_arena {
    node* allocate();
    void deallocate(node* n);
    ~node_arena();
};

Allocator specification

struct node_arena {
    // Return a pointer to a freshly allocated `node`.
    node* allocate();

    // Deallocate `n`, which must refer to a live `node` that was
    // returned by a previous call to `allocate()`.
    void deallocate(node* n);

    // Free any storage used for allocated `node`s. All `node`s allocated
    // by this arena must be passed to `deallocate` before the arena
    // is destroyed.
    ~node_arena();
};

A specification can have many implementations

struct node_arena {
    node* allocate() {
        return new node;
    }
    void deallocate(node* n) {
        delete n;
    }
}
struct node_arena {
    std::vector<node*> scratch_;
    node* allocate() {
        node* n;
        if (scratch_.empty()) {
            n = new node;
        } else {
            n = scratch_.back();
            scratch_.pop_back();
        }
        return n;
    }
    void deallocate(node* n) {
        scratch_.push_back(n);
    }
    ~node_arena() { ... }
}
struct node_arena {
    node* allocate() {
        return new node[10];
    }
    void deallocate(node* n) {
        delete[] n;
    }
}

Per-allocation statistics?

How to store per-allocation information

Let’s add some numbers!

Number adder driver

#include <cstdio>
#include <cstdlib>
#include "hexdump.hh"

extern "C" {
int add(int a, int b);
}

int main(int argc, char* argv[]) {
    if (argc <= 2) {
        fprintf(stderr, "Usage: add A B\n\
    Prints A + B.\n");
        exit(1);
    }

    int a = strtol(argv[1], nullptr, 0);
    int b = strtol(argv[2], nullptr, 0);
    printf("%d + %d = %d\n", a, b, add(a, b));
}

C++ add

extern "C" {

int add(int a, int b) {
    return a + b;
}

}

(extern "C" tells the compiler and linker to relax about matching argument types.)

Examining the representation of code

00401220  8d 04 37 c3 66 2e 0f 1f  84 00                    |..7.f.....|

Machine code

cs61-user@5ae5a48533a8:~/cs61-lectures/datarep7$ objdump -d addf.o 

addf.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <add>:
   0:   8d 04 37                lea    (%rdi,%rsi,1),%eax
   3:   c3                      retq   

The many-to-many mapping between source code and machine code

Turning off optimization

Operation order

Parameter names

Type changes

Type changes (2)

Type changes (3)

Type changes (4)

Code changes

Code changes (2)

unsigned add(unsigned a, unsigned b) {
    while (b > 0) {
        ++a;
        --b;
    }
    return a;
}

Code changes (3)

int add(int a, int b) {
    while (a > 0) {
        a -= 4;
        b += 4;
    }
    while (a < 0) {
        a += 2;
        b -= 2;
    }
    if (a > 0) {
        ++b;
    }
    return b;
}

Data changes

__attribute__((section (".text.addfunction"))) extern const unsigned char add[] =
    { 0x8d, 0x04, 0x37, 0xc3 };

Spelunking for add

int add(int a, int b) {
    // Open a file
    const char* file = "cs61hello.jpg";
    int fd = open(file, O_RDONLY);
    assert(fd >= 0);

    // Look up its size
    struct stat s;
    int r = fstat(fd, &s);
    assert(r >= 0 && S_ISREG(s.st_mode) && s.st_size > 0);

    // Load it into memory starting at address `data`
    void* data = mmap(nullptr, s.st_size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    assert(data != MAP_FAILED);

    // Obtain address of add function in loaded file
    uintptr_t function_address = (uintptr_t) data + 0x9efc;
    int (*function_pointer)(int, int) = (int (*)(int, int)) function_address;

    // Call add function
    return function_pointer(a, b);
}

It works!

Hello

cs61-user@5ae5a48533a8:~/cs61-lectures/datarep7$ ./add09 1 2
1 + 2 = 3
cs61-user@5ae5a48533a8:~/cs61-lectures/datarep7$ ./add09 -1 -201
-1 + -201 = -202

Why does it work?

cs61-user@5ae5a48533a8:~/cs61-lectures/datarep7$ hexdump -C cs61hello.jpg
00000000  ff d8 ff e0 00 10 4a 46  49 46 00 01 02 01 00 48  |......JFIF.....H|
00000010  00 48 00 00 ff ed 00 2c  50 68 6f 74 6f 73 68 6f  |.H.....,Photosho|
00000020  70 20 33 2e 30 00 38 42  49 4d 03 ed 00 00 00 00  |p 3.0.8BIM......|
00000030  00 10 00 48 00 00 00 01  00 01 00 48 00 00 00 01  |...H.......H....|
00000040  00 01 ff e1 79 f2 68 74  41 55 41 54 45 31 c0 55  |....y.htAUATE1.U|
00000050  53 41 ba 01 00 00 00 bb  30 00 00 00 bd 23 00 00  |SA......0....#..|
00000060  00 45 31 ed 64 48 8b 04  25 28 00 00 00 48 89 44  |.E1.dH..%(...H.D|
    ... blah blah blah ...
cs61-user@5ae5a48533a8:~/cs61-lectures/datarep7$ hexdump -C cs61hello.jpg | grep 9ef
00009ef0  ad 7f c2 6f 87 1d 69 88  a7 a8 ae ac 8d 04 37 c3  |...o..i.......7.|

Dynamic linking and loading