1. Arrays in Assembly
Question: How do we implement high-level language arrays in assembly?
char carray[20];
carray[4] = 'x';
cout << carray[4] ;
Answer:
.data
carray: .zero 20                        # 20 bytes of storage == char carray[20]
.text
        movl    $4, %edi                # edi = index 4
        movb    $'x', carray(%edi)      # carray[4] = 'x'
        movb    carray(%edi), %al       # al = carray[4]
Question: How about integer arrays?
int iarray[20];
iarray[4] = 7;
cout << iarray[4] ;
Answer:
.data
iarray: .fill 20, 4, 0                  # 20 four-byte elements, all zero (80 bytes)
.text
        movl    $4, %edi                # edi = element index
        movl    $7, iarray(,%edi,4)     # iarray[4] = 7  (address = iarray + edi*4)
        movl    iarray(,%edi,4), %eax   # eax = iarray[4]
Applications of matrices (2-dimensional arrays) are common:
https://www.programmingsimplified.com/c-program-add-matrices
How to implement array functionalities like this in assembly?
int a1[3][3];
a1[0][0] = 10;
a1[0][1] = 20;
a1[1][1] = 30;
int w = a[1][1];
Solution: map the two-dimensional array into a one-dimensional space,
and provide an array creation function (constructor) plus set and get functions:
int *a2 = MkTwoDimIntArray(3,3);        // 3 rows x 3 columns
TwoDimIntArraySet(a2,0,0,10);           // element (0,0) = 10
TwoDimIntArraySet(a2,1,0,20);           // element (1,0) = 20
TwoDimIntArraySet(a2,1,1,30);           // element (1,1) = 30
int w = TwoDimIntArrayGet(a2,1,1);      // w = element (1,1)
In Assembly, the call code would look like:
# a2 = MkTwoDimIntArray(3, 3) -- arguments are pushed right to left
        pushl   $3                      # col
        pushl   $3                      # row
        call    MkTwoDimIntArray
        addl    $8, %esp                # drop the 2 argument words
        movl    %eax, a2                # save the returned pointer

# TwoDimIntArraySet(a2, 0, 0, 10)
        pushl   $10                     # value
        pushl   $0                      # col
        pushl   $0                      # row
        pushl   a2                      # ary
        call    TwoDimIntArraySet
        addl    $16, %esp               # drop the 4 argument words

# TwoDimIntArraySet(a2, 1, 0, 20)
        pushl   $20                     # value
        pushl   $0                      # col
        pushl   $1                      # row
        pushl   a2                      # ary
        call    TwoDimIntArraySet
        addl    $16, %esp

# TwoDimIntArraySet(a2, 1, 1, 30)
        pushl   $30                     # value
        pushl   $1                      # col
        pushl   $1                      # row
        pushl   a2                      # ary
        call    TwoDimIntArraySet
        addl    $16, %esp

# w = TwoDimIntArrayGet(a2, 1, 1)
        pushl   $1                      # col
        pushl   $1                      # row
        pushl   a2                      # ary
        call    TwoDimIntArrayGet
        addl    $12, %esp               # drop the 3 argument words
        movl    %eax, w                 # store the returned element
1.1 Implementing one dimensional char arrays
Call code in C:
int main() {
    /* Built-in array syntax. */
    char a3[16];
    a3[4] = 'A';
    char x = a3[4];

    /* The same three steps through the function interface. */
    char *a4 = MkOneDimCharArray(16);
    OneDimCharArraySet(a4, 4, 'A');
    char y = OneDimCharArrayGet(a4, 4);
}
Function implementation code in C:
int OneDimCharSpaceSize;        // set by MkOneDimCharArray to the requested length n
// Backing storage shared by every array handed out by MkOneDimCharArray.
// The name must be OneDimCharSpace: that is what MkOneDimCharArray returns.
//
//      index:  0   1   2   3   4
//              |---|---|---|---|---|---- ...
//              ^               ^
//              ary             ary+index   (index == 4 here)
char OneDimCharSpace[4096];
/* Record the requested length and hand back the start of the shared buffer. */
char *MkOneDimCharArray(int n)
{
    char *base = OneDimCharSpace;   /* array name decays to &OneDimCharSpace[0] */

    OneDimCharSpaceSize = n;
    return base;
}
/* Store value into element number index of ary. */
void OneDimCharArraySet(char *ary, int index, char value)
{
    ary[index] = value;
}
/* Return element number index of ary. */
char OneDimCharArrayGet(char *ary, int index)
{
    char element = ary[index];
    return element;
}
Function implementation code in Assembly:
# Export the three one-dimensional char array routines to the linker.
.global MkOneDimCharArray
.global OneDimCharArraySet
.global OneDimCharArrayGet
.data
OneDimCharSpace: .zero 4096             # 4096 zero bytes of backing storage
OneDimCharSpaceSize: .long 0            # MkOneDimCharArray stores its argument n here
.text
# char *MkOneDimCharArray(int n)
#   In:   8(%ebp) = n
#   Out:  %eax = &OneDimCharSpace[0]; OneDimCharSpaceSize = n
#   Uses only %eax/%ecx as scratch. The earlier version went through %ebx,
#   which a cdecl callee must preserve for its caller.
MkOneDimCharArray:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %ecx                   # ecx = n
        movl    %ecx, OneDimCharSpaceSize       # remember the requested size
        leal    OneDimCharSpace, %eax           # eax = &OneDimCharSpace[0]
        leave
        ret
# void OneDimCharArraySet(char *ary, int index, char value)
#   In:   8(%ebp) = ary, 12(%ebp) = index, 16(%ebp) = value (low byte of the slot)
#   Uses only %eax/%edx as scratch; %edi (callee-saved under cdecl) is left alone.
OneDimCharArraySet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %edx           # edx = &ary[0]
        addl    12(%ebp), %edx          # edx = &ary[index]
        movl    16(%ebp), %eax          # eax = value (only %al is used)
        movb    %al, (%edx)             # ary[index] = value
        leave
        ret
# char OneDimCharArrayGet(char *ary, int index)
#   In:   8(%ebp) = ary, 12(%ebp) = index
#   Out:  %eax = ary[index], zero-extended to 32 bits
#   Uses only %eax/%edx as scratch; %edi (callee-saved under cdecl) is left alone.
OneDimCharArrayGet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %edx           # edx = &ary[0]
        addl    12(%ebp), %edx          # edx = &ary[index]
        movzbl  (%edx), %eax            # eax = ary[index], upper 24 bits cleared
        leave
        ret
Question: How do we modify the implementation to handle one dimensional int arrays?
1.2 Implementing two dimensional char arrays
int main() {
    /* Built-in two-dimensional array: 3 rows x 4 columns. */
    char a5[3][4];
    a5[0][0] = 'x';
    a5[1][0] = 'y';
    a5[2][2] = 'z';
    char a = a5[2][2];

    /* Same operations through the function interface (index = row*4 + col). */
    char *a6 = MkTwoDimCharArray(3, 4);
    TwoDimCharArraySet(a6, 0, 0, 'x');      // maps to memory[0]
    TwoDimCharArraySet(a6, 1, 0, 'y');      // maps to memory[4]
    TwoDimCharArraySet(a6, 2, 2, 'z');      // maps to memory[10]
    char b = TwoDimCharArrayGet(a6, 2, 2);
}
To implement a two-dimensional array in assembly, which has one dimensional memory,
we need to map the two-dimensional array row by row to the one dimensional memory.
a6[0][0],a6[0][1],a6[0][2],a6[0][3],a6[1][0],a6[1][1],a6[1][2],a6[1][3],a6[2][0],a6[2][1],a6[2][2],a6[2][3]
 |        |        |        |        |        |        |        |        |        |        |        |
 v        v        v        v        v        v        v        v        v        v        v        v
[0]------[1]------[2]------[3]------[4]------[5]------[6]------[7]------[8]------[9]------[10]-----[11]
TwoDimSpace
map(row,col) -> index to TwoDimSpace, index = row*(NUMBER OF COLUMNS) + col
In C, function code:
/* Dimensions recorded by MkTwoDimCharArray; the column count drives the index mapping. */
int TwoDimSpaceRow = 0;
int TwoDimSpaceCol = 0;
/* Flat backing storage that the two-dimensional array is mapped onto row by row. */
char TwoDimSpace[4096];
/* Record the dimensions and hand back the start of the shared buffer. */
char *MkTwoDimCharArray(int row, int col)
{
    char *base = TwoDimSpace;   /* array name decays to &TwoDimSpace[0] */

    TwoDimSpaceRow = row;
    TwoDimSpaceCol = col;
    return base;
}
/* Store value at (row, col) using the row-major mapping row*columns + col. */
void TwoDimCharArraySet(char *ary, int row, int col, char value)
{
    int offset = row * TwoDimSpaceCol + col;
    ary[offset] = value;
}
/* Fetch the element at (row, col) using the row-major mapping row*columns + col. */
char TwoDimCharArrayGet(char *ary, int row, int col)
{
    int offset = row * TwoDimSpaceCol + col;
    return ary[offset];
}
In Assembly, function code:
# Export the three two-dimensional char array routines to the linker.
.global MkTwoDimCharArray
.global TwoDimCharArraySet
.global TwoDimCharArrayGet
.data
TwoDimCharSpace: .zero 4096             # 4096 zero bytes of flat backing storage
TwoDimCharSpaceRow: .long 0             # row count stored by MkTwoDimCharArray
TwoDimCharSpaceCol: .long 0             # column count stored by MkTwoDimCharArray; multiplier in Set/Get
.text
# char *MkTwoDimCharArray(int row, int col)
#   In:   8(%ebp) = row, 12(%ebp) = col
#   Out:  %eax = &TwoDimCharSpace[0]; both dimensions are recorded
#   Uses only %eax/%ecx as scratch. The earlier version went through %ebx,
#   which a cdecl callee must preserve for its caller.
MkTwoDimCharArray:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %ecx                   # ecx = row
        movl    %ecx, TwoDimCharSpaceRow
        movl    12(%ebp), %ecx                  # ecx = col
        movl    %ecx, TwoDimCharSpaceCol
        leal    TwoDimCharSpace, %eax           # eax = &TwoDimCharSpace[0]
        leave
        ret
# void TwoDimCharArraySet(char *ary, int row, int col, char value)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col, 20(%ebp) = value
#   Element address = ary + row*TwoDimCharSpaceCol + col
#   Uses only %eax/%edx as scratch; %edi (callee-saved under cdecl) is left alone.
TwoDimCharArraySet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    12(%ebp), %eax                  # eax = row
        imull   TwoDimCharSpaceCol, %eax        # eax = row * number of columns
        addl    16(%ebp), %eax                  # eax = row * number of columns + col
        addl    8(%ebp), %eax                   # eax = &ary[row][col]
        movl    20(%ebp), %edx                  # edx = value (only %dl is used)
        movb    %dl, (%eax)                     # ary[row][col] = value
        leave
        ret
# char TwoDimCharArrayGet(char *ary, int row, int col)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col
#   Out:  %eax = ary[row][col], zero-extended to 32 bits
#   Uses only %eax as scratch; %edi (callee-saved under cdecl) is left alone.
TwoDimCharArrayGet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    12(%ebp), %eax                  # eax = row
        imull   TwoDimCharSpaceCol, %eax        # eax = row * number of columns
        addl    16(%ebp), %eax                  # eax = row * number of columns + col
        addl    8(%ebp), %eax                   # eax = &ary[row][col]
        movzbl  (%eax), %eax                    # eax = ary[row][col], upper 24 bits cleared
        leave
        ret
Question: How to use indirect addressing+index notation here? (%edi,%eax)
Reference: https://paul.bone.id.au/blog/2018/09/05/x86-addressing/
Question: How do we derive an equation to handle three dimensional arrays, like char threedim[3][4][5] ?
1.3 Implementing two dimensional int arrays
int main() {
    /* Built-in two-dimensional int array: 3 rows x 4 columns. */
    int a7[3][4];
    a7[0][0] = 1;
    a7[1][0] = 2;
    a7[2][0] = 3;
    int m = a7[1][1];

    /* Same operations through the function interface.
       MkTwoDimIntArray returns int *, so the handle is declared int *. */
    int *a8 = MkTwoDimIntArray(3,4);
    TwoDimIntArraySet(a8,0,0,1);
    TwoDimIntArraySet(a8,1,0,2);
    TwoDimIntArraySet(a8,2,0,3);
    int n = TwoDimIntArrayGet(a8,1,1);
}
Function implementation in Assembly:
# Export the three two-dimensional int array routines to the linker.
.global MkTwoDimIntArray
.global TwoDimIntArraySet
.global TwoDimIntArrayGet
.data
TwoDimIntSpace: .zero 4096              # 4096 zero bytes == room for 1024 four-byte ints
TwoDimIntSpaceRow: .long 0              # row count stored by MkTwoDimIntArray
TwoDimIntSpaceCol: .long 0              # column count stored by MkTwoDimIntArray; multiplier in Set/Get
.text
# int *MkTwoDimIntArray(int row, int col)
#   In:   8(%ebp) = row, 12(%ebp) = col
#   Out:  %eax = &TwoDimIntSpace[0]; both dimensions are recorded
#   Uses only %eax/%ecx as scratch. The earlier version went through %ebx,
#   which a cdecl callee must preserve for its caller.
MkTwoDimIntArray:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %ecx                   # ecx = row
        movl    %ecx, TwoDimIntSpaceRow
        movl    12(%ebp), %ecx                  # ecx = col
        movl    %ecx, TwoDimIntSpaceCol
        leal    TwoDimIntSpace, %eax            # eax = &TwoDimIntSpace[0]
        leave
        ret
# void TwoDimIntArraySet(int *ary, int row, int col, int value)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col, 20(%ebp) = value
#   Element address = ary + 4*(row*TwoDimIntSpaceCol + col)
#   Uses only %eax/%edx as scratch; %edi (callee-saved under cdecl) is left alone.
TwoDimIntArraySet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    12(%ebp), %eax                  # eax = row
        imull   TwoDimIntSpaceCol, %eax         # eax = row * number of columns
        addl    16(%ebp), %eax                  # eax = row * number of columns + col
        shll    $2, %eax                        # scale by 4: each int is 4 bytes
        addl    8(%ebp), %eax                   # eax = &ary[row][col]
        movl    20(%ebp), %edx                  # edx = value
        movl    %edx, (%eax)                    # ary[row][col] = value
        leave
        ret
# int TwoDimIntArrayGet(int *ary, int row, int col)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col
#   Out:  %eax = ary[row][col]
#   Uses only %eax as scratch; %edi (callee-saved under cdecl) is left alone.
TwoDimIntArrayGet:
        pushl   %ebp
        movl    %esp, %ebp
        movl    12(%ebp), %eax                  # eax = row
        imull   TwoDimIntSpaceCol, %eax         # eax = row * number of columns
        addl    16(%ebp), %eax                  # eax = row * number of columns + col
        shll    $2, %eax                        # scale by 4: each int is 4 bytes
        addl    8(%ebp), %eax                   # eax = &ary[row][col]
        movl    (%eax), %eax                    # eax = ary[row][col]
        leave
        ret
Question: How to use indirect addressing+index notation here? (%edi,%eax,4)
Question: In C, we can have short integer of 2 bytes, and have short arrays like: short shortary[3][3];
To support two dimensional short arrays, how can we implement the following functions:
MkTwoDimShortArray
TwoDimShortArraySet
TwoDimShortArrayGet
Question: How do we implement 3 dimensional arrays in assembly?
char a[3][4][5];
a[2][2][2]='x';
cout << a[2][2][2];
size = number of rows * number of columns * number of depth units
3*4*5 = 60
map(row,col,depth) -> index to ThreeDimSpace,
index = row*(NUMBER OF COLUMNS*NUMBER OF DEPTH UNITS) + col*(NUMBER of DEPTH UNITS) + depth
a[0][0][0],a[0][0][1],..,a[0][0][4],a[0][1][0],a[0][1][1],..,a[0][3][4],a[1][0][0],..,a[2][3][4]
a[0][1][1]=1 -> 0*4*5 + 1*5 + 1 = 6
a[1][1][1]=2 -> 1*4*5 + 1*5 + 1 = 26
a[2][2][2]=3 -> 2*4*5 + 2*5 + 2 = 52
a[2][2][4]=4 -> 2*4*5 + 2*5 + 4 = 54
a[2][3][1]=5 -> 2*4*5 + 3*5 + 1 = 56
      1                   2                         3 4 5
      |                   |                         | | |
      v                   v                         v v v
------------------------------------------------------------
012345678911111111112222222222333333333344444444445555555555
          01234567890123456789012345678901234567890123456789
2. Structures in assembly
https://www.youtube.com/watch?v=MIs1qHjI_i4
https://www.youtube.com/watch?v=TtAsN0ptKcw
How do we implement the following structure in assembly:
// One employee record: a numeric id followed by two fixed-size character fields.
struct employee {
int employee_id;            // 4-byte integer id
char employee_phone[10];    // 10 characters of phone number
char employee_name[20];     // 20 characters of name
};
struct employee employee1;  // a statically allocated record
struct employee *eptr;      // pointer to a dynamically allocated record
eptr = malloc(sizeof(struct employee)); // similar to Employee eptr = new Employee()
# An x86 asm program using a struct
.global main
.data
# Byte offsets of each field from the start of an employee record.
employee_id = 0                         # int, 4 bytes
employee_phone = 0+4                    # 10 bytes, starts right after the id
employee_name = 0+4+10                  # 20 bytes, starts right after the phone
employee_size = 0+4+10+20               # total record size: 34 bytes
employee:
.space employee_size                    # one statically reserved record
employee1: .long 0                      # main stores the pointer returned by MkEmployee here
new_phone: .ascii "4089241111"          # 10 characters, no terminating NUL; passed to SetEmployeePhone
.text
# Demo driver: fills one record with direct stores, then creates a second
# record and exercises the id and phone accessors on it.
main:
        # eax = malloc(employee_size)
        pushl   $employee_size
        call    malloc
        addl    $4, %esp
        # NOTE: this first record is neither stored nor freed; the pointer in
        # %eax is overwritten by the MkEmployee call below.

        # eax->employee_id = 123456789
        movl    $123456789, employee_id(%eax)

        # eax->employee_phone = "4089241234"
        # x86 is little-endian, so the first character goes in the LOW byte
        # of each immediate.
        movl    $0x39383034, employee_phone(%eax)       # "4089"
        movl    $0x32313432, employee_phone+4(%eax)     # "2412"
        movw    $0x3433, employee_phone+8(%eax)         # "34"

        # eax->employee_name = "john"
        # 'j'=0x6a 'o'=0x6f 'h'=0x68 'n'=0x6e, first character in the low byte.
        # (0x6a6f686e would store the bytes in the order "nhoj".)
        movl    $0x6e686f6a, employee_name(%eax)
        movb    $0, employee_name+4(%eax)               # terminate: malloc memory is not zeroed

        # employee1 = MkEmployee()
        call    MkEmployee
        movl    %eax, employee1

        # SetEmployeeId(employee1, 123456788)
        pushl   $123456788
        pushl   employee1
        call    SetEmployeeId
        addl    $8, %esp

        # eax = GetEmployeeId(employee1)
        pushl   employee1
        call    GetEmployeeId
        addl    $4, %esp

        # SetEmployeePhone(employee1, new_phone)
        pushl   $new_phone
        pushl   employee1
        call    SetEmployeePhone
        addl    $8, %esp

        # eax = GetEmployeePhone(employee1)
        pushl   employee1
        call    GetEmployeePhone
        addl    $4, %esp

        # exit: Linux int 0x80 system call 1, status = last return value
        movl    %eax, %ebx
        movl    $1, %eax
        int     $0x80
# Employee *MkEmployee(void)
#   Out: %eax = whatever malloc(employee_size) returned
MkEmployee:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   $employee_size          # argument: number of bytes to allocate
        call    malloc
        movl    %ebp, %esp              # discard the pushed argument (first half of `leave`)
        popl    %ebp                    # restore the caller's frame (second half of `leave`)
        ret
# void SetEmployeeId(Employee *e, int id)
#   In:   8(%ebp) = e, 12(%ebp) = id
#   Uses only %eax/%edx as scratch; %edi (callee-saved under cdecl) is left alone.
SetEmployeeId:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %edx                   # edx = e
        movl    12(%ebp), %eax                  # eax = id
        movl    %eax, employee_id(%edx)         # e->employee_id = id
        leave
        ret
# int GetEmployeeId(Employee *e)
#   In:   8(%ebp) = e
#   Out:  %eax = e->employee_id
#   Uses only %eax as scratch; %edi (callee-saved under cdecl) is left alone.
GetEmployeeId:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %eax                   # eax = e
        movl    employee_id(%eax), %eax         # eax = e->employee_id
        leave
        ret
# void SetEmployeePhone(Employee *e, char *phone)
#   In:   8(%ebp) = e, 12(%ebp) = phone (exactly 10 bytes are read)
#   Copies 10 bytes from phone into e->employee_phone.
#   %esi and %edi are callee-saved under cdecl, so they are saved and restored;
#   the byte in flight travels through %al rather than callee-saved %bl.
SetEmployeePhone:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        movl    8(%ebp), %edi                   # edi = e
        addl    $employee_phone, %edi           # edi = &e->employee_phone[0]
        movl    12(%ebp), %esi                  # esi = phone
        movl    $10, %ecx                       # ecx = bytes left (size of the phone field)
SetEmployeePhoneLoop:
        movb    (%esi), %al                     # al = *src
        movb    %al, (%edi)                     # *dst = al
        incl    %esi
        incl    %edi
        decl    %ecx
        jnz     SetEmployeePhoneLoop            # loop until all 10 bytes are copied
        popl    %edi                            # restore in reverse push order
        popl    %esi
        leave
        ret
# char *GetEmployeePhone(Employee *e)
#   In:   8(%ebp) = e
#   Out:  %eax = &e->employee_phone[0]
#   The phone field is 10 bytes and cannot fit in %eax, so the address of the
#   field is returned. (A plain movl load here would return only its first
#   four characters packed into an integer.) The field has no terminating NUL.
#   Uses only %eax as scratch; %edi (callee-saved under cdecl) is left alone.
GetEmployeePhone:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %eax                   # eax = e
        leal    employee_phone(%eax), %eax      # eax = &e->employee_phone[0]
        leave
        ret
Question: How do we implement SetEmployeeName and GetEmployeeName?