1. Arrays in Assembly


Question: How do we implement high-level language arrays in assembly?

char carray[20];      // array of 20 char elements
carray[4] = 'x';      // write element at index 4
cout << carray[4] ;   // read element at index 4 back and print it


Answer:

.data
# storage for the char array: one byte per element, initialised to spaces
carray: .ascii "                    "
.text
    movl $4,%edi              # edi = element index (4)
    movb $'x',carray(%edi)    # carray[4] = 'x'  (byte at address carray + edi)
    movb carray(%edi),%al     # al = carray[4]


Question: How about integer arrays?


int iarray[20];       // array of 20 int elements
iarray[4] = 7;        // write element at index 4
cout << iarray[4] ;   // read element at index 4 back and print it


Answer:

.data
iarray: .zero 80 # 80 bytes of 0s == 20 integers of 0s
.text
    movl $4,%edi                 # edi = element index (4)
    movl $7,iarray(,%edi,4)      # iarray[4] = 7  (address = iarray + edi*4)
    movl iarray(,%edi,4),%eax    # eax = iarray[4]


Applications of matrices (two-dimensional arrays) are common:


https://www.programmingsimplified.com/c-program-add-matrices

How to implement array functionalities like this in assembly?

int a1[3][3];

a1[0][0] = 10;
a1[0][1] = 20;
a1[1][1] = 30;

int w = a[1][1];


Solution: map the two-dimensional array into a one-dimensional space,


and provide array creation functions/constructors, set and get functions:


int *a2 = MkTwoDimIntArray(3,3);    // create a 3-row by 3-column int array

// TwoDimIntArraySet(array, row, col, value)
TwoDimIntArraySet(a2,0,0,10);
TwoDimIntArraySet(a2,1,0,20);
TwoDimIntArraySet(a2,1,1,30);

// TwoDimIntArrayGet(array, row, col)
int w = TwoDimIntArrayGet(a2,1,1);


In Assembly, the call code would look like:

  # a2 = MkTwoDimIntArray(3,3); arguments are pushed last-to-first
  pushl $3
  pushl $3
  call MkTwoDimIntArray
  movl %eax,a2          # the function's result comes back in eax
  add   $8,%esp         # remove the 2 pushed arguments (2 * 4 bytes)

  # TwoDimIntArraySet(a2,0,0,10)
  pushl $10             # value
  pushl $0              # col
  pushl $0              # row
  pushl a2              # array pointer
  call TwoDimIntArraySet
  add   $16,%esp        # remove the 4 pushed arguments

  # TwoDimIntArraySet(a2,1,0,20)
  pushl $20             # value
  pushl $0              # col
  pushl $1              # row
  pushl a2              # array pointer
  call TwoDimIntArraySet
  add   $16,%esp

  # TwoDimIntArraySet(a2,1,1,30)
  pushl $30             # value
  pushl $1              # col
  pushl $1              # row
  pushl a2              # array pointer
  call TwoDimIntArraySet
  add   $16,%esp

  # w = TwoDimIntArrayGet(a2,1,1)
  pushl $1              # col
  pushl $1              # row
  pushl a2              # array pointer
  call TwoDimIntArrayGet
  movl %eax,w           # store the returned element
  add   $12,%esp        # remove the 3 pushed arguments



1.1 Implementing one dimensional char arrays


Call code in C:


int main() {
  // Built-in array syntax: write element 4, then read it back.
  char a3[16];
  a3[4] = 'A';
  char x = a3[4];

  // The same two operations expressed through the function interface.
  char *a4 = MkOneDimCharArray(16);
  OneDimCharArraySet(a4,4,'A');
  char y = OneDimCharArrayGet(a4,4);
}


Function implementation code in C:

int OneDimCharSpaceSize;       // size passed to the most recent MkOneDimCharArray call
                               //   0 1 2 3 4
char OneDimCharSpace[4096];    //  -----------------------------------
                               //   ^       ^
                               //  ary   ary+index

// Records the requested size n and returns a pointer to the first byte of
// the shared backing store. Every call returns the same storage.
char *MkOneDimCharArray(int n)
{
  OneDimCharSpaceSize = n;
  return &OneDimCharSpace[0];
}

// Stores value at ary[index]. No bounds check is performed.
void OneDimCharArraySet(char *ary, int index, char value)
{
  *(ary+index) = value;
}

// Returns the char stored at ary[index]. No bounds check is performed.
char OneDimCharArrayGet(char *ary, int index)
{
  return *(ary+index);
}



Function implementation code in Assembly:



# Make the three array functions visible to other object files.
.global	MkOneDimCharArray
.global OneDimCharArraySet
.global OneDimCharArrayGet
.data 
OneDimCharSpace: .zero 4096      # 4096 zero bytes: storage handed out by MkOneDimCharArray
OneDimCharSpaceSize: .long 0     # size argument stored by MkOneDimCharArray
.text
# char *MkOneDimCharArray(int n)
#   In:   8(%ebp) = n
#   Out:  eax = address of OneDimCharSpace
#   Side effect: OneDimCharSpaceSize = n
#   Uses only ecx/eax as scratch, so the callee-saved ebx is left untouched.
MkOneDimCharArray:
  pushl %ebp
  movl  %esp, %ebp
  movl  8(%ebp),%ecx                # ecx = n
  movl  %ecx,OneDimCharSpaceSize    # remember the requested size
  lea   OneDimCharSpace,%eax        # eax = &OneDimCharSpace[0]
  leave
  ret
# void OneDimCharArraySet(char *ary, int index, char value)
#   In:   8(%ebp) = ary, 12(%ebp) = index, 16(%ebp) = value (low byte used)
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
OneDimCharArraySet:
  pushl %ebp
  movl  %esp, %ebp
  movl  8(%ebp),%edx        # edx = &ary[0]
  movl  12(%ebp),%ecx       # ecx = index
  movl  16(%ebp),%eax       # al  = value
  movb  %al,(%edx,%ecx)     # ary[index] = value
  leave
  ret
# char OneDimCharArrayGet(char *ary, int index)
#   In:   8(%ebp) = ary, 12(%ebp) = index
#   Out:  eax = ary[index], zero-extended to 32 bits
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
OneDimCharArrayGet:
  pushl  %ebp
  movl   %esp, %ebp
  movl   8(%ebp),%edx          # edx = &ary[0]
  movl   12(%ebp),%ecx         # ecx = index
  movzbl (%edx,%ecx),%eax      # eax = (unsigned char) ary[index]
  leave
  ret



Question: How do we modify the implementation to handle one dimensional int arrays?



1.2 Implementing two dimensional char arrays


int main() {
  // Built-in two-dimensional array: 3 rows, 4 columns.
  char a5[3][4];
  a5[0][0] = 'x';
  a5[1][0] = 'y';
  a5[2][2] = 'z';
  char a = a5[2][2];

  // The same operations through the function interface.
  // Index into the flat memory = row * 4 + col.
  char *a6 = MkTwoDimCharArray(3,4);
  TwoDimCharArraySet(a6,0,0,'x');   // maps to memory[0]
  TwoDimCharArraySet(a6,1,0,'y');   // maps to memory[4]
  TwoDimCharArraySet(a6,2,2,'z');   // maps to memory[10]
  char b = TwoDimCharArrayGet(a6,2,2);
}


To implement a two-dimensional array in assembly, which has one dimensional memory,
we need to map the two-dimensional array row by row to the one dimensional memory.

a6[0][0],a6[0][1],a6[0][2],a6[0][3],a6[1][0],a6[1][1],a6[1][2],a6[1][3],a6[2][0],a6[2][1],a6[2][2],a6[2][3]
      |        |        |        |        |        |        |        |        |        |        |        |
      v        v        v        v        v        v        v        v        v        v        v        v
     [0]------[1]------[2]------[3]------[4]------[5]------[6]------[7]------[8]------[9]------[10]-----[11]
TwoDimSpace

map(row,col) -> index to TwoDimSpace, index = row*(NUMBER OF COLUMNS) + col

In C, function code:


char TwoDimSpace[4096];   // flat backing store shared by every array handed out
int TwoDimSpaceRow = 0;   // row count given to the last MkTwoDimCharArray call
int TwoDimSpaceCol = 0;   // column count given to the last MkTwoDimCharArray call

// Remembers the dimensions and hands back the start of the backing store.
char *MkTwoDimCharArray(int row, int col)
{
  TwoDimSpaceRow = row;
  TwoDimSpaceCol = col;
  return TwoDimSpace;
}

// Writes value at (row, col) using row-major order: offset = row*columns + col.
void TwoDimCharArraySet(char *ary, int row, int col, char value)
{
  int offset = row * TwoDimSpaceCol + col;
  ary[offset] = value;
}

// Reads the char at (row, col) using the same row-major offset.
char TwoDimCharArrayGet(char *ary, int row, int col)
{
  int offset = row * TwoDimSpaceCol + col;
  return ary[offset];
}


In Assembly, function code:


# Make the three array functions visible to other object files.
.global	MkTwoDimCharArray
.global TwoDimCharArraySet
.global TwoDimCharArrayGet
.data 
TwoDimCharSpace: .zero 4096      # 4096 zero bytes: storage handed out by MkTwoDimCharArray
TwoDimCharSpaceRow: .long 0      # row count stored by MkTwoDimCharArray
TwoDimCharSpaceCol: .long 0      # column count stored by MkTwoDimCharArray; read by Set/Get
.text
# char *MkTwoDimCharArray(int row, int col)
#   In:   8(%ebp) = row, 12(%ebp) = col
#   Out:  eax = address of TwoDimCharSpace
#   Side effects: TwoDimCharSpaceRow = row, TwoDimCharSpaceCol = col
#   Uses only ecx/eax as scratch, so the callee-saved ebx is left untouched.
MkTwoDimCharArray: # char *MkTwoDimCharArray(int row, int col)
  pushl %ebp
  movl  %esp, %ebp
  movl  8(%ebp),%ecx                # ecx = row
  movl  %ecx,TwoDimCharSpaceRow
  movl  12(%ebp),%ecx               # ecx = col
  movl  %ecx,TwoDimCharSpaceCol
  lea   TwoDimCharSpace,%eax        # eax = &TwoDimCharSpace[0]
  leave
  ret

# void TwoDimCharArraySet(char *ary, int row, int col, char value)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col, 20(%ebp) = value (low byte used)
#   Row-major mapping: offset = row * TwoDimCharSpaceCol + col
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
TwoDimCharArraySet: # void TwoDimCharArraySet(char *ary, int row, int col, char value)
  pushl %ebp
  movl  %esp, %ebp
  movl  12(%ebp),%ecx               # ecx = row
  imull TwoDimCharSpaceCol,%ecx     # ecx = row * number of columns
  addl  16(%ebp),%ecx               # ecx = row * number of columns + col
  movl  8(%ebp),%edx                # edx = &array[0][0]
  movl  20(%ebp),%eax               # al  = value
  movb  %al,(%edx,%ecx)             # array[row][col] = value
  leave
  ret

# char TwoDimCharArrayGet(char *ary, int row, int col)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col
#   Out:  eax = array[row][col], zero-extended to 32 bits
#   Row-major mapping: offset = row * TwoDimCharSpaceCol + col
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
TwoDimCharArrayGet: # char TwoDimCharArrayGet(char *ary, int row, int col)
  pushl  %ebp
  movl   %esp, %ebp
  movl   12(%ebp),%ecx              # ecx = row
  imull  TwoDimCharSpaceCol,%ecx    # ecx = row * number of columns
  addl   16(%ebp),%ecx              # ecx = row * number of columns + col
  movl   8(%ebp),%edx               # edx = &array[0][0]
  movzbl (%edx,%ecx),%eax           # eax = (unsigned char) array[row][col]
  leave
  ret


Question: How to use indirect addressing+index notation here? (%edi,%eax)
Reference: https://paul.bone.id.au/blog/2018/09/05/x86-addressing/
Question: How do we derive an equation to handle three dimensional arrays, like char threedim[3][4][5] ?



1.3 Implementing two dimensional int arrays


int main() {
  // Built-in two-dimensional int array: 3 rows, 4 columns.
  int a7[3][4];
  a7[0][0] = 1;
  a7[1][0] = 2;
  a7[2][0] = 3;
  int m = a7[1][1];

  // The same operations through the function interface.
  // MkTwoDimIntArray returns int *, so the handle is declared as int *.
  int *a8 = MkTwoDimIntArray(3,4);
  TwoDimIntArraySet(a8,0,0,1);
  TwoDimIntArraySet(a8,1,0,2);
  TwoDimIntArraySet(a8,2,0,3);
  int n = TwoDimIntArrayGet(a8,1,1);
}

Function implementation in Assembly:
# Make the three array functions visible to other object files.
.global	MkTwoDimIntArray
.global TwoDimIntArraySet
.global TwoDimIntArrayGet
.data 
TwoDimIntSpace: .zero 4096       # 4096 zero bytes: storage handed out by MkTwoDimIntArray
TwoDimIntSpaceRow: .long 0       # row count stored by MkTwoDimIntArray
TwoDimIntSpaceCol: .long 0       # column count stored by MkTwoDimIntArray; read by Set/Get

.text
# int *MkTwoDimIntArray(int row, int col)
#   In:   8(%ebp) = row, 12(%ebp) = col
#   Out:  eax = address of TwoDimIntSpace
#   Side effects: TwoDimIntSpaceRow = row, TwoDimIntSpaceCol = col
#   Uses only ecx/eax as scratch, so the callee-saved ebx is left untouched.
MkTwoDimIntArray: # int *MkTwoDimIntArray(int row, int col)
  pushl %ebp
  movl  %esp, %ebp
  movl  8(%ebp),%ecx               # ecx = row
  movl  %ecx,TwoDimIntSpaceRow
  movl  12(%ebp),%ecx              # ecx = col
  movl  %ecx,TwoDimIntSpaceCol
  lea   TwoDimIntSpace,%eax        # eax = &TwoDimIntSpace[0]
  leave
  ret

# void TwoDimIntArraySet(int *ary, int row, int col, int value)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col, 20(%ebp) = value
#   Row-major mapping: element index = row * TwoDimIntSpaceCol + col,
#   byte offset = element index * 4 (each int is 4 bytes).
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
TwoDimIntArraySet: # void TwoDimIntArraySet(int *ary, int row, int col, int value)
  pushl %ebp
  movl  %esp, %ebp
  movl  12(%ebp),%ecx              # ecx = row
  imull TwoDimIntSpaceCol,%ecx     # ecx = row * Number of Columns
  addl  16(%ebp),%ecx              # ecx = row * Number of Columns + col index
  movl  8(%ebp),%edx               # edx = &array[0][0]
  movl  20(%ebp),%eax              # eax = value
  movl  %eax,(%edx,%ecx,4)         # array[row][col] = value (index scaled by 4)
  leave
  ret

# int TwoDimIntArrayGet(int *ary, int row, int col)
#   In:   8(%ebp) = ary, 12(%ebp) = row, 16(%ebp) = col
#   Out:  eax = array[row][col]
#   Row-major mapping: element index = row * TwoDimIntSpaceCol + col,
#   byte offset = element index * 4 (each int is 4 bytes).
#   Uses only eax/ecx/edx as scratch, so the callee-saved edi is left untouched.
TwoDimIntArrayGet: # int TwoDimIntArrayGet(int *ary, int row, int col)
  pushl %ebp
  movl  %esp, %ebp
  movl  12(%ebp),%ecx              # ecx = row
  imull TwoDimIntSpaceCol,%ecx     # ecx = row * Number of Columns
  addl  16(%ebp),%ecx              # ecx = row * Number of Columns + col index
  movl  8(%ebp),%edx               # edx = &array[0][0]
  movl  (%edx,%ecx,4),%eax         # eax = array[row][col] (index scaled by 4)
  leave
  ret


Question: How to use indirect addressing+index notation here? (%edi,%eax,4)

Question: In C, we can have short integer of 2 bytes, and have short arrays like: short shortary[3][3];
          To support two dimensional short arrays, how can we implement the following functions:
            MkTwoDimShortArray
            TwoDimShortArraySet
            TwoDimShortArrayGet

Question: How do we implement 3 dimensional arrays in assembly?



char a[3][4][5];       // three dimensions: 3 x 4 x 5 char elements
a[2][2][2]='x';        // write one element
cout << a[2][2][2];    // read the same element back and print it


size = number of rows * number of columns * number of depth units

3*4*5 = 60

map(row,col,depth) -> index to ThreeDimSpace, 

index = row*(NUMBER OF COLUMNS*NUMBER OF DEPTH UNITS) + col*(NUMBER of DEPTH UNITS) + depth

a[0][0][0],a[0][0][1],..,a[0][0][4],a[0][1][0],a[0][1][1],..,a[0][3][4],a[1][0][0],..,a[2][3][4]

a[0][1][1]=1 -> 0*4*5 + 1*5 + 1 = 6
a[1][1][1]=2 -> 1*4*5 + 1*5 + 1 = 26
a[2][2][2]=3 -> 2*4*5 + 2*5 + 2 = 52
a[2][2][4]=4 -> 2*4*5 + 2*5 + 4 = 54
a[2][3][1]=5 -> 2*4*5 + 3*5 + 1 = 56

      1                   2                         3 4 5
      |                   |                         | | |
      v                   v                         v v v
------------------------------------------------------------
012345678911111111112222222222333333333344444444445555555555
          01234567890123456789012345678901234567890123456789


2. Structures in assembly

https://www.youtube.com/watch?v=MIs1qHjI_i4
https://www.youtube.com/watch?v=TtAsN0ptKcw

How do we implement the following structure in assembly:

// Employee record with three fields. The assembly version in this section
// lays the fields out back to back at offsets 0, 4 and 14, for 34 bytes total.
// NOTE(review): a C compiler may add trailing padding, so sizeof(struct employee)
// is not guaranteed to equal 34 -- confirm before mixing the C and asm layouts.
struct employee {
  int employee_id;           // numeric id
  char employee_phone[10];   // 10 characters of phone number
  char employee_name[20];    // up to 20 characters of name
};
struct employee employee1;   // an employee object
struct employee *eptr;       // pointer to an employee object

eptr = malloc(sizeof(struct employee)); // similar to Employee eptr = new Employee()

# An x86 (32-bit, AT&T syntax) asm program using a struct
.global  main
.data
# Byte offsets of each field inside an employee record, plus the record size.
employee_id = 0                 # int,      4 bytes
employee_phone = 0+4            # char[10], 10 bytes
employee_name = 0+4+10          # char[20], 20 bytes
employee_size = 0+4+10+20       # total size of one record
employee:
    .space employee_size        # one statically reserved record
employee1: .long 0              # pointer to the record returned by MkEmployee
new_phone: .ascii "4089241111"  # 10 characters, no terminator
.text
main:
#   eax = malloc(employee_size)
    pushl  $employee_size
    call malloc
    addl $4,%esp
#   eax.employee_id = 123456789
    movl $123456789,employee_id(%eax)
#   eax.employee_phone = "4089241234"
#   x86 is little-endian: the lowest-order byte of each immediate is the
#   first character in memory, so "4089" is written as 0x39383034.
    movl $0x39383034,employee_phone(%eax)
    movl $0x32313432,employee_phone+4(%eax)
    movw $0x3433,employee_phone+8(%eax)
#   eax.employee_name = "john"
#   'j'=0x6a 'o'=0x6f 'h'=0x68 'n'=0x6e, lowest byte first => 0x6e686f6a
    movl $0x6e686f6a,employee_name(%eax)
    movb $0,employee_name+4(%eax)        # terminate the string; malloc memory is not zeroed
#   NOTE: the pointer in eax is overwritten by the next call, so the record
#   filled in above is not reachable afterwards.

#   employee1 = MkEmployee()
    call MkEmployee
    movl %eax,employee1

#   SetEmployeeId(employee1, 123456788)
    pushl $123456788
    pushl employee1
    call  SetEmployeeId
    add   $8,%esp

#   eax = GetEmployeeId(employee1)
    pushl employee1
    call  GetEmployeeId
    add   $4,%esp

#   SetEmployeePhone(employee1, new_phone)
    pushl $new_phone
    pushl employee1
    call  SetEmployeePhone
    add   $8,%esp

#   eax = GetEmployeePhone(employee1)
    pushl employee1
    call  GetEmployeePhone
    add   $4,%esp

#   exit: Linux int 0x80 system call 1, exit status taken from eax
    mov    %eax,%ebx
    mov    $1,%eax
    int    $0x80

# Employee *MkEmployee(void)
#   Out: eax = result of malloc(employee_size)
MkEmployee:
    pushl %ebp
    movl  %esp, %ebp
    pushl $employee_size        # argument for malloc
    call  malloc
    movl  %ebp, %esp            # drop the pushed argument
    popl  %ebp
    ret

# void SetEmployeeId(Employee *e, int id)
#   In:  8(%ebp) = e, 12(%ebp) = id
#   Uses only eax/edx as scratch, so the callee-saved edi is left untouched.
SetEmployeeId: # SetEmployeeId(Employee *e, int id)
    pushl %ebp
    movl  %esp, %ebp
    movl  8(%ebp),%edx              # edx = e
    movl  12(%ebp),%eax             # eax = id
    movl  %eax,employee_id(%edx)    # e->employee_id = id
    leave
    ret

# int GetEmployeeId(Employee *e)
#   In:  8(%ebp) = e
#   Out: eax = e->employee_id
#   Uses only eax/edx as scratch, so the callee-saved edi is left untouched.
GetEmployeeId: # GetEmployeeId(Employee *e)
    pushl %ebp
    movl  %esp, %ebp
    movl  8(%ebp),%edx              # edx = e
    movl  employee_id(%edx),%eax    # eax = e->employee_id
    leave
    ret

# void SetEmployeePhone(Employee *e, char *phone)
#   In:  8(%ebp) = e, 12(%ebp) = phone (10 characters are read from it)
#   Copies 10 bytes from phone into e->employee_phone.
#   esi and edi are callee-saved, so they are saved on entry and restored on
#   exit; the byte in flight travels through al instead of the callee-saved ebx.
SetEmployeePhone: # SetEmployeePhone(Employee *e, char *phone)
    pushl %ebp
    movl  %esp, %ebp
    pushl %esi                      # preserve callee-saved registers
    pushl %edi
    movl  8(%ebp),%edi
    addl  $employee_phone,%edi      # edi = &e->employee_phone[0]
    movl  12(%ebp),%esi             # esi = phone
    movl  $10,%ecx                  # ecx = bytes left to copy (size of employee_phone)
SetEmployeePhoneLoop:
    movb  (%esi),%al                # copy one byte
    movb  %al,(%edi)
    inc   %esi
    inc   %edi
    dec   %ecx
    jnz   SetEmployeePhoneLoop
    popl  %edi                      # restore in reverse order
    popl  %esi
    leave
    ret

# char *GetEmployeePhone(Employee *e)
#   In:  8(%ebp) = e
#   Out: eax = address of e->employee_phone (10 characters, not NUL-terminated)
#   The field is a 10-byte array, so its address is returned; loading a
#   32-bit value from the field would only yield its first four characters.
#   Uses only eax as scratch, so the callee-saved edi is left untouched.
GetEmployeePhone: # GetEmployeePhone(Employee *e)
    pushl %ebp
    movl  %esp, %ebp
    movl  8(%ebp),%eax              # eax = e
    leal  employee_phone(%eax),%eax # eax = &e->employee_phone[0]
    leave
    ret

Question: How do we implement SetEmployeeName and GetEmployeeName?