Implementing binary-heap in x86-64 assembly

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;







up vote
3
down vote

favorite












I'm trying to implement some of the simplest classical data-structures in x86-64 assembly on linux.



PRIMARY GOAL: Reduce memory consumption and increase performance (maybe using x86-64 architecture specific tricks).



Also any advises about style and design are highly appreciated! I'm also trying to adhere to x86-64 Linux ABI. Here is what I currently have:



binary-heap.asm:



SYS_mmap equ 0x09
SYS_munmap equ 0x0B

ELEMENT_SIZE equ 0x08
PAGE_SIZE equ 0x8000

section .text
;rdi = size
global _create_heap
;rdi = pointer to the binary heap, rsi = element to insert
global _add
;rdi = pointer to the binary heap, rsi = element number to retrieve
global _get
;rdi = pointer to the binary heap
global _clear
;rdi = pointer to the bionary heap
global _size
;rdi = pointer to the binary heap
global _delete

_create_heap:
mov rcx, ELEMENT_SIZE
mov r10, PAGE_SIZE

;Checking if the memory required to hold the elements of tree is enough
mov rax, rdi
add rax, 2 ; we need to store elements number and current index which are 8 bytes long
mul rcx
jo too_many_elemenents

xor rdx, rdx ;clearing rdx so unsigned division is not affected
div r10


mov rbp, rdi ;ommiting frame pointer and storing elements number as a temporary
test rdx, rdx
;we need to increment rax only in case rdx is not equal to zero
jz allocate_memory
inc rax


allocate_memory:
mul r10
;calling mmap with rax len
mov rsi, rax
mov rax, SYS_mmap
mov rdi, 0x00 ; NULL pointer
mov rdx, 0x03 ; PROT_READ | PROT_WRITE
mov r10, 0x22 ; MAP_PRIVATE | MAP_ANANONYMOUS
mov r9, 0x00 ; OFFSET = 0
syscall

test rax, -1
js cannot_allocate_memory

mov [rax], rbp ; maxmimum allowed elements in this binary heap
mov [rax + ELEMENT_SIZE], dword 0 ; at the creation time there is no elements stored in the binary heap
ret

too_many_elemenents:
mov rax, -2
ret
cannot_allocate_memory:
ret

_add:
mov rcx, 0x02 ;dividor
mov rax, [rdi + ELEMENT_SIZE] ; storing current number of elements in this binary heap
cmp [rdi], rax
jz binary_heap_size_overflow

inc rax
mov [rdi + ELEMENT_SIZE], rax
mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], rsi

up_heap:
xor rdx, rdx ; for division

;store offset and value of the element being swapped
mov rbp, rax
mov r10, rsi
;...

div rcx
test rax, rax
jz exit ;We reached the root of this binary heap. Exiting

mov rsi, [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE] ;updating rsi with the parent element
cmp rsi, r10
jnl exit ; Binary heap have consistent ordering. Exiting

;The actual swap of the parent and the child
mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], r10
mov [rdi + ELEMENT_SIZE + rbp * ELEMENT_SIZE], rsi
;...

jmp up_heap

exit:
xor rax, rax
ret

binary_heap_size_overflow:
mov rax, -1
ret

_get:
mov rax, [rdi + ELEMENT_SIZE + ELEMENT_SIZE * rsi]
ret

_size:
mov rax, [rdi + ELEMENT_SIZE]
ret

_clear:
mov [rdi + ELEMENT_SIZE], dword 0 ; setting stored elements count to 0
ret

_delete:
mov rcx, ELEMENT_SIZE
mov rax, [rdi]
add rax, 2
mul rcx

mov rsi, rax
mov rax, SYS_munmap
syscall
ret


main.asm (To test the binary-heap data structure implemented before):



section .text
global _start
extern _create_heap
extern _add
extern _get
extern _size

_start:
mov rdi, 0x03; Size of the binary heap
call _create_heap
mov r12, rax ; according to linux x64-ABI, r12 is callee-saved

mov rdi, r12
mov rsi, 0xFFABB1
call _add

mov rdi, r12
mov rsi, 0x11
call _add

mov rdi, r12
mov rsi, 0xFFFABB1
call _add

mov rdi, r12
mov rsi, 0x12
call _add

mov rdi, r12
call _size

mov rax, 60
syscall






share|improve this question



























    up vote
    3
    down vote

    favorite












    I'm trying to implement some of the simplest classical data-structures in x86-64 assembly on linux.



    PRIMARY GOAL: Reduce memory consumption and increase performance (maybe using x86-64 architecture specific tricks).



    Also any advises about style and design are highly appreciated! I'm also trying to adhere to x86-64 Linux ABI. Here is what I currently have:



    binary-heap.asm:



    SYS_mmap equ 0x09
    SYS_munmap equ 0x0B

    ELEMENT_SIZE equ 0x08
    PAGE_SIZE equ 0x8000

    section .text
    ;rdi = size
    global _create_heap
    ;rdi = pointer to the binary heap, rsi = element to insert
    global _add
    ;rdi = pointer to the binary heap, rsi = element number to retrieve
    global _get
    ;rdi = pointer to the binary heap
    global _clear
    ;rdi = pointer to the bionary heap
    global _size
    ;rdi = pointer to the binary heap
    global _delete

    _create_heap:
    mov rcx, ELEMENT_SIZE
    mov r10, PAGE_SIZE

    ;Checking if the memory required to hold the elements of tree is enough
    mov rax, rdi
    add rax, 2 ; we need to store elements number and current index which are 8 bytes long
    mul rcx
    jo too_many_elemenents

    xor rdx, rdx ;clearing rdx so unsigned division is not affected
    div r10


    mov rbp, rdi ;ommiting frame pointer and storing elements number as a temporary
    test rdx, rdx
    ;we need to increment rax only in case rdx is not equal to zero
    jz allocate_memory
    inc rax


    allocate_memory:
    mul r10
    ;calling mmap with rax len
    mov rsi, rax
    mov rax, SYS_mmap
    mov rdi, 0x00 ; NULL pointer
    mov rdx, 0x03 ; PROT_READ | PROT_WRITE
    mov r10, 0x22 ; MAP_PRIVATE | MAP_ANANONYMOUS
    mov r9, 0x00 ; OFFSET = 0
    syscall

    test rax, -1
    js cannot_allocate_memory

    mov [rax], rbp ; maxmimum allowed elements in this binary heap
    mov [rax + ELEMENT_SIZE], dword 0 ; at the creation time there is no elements stored in the binary heap
    ret

    too_many_elemenents:
    mov rax, -2
    ret
    cannot_allocate_memory:
    ret

    _add:
    mov rcx, 0x02 ;dividor
    mov rax, [rdi + ELEMENT_SIZE] ; storing current number of elements in this binary heap
    cmp [rdi], rax
    jz binary_heap_size_overflow

    inc rax
    mov [rdi + ELEMENT_SIZE], rax
    mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], rsi

    up_heap:
    xor rdx, rdx ; for division

    ;store offset and value of the element being swapped
    mov rbp, rax
    mov r10, rsi
    ;...

    div rcx
    test rax, rax
    jz exit ;We reached the root of this binary heap. Exiting

    mov rsi, [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE] ;updating rsi with the parent element
    cmp rsi, r10
    jnl exit ; Binary heap have consistent ordering. Exiting

    ;The actual swap of the parent and the child
    mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], r10
    mov [rdi + ELEMENT_SIZE + rbp * ELEMENT_SIZE], rsi
    ;...

    jmp up_heap

    exit:
    xor rax, rax
    ret

    binary_heap_size_overflow:
    mov rax, -1
    ret

    _get:
    mov rax, [rdi + ELEMENT_SIZE + ELEMENT_SIZE * rsi]
    ret

    _size:
    mov rax, [rdi + ELEMENT_SIZE]
    ret

    _clear:
    mov [rdi + ELEMENT_SIZE], dword 0 ; setting stored elements count to 0
    ret

    _delete:
    mov rcx, ELEMENT_SIZE
    mov rax, [rdi]
    add rax, 2
    mul rcx

    mov rsi, rax
    mov rax, SYS_munmap
    syscall
    ret


    main.asm (To test the binary-heap data structure implemented before):



    section .text
    global _start
    extern _create_heap
    extern _add
    extern _get
    extern _size

    _start:
    mov rdi, 0x03; Size of the binary heap
    call _create_heap
    mov r12, rax ; according to linux x64-ABI, r12 is callee-saved

    mov rdi, r12
    mov rsi, 0xFFABB1
    call _add

    mov rdi, r12
    mov rsi, 0x11
    call _add

    mov rdi, r12
    mov rsi, 0xFFFABB1
    call _add

    mov rdi, r12
    mov rsi, 0x12
    call _add

    mov rdi, r12
    call _size

    mov rax, 60
    syscall






    share|improve this question























      up vote
      3
      down vote

      favorite









      up vote
      3
      down vote

      favorite











      I'm trying to implement some of the simplest classical data-structures in x86-64 assembly on linux.



      PRIMARY GOAL: Reduce memory consumption and increase performance (maybe using x86-64 architecture specific tricks).



      Also any advises about style and design are highly appreciated! I'm also trying to adhere to x86-64 Linux ABI. Here is what I currently have:



      binary-heap.asm:



      SYS_mmap equ 0x09
      SYS_munmap equ 0x0B

      ELEMENT_SIZE equ 0x08
      PAGE_SIZE equ 0x8000

      section .text
      ;rdi = size
      global _create_heap
      ;rdi = pointer to the binary heap, rsi = element to insert
      global _add
      ;rdi = pointer to the binary heap, rsi = element number to retrieve
      global _get
      ;rdi = pointer to the binary heap
      global _clear
      ;rdi = pointer to the bionary heap
      global _size
      ;rdi = pointer to the binary heap
      global _delete

      _create_heap:
      mov rcx, ELEMENT_SIZE
      mov r10, PAGE_SIZE

      ;Checking if the memory required to hold the elements of tree is enough
      mov rax, rdi
      add rax, 2 ; we need to store elements number and current index which are 8 bytes long
      mul rcx
      jo too_many_elemenents

      xor rdx, rdx ;clearing rdx so unsigned division is not affected
      div r10


      mov rbp, rdi ;ommiting frame pointer and storing elements number as a temporary
      test rdx, rdx
      ;we need to increment rax only in case rdx is not equal to zero
      jz allocate_memory
      inc rax


      allocate_memory:
      mul r10
      ;calling mmap with rax len
      mov rsi, rax
      mov rax, SYS_mmap
      mov rdi, 0x00 ; NULL pointer
      mov rdx, 0x03 ; PROT_READ | PROT_WRITE
      mov r10, 0x22 ; MAP_PRIVATE | MAP_ANANONYMOUS
      mov r9, 0x00 ; OFFSET = 0
      syscall

      test rax, -1
      js cannot_allocate_memory

      mov [rax], rbp ; maxmimum allowed elements in this binary heap
      mov [rax + ELEMENT_SIZE], dword 0 ; at the creation time there is no elements stored in the binary heap
      ret

      too_many_elemenents:
      mov rax, -2
      ret
      cannot_allocate_memory:
      ret

      _add:
      mov rcx, 0x02 ;dividor
      mov rax, [rdi + ELEMENT_SIZE] ; storing current number of elements in this binary heap
      cmp [rdi], rax
      jz binary_heap_size_overflow

      inc rax
      mov [rdi + ELEMENT_SIZE], rax
      mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], rsi

      up_heap:
      xor rdx, rdx ; for division

      ;store offset and value of the element being swapped
      mov rbp, rax
      mov r10, rsi
      ;...

      div rcx
      test rax, rax
      jz exit ;We reached the root of this binary heap. Exiting

      mov rsi, [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE] ;updating rsi with the parent element
      cmp rsi, r10
      jnl exit ; Binary heap have consistent ordering. Exiting

      ;The actual swap of the parent and the child
      mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], r10
      mov [rdi + ELEMENT_SIZE + rbp * ELEMENT_SIZE], rsi
      ;...

      jmp up_heap

      exit:
      xor rax, rax
      ret

      binary_heap_size_overflow:
      mov rax, -1
      ret

      _get:
      mov rax, [rdi + ELEMENT_SIZE + ELEMENT_SIZE * rsi]
      ret

      _size:
      mov rax, [rdi + ELEMENT_SIZE]
      ret

      _clear:
      mov [rdi + ELEMENT_SIZE], dword 0 ; setting stored elements count to 0
      ret

      _delete:
      mov rcx, ELEMENT_SIZE
      mov rax, [rdi]
      add rax, 2
      mul rcx

      mov rsi, rax
      mov rax, SYS_munmap
      syscall
      ret


      main.asm (To test the binary-heap data structure implemented before):



      section .text
      global _start
      extern _create_heap
      extern _add
      extern _get
      extern _size

      _start:
      mov rdi, 0x03; Size of the binary heap
      call _create_heap
      mov r12, rax ; according to linux x64-ABI, r12 is callee-saved

      mov rdi, r12
      mov rsi, 0xFFABB1
      call _add

      mov rdi, r12
      mov rsi, 0x11
      call _add

      mov rdi, r12
      mov rsi, 0xFFFABB1
      call _add

      mov rdi, r12
      mov rsi, 0x12
      call _add

      mov rdi, r12
      call _size

      mov rax, 60
      syscall






      share|improve this question













      I'm trying to implement some of the simplest classical data-structures in x86-64 assembly on linux.



      PRIMARY GOAL: Reduce memory consumption and increase performance (maybe using x86-64 architecture specific tricks).



      Also any advises about style and design are highly appreciated! I'm also trying to adhere to x86-64 Linux ABI. Here is what I currently have:



      binary-heap.asm:



      SYS_mmap equ 0x09
      SYS_munmap equ 0x0B

      ELEMENT_SIZE equ 0x08
      PAGE_SIZE equ 0x8000

      section .text
      ;rdi = size
      global _create_heap
      ;rdi = pointer to the binary heap, rsi = element to insert
      global _add
      ;rdi = pointer to the binary heap, rsi = element number to retrieve
      global _get
      ;rdi = pointer to the binary heap
      global _clear
      ;rdi = pointer to the bionary heap
      global _size
      ;rdi = pointer to the binary heap
      global _delete

      _create_heap:
      mov rcx, ELEMENT_SIZE
      mov r10, PAGE_SIZE

      ;Checking if the memory required to hold the elements of tree is enough
      mov rax, rdi
      add rax, 2 ; we need to store elements number and current index which are 8 bytes long
      mul rcx
      jo too_many_elemenents

      xor rdx, rdx ;clearing rdx so unsigned division is not affected
      div r10


      mov rbp, rdi ;ommiting frame pointer and storing elements number as a temporary
      test rdx, rdx
      ;we need to increment rax only in case rdx is not equal to zero
      jz allocate_memory
      inc rax


      allocate_memory:
      mul r10
      ;calling mmap with rax len
      mov rsi, rax
      mov rax, SYS_mmap
      mov rdi, 0x00 ; NULL pointer
      mov rdx, 0x03 ; PROT_READ | PROT_WRITE
      mov r10, 0x22 ; MAP_PRIVATE | MAP_ANANONYMOUS
      mov r9, 0x00 ; OFFSET = 0
      syscall

      test rax, -1
      js cannot_allocate_memory

      mov [rax], rbp ; maxmimum allowed elements in this binary heap
      mov [rax + ELEMENT_SIZE], dword 0 ; at the creation time there is no elements stored in the binary heap
      ret

      too_many_elemenents:
      mov rax, -2
      ret
      cannot_allocate_memory:
      ret

      _add:
      mov rcx, 0x02 ;dividor
      mov rax, [rdi + ELEMENT_SIZE] ; storing current number of elements in this binary heap
      cmp [rdi], rax
      jz binary_heap_size_overflow

      inc rax
      mov [rdi + ELEMENT_SIZE], rax
      mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], rsi

      up_heap:
      xor rdx, rdx ; for division

      ;store offset and value of the element being swapped
      mov rbp, rax
      mov r10, rsi
      ;...

      div rcx
      test rax, rax
      jz exit ;We reached the root of this binary heap. Exiting

      mov rsi, [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE] ;updating rsi with the parent element
      cmp rsi, r10
      jnl exit ; Binary heap have consistent ordering. Exiting

      ;The actual swap of the parent and the child
      mov [rdi + ELEMENT_SIZE + rax * ELEMENT_SIZE], r10
      mov [rdi + ELEMENT_SIZE + rbp * ELEMENT_SIZE], rsi
      ;...

      jmp up_heap

      exit:
      xor rax, rax
      ret

      binary_heap_size_overflow:
      mov rax, -1
      ret

      _get:
      mov rax, [rdi + ELEMENT_SIZE + ELEMENT_SIZE * rsi]
      ret

      _size:
      mov rax, [rdi + ELEMENT_SIZE]
      ret

      _clear:
      mov [rdi + ELEMENT_SIZE], dword 0 ; setting stored elements count to 0
      ret

      _delete:
      mov rcx, ELEMENT_SIZE
      mov rax, [rdi]
      add rax, 2
      mul rcx

      mov rsi, rax
      mov rax, SYS_munmap
      syscall
      ret


      main.asm (To test the binary-heap data structure implemented before):



      section .text
      global _start
      extern _create_heap
      extern _add
      extern _get
      extern _size

      _start:
      mov rdi, 0x03; Size of the binary heap
      call _create_heap
      mov r12, rax ; according to linux x64-ABI, r12 is callee-saved

      mov rdi, r12
      mov rsi, 0xFFABB1
      call _add

      mov rdi, r12
      mov rsi, 0x11
      call _add

      mov rdi, r12
      mov rsi, 0xFFFABB1
      call _add

      mov rdi, r12
      mov rsi, 0x12
      call _add

      mov rdi, r12
      call _size

      mov rax, 60
      syscall








      share|improve this question












      share|improve this question




      share|improve this question








      edited Feb 24 at 21:56









      200_success

      123k14142399




      123k14142399









      asked Feb 24 at 19:29









      St.Antario

      1435




      1435

























          active

          oldest

          votes











          Your Answer




          StackExchange.ifUsing("editor", function ()
          return StackExchange.using("mathjaxEditing", function ()
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          );
          );
          , "mathjax-editing");

          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "196"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: false,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );








           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f188286%2fimplementing-binary-heap-in-x86-64-assembly%23new-answer', 'question_page');

          );

          Post as a guest



































          active

          oldest

          votes













          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes










           

          draft saved


          draft discarded


























           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f188286%2fimplementing-binary-heap-in-x86-64-assembly%23new-answer', 'question_page');

          );

          Post as a guest













































































          Popular posts from this blog

          Greedy Best First Search implementation in Rust

          Function to Return a JSON Like Objects Using VBA Collections and Arrays

          C++11 CLH Lock Implementation