Register | Login
Forum Index > Samples > MMX strlcpy
Author Message
Pages: 1
Admin
Site Admin

avatar

(send private message)

Posts: 933
Topics: 55

Location:
OverHertz Studio
[1025] MMX strlcpy - posted: 2012-05-07 20:10:13
OK so i decided i should write a sample using MMX instructions, so i've upgraded the standard strlcpy macro.

so here we have the standard macro packaged with ziron:

Code:
//
// Copyright (c) 2011. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest.
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// The copy is done as len/4 dwords followed by the remaining 0-3 bytes.
// No terminator is written. edi, esi and ecx are saved and restored.
inline procedure strlcpy(dest, source, len) {
  push edi
  push esi
  push ecx

  // NOTE(review): cld is commented out, so the string instructions below
  // rely on the direction flag already being clear - confirm with callers.
  //cld
  edi = $dest;
  esi = $source;
  // Bulk phase: ecx = len / 4 dwords.
  ecx = $len;
  shr ecx, 2
  rep movsd

  // Tail phase: ecx = len mod 4 leftover bytes.
  ecx = $len;
  and ecx, 3
  rep movsb

  // Restore in reverse push order.
  pop ecx
  pop esi
  pop edi
}


and here we have the upgraded MMX macro:

Code:
//
// Copyright (c) 2012. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest using the MMX register mm0.
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// The copy is done as len/8 qwords followed by the remaining 0-7 bytes.
// No terminator is written. edi, esi and ecx are saved and restored, and
// emms is issued at the end to leave the MMX state empty.
inline procedure mmx_strlcpy(dest, source, len) {
  push edi
  push esi
  push ecx

  edi = $dest;
  esi = $source;
  ecx = $len;

  // Bulk phase: ecx = len / 8 qwords. The guard is required: with a zero
  // count the decrement below would wrap and the loop would run ~2^32 times.
  shr ecx, 3
  if (ecx <> 0) {
    @loop:
      movq mm0, qword[esi]
      movq qword[edi], mm0

      esi += 8;
      edi += 8;

      ecx--;
    jnz @loop;
  }

  // Tail phase: the bulk loop moves 8 bytes per pass, so up to 7 bytes can
  // remain (mask 7, not 3).
  ecx = $len;
  and ecx, 7
  rep movsb

  // Restore in reverse push order.
  pop ecx
  pop esi
  pop edi

  emms
}


a sample of using with speed test:

Code:
program WIN32CUI 'test';

#include 'ch.zir';

//
// Copyright (c) 2012. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest using the MMX register mm0.
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// The copy is done as len/8 qwords followed by the remaining 0-7 bytes.
// No terminator is written. edi, esi and ecx are saved and restored, and
// emms is issued at the end to leave the MMX state empty.
inline procedure mmx_strlcpy(dest, source, len) {
  push edi
  push esi
  push ecx

  edi = $dest;
  esi = $source;
  ecx = $len;

  // Bulk phase: ecx = len / 8 qwords. The guard is required: with a zero
  // count the decrement below would wrap and the loop would run ~2^32 times.
  shr ecx, 3
  if (ecx <> 0) {
    @loop:
      movq mm0, qword[esi]
      movq qword[edi], mm0

      esi += 8;
      edi += 8;

      ecx--;
    jnz @loop;
  }

  // Tail phase: the bulk loop moves 8 bytes per pass, so up to 7 bytes can
  // remain (mask 7, not 3).
  ecx = $len;
  and ecx, 7
  rep movsb

  // Restore in reverse push order.
  pop ecx
  pop esi
  pop edi

  emms
}

// Benchmark buffers: str1 is the copy source, str2 the destination.
// Nothing in this program initialises their contents before copying.
char str1[2048];
char str2[2048];

// Time 5,000,000 copies of 2048 bytes with mmx_strlcpy.
// edi keeps the starting tick count and esi is the loop counter; the copy
// macro pushes and pops both, so they survive each call.
edi = GetTickCount();
for (esi = 1 to 5000000) {
  mmx_strlcpy(@str2, @str1, 2048);
}
// Elapsed time = end tick - start tick.
eax = GetTickCount();
eax -= edi;

print('mmx_strlcpy: ', eax, 'ms\r\n');

////////////

// Same measurement for strlcpy, which is not defined in this listing
// (presumably the standard macro shipped with Ziron - confirm).
edi = GetTickCount();
for (esi = 1 to 5000000) {
  strlcpy(@str2, @str1, 2048);
}
eax = GetTickCount();
eax -= edi;

print('strlcpy: ', eax, 'ms\r\n');


//////////

// Wait for a key before exiting.
wait_key(0);
    
ExitProcess(0);


output on core 2 quad 2.4 with 2GB 800mhz GEIL ram:

Code:
mmx_strlcpy: 3268ms
strlcpy: 4172ms

[/code]

Download Ziron
Get free hosting for Ziron related fan-sites and Ziron projects, contact me in private message.
Admin
Site Admin

avatar

(send private message)

Posts: 933
Topics: 55

Location:
OverHertz Studio
[1028] - posted: 2012-05-07 21:27:00
slightly improved:

Code:
//
// Copyright (c) 2012. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest using MMX registers.
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// Three phases: 64-byte blocks through mm0-mm7, then single qwords through
// mm0, then the final 0-7 bytes with rep movsb.
// No terminator is written. edi, esi, ecx and edx are saved and restored,
// and emms is issued at the end to leave the MMX state empty.
inline procedure mmx_strlcpy(dest, source, len) {
  push edi
  push esi
  push ecx
  push edx

  edi = $dest;
  esi = $source;
  ecx = $len;
  // edx tracks how many bytes are still to be copied.
  edx = ecx;

  // Phase 1: ecx = len / 64 blocks; load all eight qwords, then store them.
  shr ecx, 6
  if (ecx <> 0) {
    @loop1:
      movq mm0, qword[esi+0]
      movq mm1, qword[esi+8]
      movq mm2, qword[esi+16]
      movq mm3, qword[esi+24]
      movq mm4, qword[esi+32]
      movq mm5, qword[esi+40]
      movq mm6, qword[esi+48]
      movq mm7, qword[esi+56]

      movq qword[edi+0], mm0
      movq qword[edi+8], mm1
      movq qword[edi+16], mm2
      movq qword[edi+24], mm3
      movq qword[edi+32], mm4
      movq qword[edi+40], mm5
      movq qword[edi+48], mm6
      movq qword[edi+56], mm7

      esi += 64;
      edi += 64;

      edx -= 64;

      // ecx-- is last so that jnz tests the block counter.
      ecx--;
    jnz @loop1;
  }

  // Phase 2: edx is now below 64; copy its whole qwords one at a time.
  ecx = edx;
  shr ecx, 3

  if (ecx <> 0) {
    @loop2:
      movq mm0, qword[esi]
      movq qword[edi], mm0

      esi += 8;
      edi += 8;

      ecx--;
    jnz @loop2;
  }

  // Phase 3: phase 2 moves 8 bytes per pass, so up to 7 bytes can remain
  // (mask 7, not 3). edx mod 8 equals len mod 8.
  ecx = edx;
  and ecx, 7
  rep movsb

  // Restore in reverse push order.
  pop edx
  pop ecx
  pop esi
  pop edi

  emms
}


test app:

Code:
program WIN32CUI 'test';

#include 'ch.zir';

//
// Copyright (c) 2012. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest using the MMX register mm0
// (the earlier, non-unrolled variant, kept here for comparison).
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// The copy is done as len/8 qwords followed by the remaining 0-7 bytes.
// No terminator is written. edi, esi and ecx are saved and restored, and
// emms is issued at the end to leave the MMX state empty.
inline procedure mmx_strlcpy2(dest, source, len) {
  push edi
  push esi
  push ecx

  edi = $dest;
  esi = $source;
  ecx = $len;

  // Bulk phase: ecx = len / 8 qwords. The guard is required: with a zero
  // count the decrement below would wrap and the loop would run ~2^32 times.
  shr ecx, 3
  if (ecx <> 0) {
    @loop:
      movq mm0, qword[esi]
      movq qword[edi], mm0

      esi += 8;
      edi += 8;

      ecx--;
    jnz @loop;
  }

  // Tail phase: the bulk loop moves 8 bytes per pass, so up to 7 bytes can
  // remain (mask 7, not 3).
  ecx = $len;
  and ecx, 7
  rep movsb

  // Restore in reverse push order.
  pop ecx
  pop esi
  pop edi

  emms
}

//
// Copyright (c) 2012. OverHertz Ltd
//
// Copies exactly $len bytes from $source to $dest using MMX registers.
//   dest   - destination address (loaded into edi)
//   source - source address (loaded into esi)
//   len    - number of bytes to copy
// Three phases: 64-byte blocks through mm0-mm7, then single qwords through
// mm0, then the final 0-7 bytes with rep movsb.
// No terminator is written. edi, esi, ecx and edx are saved and restored,
// and emms is issued at the end to leave the MMX state empty.
inline procedure mmx_strlcpy(dest, source, len) {
  push edi
  push esi
  push ecx
  push edx

  edi = $dest;
  esi = $source;
  ecx = $len;
  // edx tracks how many bytes are still to be copied.
  edx = ecx;

  // Phase 1: ecx = len / 64 blocks; load all eight qwords, then store them.
  shr ecx, 6
  if (ecx <> 0) {
    @loop1:
      movq mm0, qword[esi+0]
      movq mm1, qword[esi+8]
      movq mm2, qword[esi+16]
      movq mm3, qword[esi+24]
      movq mm4, qword[esi+32]
      movq mm5, qword[esi+40]
      movq mm6, qword[esi+48]
      movq mm7, qword[esi+56]

      movq qword[edi+0], mm0
      movq qword[edi+8], mm1
      movq qword[edi+16], mm2
      movq qword[edi+24], mm3
      movq qword[edi+32], mm4
      movq qword[edi+40], mm5
      movq qword[edi+48], mm6
      movq qword[edi+56], mm7

      esi += 64;
      edi += 64;

      edx -= 64;

      // ecx-- is last so that jnz tests the block counter.
      ecx--;
    jnz @loop1;
  }

  // Phase 2: edx is now below 64; copy its whole qwords one at a time.
  ecx = edx;
  shr ecx, 3

  if (ecx <> 0) {
    @loop2:
      movq mm0, qword[esi]
      movq qword[edi], mm0

      esi += 8;
      edi += 8;

      ecx--;
    jnz @loop2;
  }

  // Phase 3: phase 2 moves 8 bytes per pass, so up to 7 bytes can remain
  // (mask 7, not 3). edx mod 8 equals len mod 8.
  ecx = edx;
  and ecx, 7
  rep movsb

  // Restore in reverse push order.
  pop edx
  pop ecx
  pop esi
  pop edi

  emms
}

// Benchmark buffers: str1 is the copy source, str2 the destination.
// Nothing in this program initialises their contents before copying.
char str1[8192];
char str2[8192];

////////////

// Each section below times 500,000 copies of 8192 bytes.
// edi keeps the starting tick count and esi is the loop counter.
// strlcpy is not defined in this listing (presumably the standard macro
// shipped with Ziron - confirm).
edi = GetTickCount();
for (esi = 1 to 500000) {
  strlcpy(@str2, @str1, 8192);
}
// Elapsed time = end tick - start tick.
eax = GetTickCount();
eax -= edi;

print('strlcpy: ', eax, 'ms\r\n');

//////////

// Earlier MMX variant: one qword per loop iteration.
edi = GetTickCount();
for (esi = 1 to 500000) {
  mmx_strlcpy2(@str2, @str1, 8192);
}
eax = GetTickCount();
eax -= edi;

print('mmx_strlcpy (old): ', eax, 'ms\r\n');

//////////

// Unrolled MMX variant: 64 bytes per loop iteration.
edi = GetTickCount();
for (esi = 1 to 500000) {
  mmx_strlcpy(@str2, @str1, 8192);
}
eax = GetTickCount();
eax -= edi;

print('mmx_strlcpy: ', eax, 'ms\r\n');

// Wait for a key before exiting.
wait_key(0);
    
ExitProcess(0);


output:

Code:
strlcpy: 3531ms
mmx_strlcpy (old): 2969ms
mmx_strlcpy: 1328ms


njoy

Download Ziron
Get free hosting for Ziron related fan-sites and Ziron projects, contact me in private message.
Pages: 1
create new reply


Quick reply:

Message:



Currently Active Users:
There are currently 18 user(s) online. 0 member(s) and 18 guest(s)
Most users ever online was 1046, January 28, 2022, 2:08 pm.


Statistics:
Threads: 225 | Posts: 1848 | Members: 51 | Active Members: 51
Welcome to our newest member, yecate
const Copyright = '2011-2024 © OverHertz Ltd. All rights reserved.';
Web development by OverHertz Ltd