35 comments on “Reversing the Interview Process

  1. If a rep scasb is not considered as a branch instruction and we can cheat like Aaron by submitting an assembly solution (yes that’s a lot of conditions but gcc can’t produce rep scasb instruction even with th -Os option) then I’ve a solution in 18 bytes :P

    [BITS 32]

    section .text

    global _start

    _start:
    pop eax
    pop eax
    xor eax, eax
    xor ecx, ecx
    dec ecx
    pop edi
    repne scasb
    inc eax
    mov ebx, ecx
    not ebx
    dec ebx
    int 0×80

  2. Well that is in fact kind of cheating! :P

    And since you got to cheat, I get to be picky :D You don’t set the direction register, so your code could return something that’s not the length of the string!

    Jokes aside, I like your solution. It’s the simplest strlen possible really.

  3. 2 other **UGLY** solutions, first one use gcc variable pointers (not standard) second one is pure standard C and use a big switch and force the compiler to use an table.
    Both compiled with -O3 flag produce function without conditional branches (the first one use conditional mov but it could be replaced by non conditional instructions …)

    code : http://pastebin.com/WnjwbDzQ

    Ok this is ugly hacks that may not work with other compiler / architecture but it doesn’t use asm inline :P.

  4. 25 bytes with cld, so 24 bytes without cld. AFAIR windows clears the direction flag before the entry point, dunno about linux :p

    [BITS 32]

    section .text

    global _start

    _start:
    pop eax
    pop ebx
    pop esi
    xor ebx, ebx
    mov edx, _continue
    lea ecx, [edx+_gtfo-_continue]
    cld
    _continue:
    inc ebx
    lodsb
    test al, al
    cmovz edx, ecx
    jmp edx
    _gtfo:
    inc eax
    dec ebx
    int 0×80

    • Looks like 30 bytes by my count. What does objdump display on that?

      08048080 :
      8048080: 58 pop eax
      8048081: 5b pop ebx
      8048082: 5e pop esi
      8048083: 31 db xor ebx,ebx
      8048085: ba 91 80 04 08 mov edx,0×8048091
      804808a: 8d 8a 09 00 00 00 lea ecx,[edx+0x9]
      8048090: fc cld

      08048091 :
      8048091: 43 inc ebx
      8048092: ac lods al,BYTE PTR ds:[esi]
      8048093: 84 c0 test al,al
      8048095: 0f 44 d1 cmove edx,ecx
      8048098: ff e2 jmp edx

      0804809a :
      804809a: 40 inc eax
      804809b: 4b dec ebx
      804809c: cd 80 int 0×80

      • p@ubuntu:~/strlen$ objdump -d -M intel cmov

        cmov: file format elf32-i386

        Disassembly of section .text:

        08048060 :
        8048060: 58 pop eax
        8048061: 5b pop ebx
        8048062: 5e pop esi
        8048063: 31 db xor ebx,ebx
        8048065: ba 6e 80 04 08 mov edx,0x804806e
        804806a: 8d 4a 09 lea ecx,[edx+0x9]
        804806d: fc cld

        0804806e :
        804806e: 43 inc ebx
        804806f: ac lods al,BYTE PTR ds:[esi]
        8048070: 84 c0 test al,al
        8048072: 0f 44 d1 cmove edx,ecx
        8048075: ff e2 jmp edx

        08048077 :
        8048077: 40 inc eax
        8048078: 4b dec ebx
        8048079: cd 80 int 0×80

        so it’s 27, (or 26 without cld) — forgot to add 2 last bytes :o

  5. I decided to cheat in a different way: by reimplimenting jz! Although that’s more being a smartass than actually solving the problem, but still! :)

    Sorry about the formatting and stuff. Here’s a pastebin: http://pastebin.com/Z6MTgLTr

    Thanks for the awesome mental exercise! It was really fun trying to figure out how to do this without traditional methods. :)

    rude_strlen:
    %define JMP_TABLE dword [ebp-8]
    %define JMP_REG edi
    %define JMP_ONE dword [JMP_REG]
    %define JMP_ZERO dword [JMP_REG+4]

    %macro jayz 1
    test %1,%1 ; set/don’t set the zero flag
    pushf ; push EFLAGS onto the stack
    shr dword [esp],6 ; shift until the zeroflag is the end bit
    and dword [esp],1 ; get the zero flag
    pop %1 ; now it’s an offset to our jumptable!
    jmp dword [edi+%1*4] ; and now we have a jz instruction! :D
    %endmacro

    push ebp
    mov ebp,esp
    sub esp,8 ; for our jumptable. hee hee.

    push JMP_REG
    push esi

    xor eax,eax
    lea JMP_REG,JMP_TABLE
    mov JMP_ONE,str_one
    mov JMP_ZERO,str_zero
    add JMP_ONE,0×401000 ; I’m a windows person… sorry! :D
    add JMP_ZERO,0×401000
    mov esi,[ebp+8] ; string pointer!

    str_loop:
    movzx ecx,byte [esi]
    jayz ecx ; THIS JUMP CRAY

    str_one:
    inc eax
    inc esi
    jmp str_loop

    str_zero:
    pop esi
    pop edi
    add esp,8
    pop ebp
    retn 4

  6. In many C compilers your can store the address of some labels in an array, do this in a for/while loop, where you add one to an accumulator, and use goto to jump out of the loop when you are done. See the js0n JSO parser which gave me the inspiration. (it’s on github). This avoids stack consumption with recursive calls, though with optimization it may go away anyhow.

    • #include
      int main(int argc, char** argv) {
      char * string = argv[argc-1];
      void *states[] = {&&done, &&cont};
      int size = 0;

      while(1) {
      goto *states[!!*(string+size)];
      cont: ++size;
      }
      done:
      printf(“%s is %d bytes\n”, string, size);

      return 0;
      }

  7. Sleazy cheating in assembly language:

    strlen proc near
    mov esi, [esp+4]
    xor ecx, ecx
    @loop:
    lodsb
    inc ecx
    test al, al
    pushf
    pop ebx
    movzx ebx, bl
    and bl, 40h
    shr bl, 6
    jmp dword ptr tbl[ebx*4]
    @ret:
    mov eax, ecx
    dec eax
    retn 4
    tbl dd offset @loop, offset @ret
    strlen endp

  8. Can do it without recursion if you don’t mind breaking a few taboos…

    int mystrlen(const char *s) {
    void *my_labels[] = {&&loop, &&done};

    int result = -1;

    loop:
    ++result;
    goto *my_labels[*(s++) == ''];

    done:
    return result;
    }

    Works on GCC 4.6.3 on Ubuntu on x86-64. Not sure how portable that bool->int work is, but it consistently works as expected on Intel gear.

  9. My last comment was messed up by the blog software. The boolean test was written against a NULL byte, but the escaped zero got filtered somehow.

  10. From an anonymous contributor:

    “My personal goal here was to do it without an additional
    function call and not in asm.

    #include //for printf

    unsigned int no_cond_branch_strlen(char *s) {
    void *jmp_table[] = {&&end, &&increment};
    unsigned int length = 0;
    char num_ones;

    while(1){
    // calculate the number of ones in a byte
    num_ones = *(s+length) – ((*(s+length) >> 1) & ‘\x55′);
    num_ones = (num_ones & ‘\x33′) + ((num_ones >> 2) & ‘\x33′);
    num_ones = (num_ones + (num_ones >> 4) & ‘\x0f’);
    goto *jmp_table[ num_ones > 0 ];
    increment:
    length++;
    }
    end:
    return length;
    }

    int
    main(int argc, char **argv)
    {
    printf(“length of %s is %d\n”, argv[1], no_cond_branch_strlen(argv[1]));
    return 0;
    }

    This will only work for gcc. To make this work in msvc you’ll need to
    use inline asm to get the address of the labels via something like
    http://stackoverflow.com/questions/6421433/address-of-labels-msvc

  11. This is what I came up with. Written before reading any of your solutions. Fun challenge!

    int finish(const char *s) { return -1; }
    int length(const char *s) {
    int (*functions[])(const char*) = { length, finish };
    return 1 + functions[!*s](s + 1);
    }

  12. Quick clarification – Would bool result = a < b; be considered a conditional? I mean it is a comparision and not a conditional.

  13. Pingback: strlen without conditionals « mlindgren.ca

  14. Here’s mine. It’s pretty much the same as the ones in the original post.

    #include

    int mystrlen(char *str, int count);
    int mystrlenret(char *str, int count);

    int (*funcs[2])(char *, int) = {mystrlen, mystrlenret};

    int mystrlen(char *str, int count)
    {
    return funcs[(int)(!*str)](++str, ++count);
    }

    int mystrlenret(char *str, int count)
    {
    return count – 1;
    }

    int main(int argc, char *argv[])
    {
    printf(“%i\n”, mystrlen(argv[argc - 1], 0));
    }

  15. It seems Rofl’s solution can be improved upon slightly: you can eliminate the need to negate *c twice by reversing the function pointer table:

    ctr table[2] = { &func_x, &func_0 };
    int func(char *c) { return table[!*c](c+1); }

    Nitpicky,I know, but I just thought I’d point it out anyway. :)

  16. This one is ugly, convoluted, inefficient, non-threadsafe, and only works on POSIX systems :-)

    #include
    #include
    #include
    #include

    static sigjmp_buf fpe_env;
    static void _fpe_handler(int signal, siginfo_t *w, void *uap) {
    siglongjmp(fpe_env, w->si_code);
    }

    int _done(const char * buffer, int * ptr_length) {
    int length = *ptr_length;
    free(ptr_length);
    return length;
    }

    int _not_done(const char * buffer, int * ptr_length) {
    struct sigaction act;
    act.sa_sigaction = _fpe_handler;
    sigemptyset(&act.sa_mask);
    act.sa_flags = SA_SIGINFO;
    sigaction(SIGFPE, &act, NULL);
    int accum = 0;
    *ptr_length = 0;
    startloop:
    accum += 255 / *buffer;
    buffer++;
    (*ptr_length)++;
    goto startloop;
    }

    int strlen0(const char * buffer) {
    int * ptr_length = (int*)malloc(sizeof(int));
    int (*workers[])(const char *, int * ptr_length) = { _not_done, _done };
    return workers[sigsetjmp(fpe_env, 1)](buffer, ptr_length);
    }

    int main(int argc, char **argv) {
    if (argc != 2) {
    fprintf(stdout, “usage: %s STRING\n”, argv[0]);
    } else {
    const char * buffer = (const char *)argv[1];
    int length = strlen0(buffer);
    fprintf(stdout, “strlen(%s) = %d\n”, buffer, length);
    }
    return 0;
    }

  17. Looks like someone else thought to count by fail as well… Cheers.

    #include
    #include
    #include

    int count = 0;

    void weee(int signal)
    {
    printf(“count %i\n”,count);
    exit(0);
    }

    int main()
    {
    char* string =”This should be 17″;

    signal(SIGFPE,weee);

    bad:
    *string = *string/(*string++);
    count++;
    goto bad;

    return 42;
    }

  18. Tail-recursive version (i.e. the compiler replaces the call with an unconditional jump):

    typedef int (*ctr)(char *, int);
    static int func_0(char *c, int len) { return len-1; }

    static int func(char *c, int len)
    {
    static ctr table[2] = { &func_0, &func };
    return table[!!*c](c+1, len+1);
    }

    int str_len(char *c) { return func(c, 0); }

  19. This one involves C++ exception handling and compiled with MSVC.

    int div0_strlen(char* str)
    {
    char* str_end = str;

    __try {
    for(int trash;;*str_end++, trash = *str_end / *str_end) {}
    } __except (EXCEPTION_EXECUTE_HANDLER) {}

    return str_end – str;
    }

    int main(int argc, char* argv[])
    {
    printf(“len = %d\n”, div0_strlen(“test string”));

    return 0;
    }

  20. I think everyone has missed the point of the question. Every one of these solutions just find a way to trick the compiler (or CPU) to make the decision for you. Tail recursion and computed jumps are technicalities to avoid jcc or equivalent, as is evident when the compiler transforms your beautiful Ocaml style tail recursion back into a loop Rolf!

    The only real way to solve this without making a decision at any point is something like:

    int crazy_strlen(char *str) {
    int non_null = 1;
    int count = 0;

    non_null &= !!str[0]; count += non_null;
    non_null &= !!str[1]; count += non_null;
    non_null &= !!str[2]; count += non_null;
    non_null &= !!str[3]; count += non_null;

    non_null &= !!str[MAX_LEN]; count += non_null;

    return count;
    }

    Of course this is just because all the decisions (ie. max string length) have been made before compilation.

    • This will almost certainly segfault. Your code doesn’t stop counting at the NUL terminator; it will merrily trample onwards through the rest of the machine’s address space until it hits something it’s not allowed to read.

      • Correct. I’ve been spending most of my time in environments without memory protection hardware but my point stands. Besides there are ways around this eg. allocate a separate page for each string per page and set MAX_LEN to the page size.

        Every other solution has an explicit loop, just not actually using the branch instructions. If this is the criterion then the simplest solution on x86 is just “rep scasb” since there is no explicit branch or loop here, the loop is performed in microcode.

    • (Also observe that the result is incorrect: at least one byte after the terminating NUL is very likely to be truthy.)

  21. Of course although the previous version is data storage optimal, it is not time optimal unless string length happens to equal MAX_LEN!

    If we take no “conditional branches” to mean no loops we can build some logic to perform this operation optimally (and introduce it as an instruction in our soft-core processor):

    library ieee;
    use ieee.std_logic_1164.all;
    use ieee.numeric_std.all;

    – Calculate string length, fetching string from asynchronous memory
    entity strlen is
    port
    (
    clk : in std_logic;

    — Start strlen calculation
    start : in std_logic;
    — String memory address
    str_addr : in std_logic_vector(31 downto 0);

    – Memory
    mem_addr : out std_logic_vector(31 downto 0);
    mem_data : in std_logic_vector(7 downto 0);
    mem_read : out std_logic;

    — Result
    count : out std_logic_vector(31 downto 0);
    done : out std_logic
    );

    end entity strlen;

    architecture SYN of strlen is
    signal incr : unsigned(count’high downto count’low);
    begin

    count_proc : process (clk, start)
    variable count_u : unsigned(count’high downto count’low);
    variable mem_addr_u : unsigned(mem_addr’high downto mem_addr’low);
    begin
    – Note for timing reasons start should be synchronous with clk even though
    – it is used as async reset
    if start = ’1′ then
    count_u := (others => ’0′);
    done <= '0';
    incr <= to_unsigned(1, incr'length);
    mem_addr_u <= unsigned(str_addr);
    elsif rising_edge(clk) then
    incr(0) <= incr(0) and (mem_data(0) or mem_data(1) or mem_data(2) or mem_data(3) or
    mem_data(4) or mem_data(5) or mem_data(6) or mem_data(7));
    count_u := count_u + incr;
    end if;

    count <= std_logic_vector(count_u);
    mem_addr <= std_logic_vector(mem_addr_u);
    end process;

    done <= not incr(0);

    end architecture SYN;

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s