Programs That Know Themselves

I have a program for you. This is its source code:

int f = 0;
char *l = \
"int#life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3"
",4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3"
",32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2"
",3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,"
"4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,"
"3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,"
"3,4,3,2,3,                                                                               5,3,5,3,3,"
"3,6,3,5,7,                         2,7    ,4,  10,-1, -2,10,7                            9,10,-1,10"
",25,5,3,8,                         2,1    1,2  5,10,- 1,1                                0,24,3,1,3"
",2,9,1,11,                         25,    10,  -1     ,10,23                             ,3,3,3,1,3"
",3,3,5,3,2                         9,1    0,-  1,10,  23,9,1                             ,3,2,3,6,3"
",29,10,-1,                         10,    23,  9,1,8  ,6,                                3,29,10,-1"
",10,23,3,3                         ,3,1,3 ,3,  3,     5,3,29,                            10,-1,10,2"
"3,3,3,3,1,                         3,4,3, 4,3  ,2     9,10,-1                            ,10,79,10,"
"-1,-2,99,-                                                                               1,99,-1,99"
",-1,99,-1,   99,  -1,    99,  -1, 99,-1,-1,-2   ,-2};    int#putchar (int);i  nt#print   f(const#ch"
"ar*,...);c   har  #*s    kip  Whi tespace(cha  r#* ptr   )#{#while#( *pt     r#==#0x20   #||#*ptr#="
"=#0x0a#||#   *pt  r#==  #0x0  9#|     |#*     ptr   #==      #0x     0d)#{#  pt          r++;#}#ret"
"urn#ptr;}c   har  #*p ri ntE  ntr     y(c     har#*buf,      int     #*spac   es,int#    offset)#{#"
"while#(off   set  #!=    #0)  #{#     if#     (*spaces#      ==#     -2)            #o   ffset--;#s"
"paces++;#}   #pu  tch    ar(  0x2     2);     #ch   ar#      *sr     c#=#buf  ;#while(   src#!=#0#&"
"&#spaces#!   =#0  )#{    #in  t#n     cha     rac   ter      s#=     #*space  s++;#if    #(ncharact"
"ers#==#-2)                                                                               #break;#in"
"t#nspaces#                         =#*sp   aces++;#  for(int#i=0                         ;#i#<#ncha"
"racters;#i                        ++) #{#  if(*src#= =#0)#return                         #0;#src#=#"
"skipWhites                       pac   e(s rc)   ;#c     har                             #c#=#*src+"
"+;#putchar                       (c);#}#if #(n  spa      ces                             #==#-1#&&#"
"*spaces#!=                       #-2)#{#pu tchar(0x      22)                             ;#putchar("
"0x0a);#put                       cha   r(0 x22   );#     con                             tinue;#}#f"
"or(int#i=0                       ;#i   #<# nsp    ace    s;#                             i++)#{#put"
"char(0x20)                                                                               ;#}#}#putc"
"har(0x22);#putchar(0x0a);#return#src;}int#main()#{#char#flag[]#=#{105,110,116,32,102,32,61,32,48,59"
",10,0};#flag[8]#=#(f#?#48#:#49);#printf(flag);#char#preamble[]#=#{99,104,97,114,32,42,108,32,61,32,"
"92,10,0};#printf(preamble);#char#*p#=#l;#p#=#printEntry(p,life,0);#p#=#printEntry(p,life,f#?#1#:#3)"
";#p#=#printEntry(p,life,2);#p#=#printEntry(p,life,f#?#3#:#1);#p#=#printEntry(p,life,4);#putchar(0x2"
"2);putchar(0x3b);#putchar(0x0a);#putchar(0x0a);#char#*ptr#=#l;#while(*ptr#!=#0x00)#{#char#c#=#*ptr+"
"+;#if#(c#!=#0x20)#putchar((c#==#0x23)#?#0x20#:#c);#}#/*................................-C_S_*/}";

int life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3,4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3,32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2,3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,7,4,10,-1,-2,10,79,10,-1,10,25,5,3,8,2,11,25,10,-1,10,24,3,1,3,2,9,1,11,25,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,9,1,3,2,3,6,3,29,10,-1,10,23,9,1,8,6,3,29,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,3,3,3,1,3,4,3,4,3,29,10,-1,10,79,10,-1,-2,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-1,-2,-2};int putchar(int);int printf(const char*,...);char *skipWhitespace(char *ptr) { while (*ptr == 0x20 || *ptr == 0x0a || *ptr == 0x09 || *ptr == 0x0d) { ptr++; } return ptr;}char *printEntry(char *buf,int *spaces,int offset) { while (offset != 0) { if (*spaces == -2) offset--; spaces++; } putchar(0x22); char *src = buf; while(src != 0 && spaces != 0) { int ncharacters = *spaces++; if (ncharacters == -2) break; int nspaces = *spaces++; for(int i=0; i < ncharacters; i++) { if(*src == 0) return 0; src = skipWhitespace(src); char c = *src++; putchar(c); } if (nspaces == -1 && *spaces != -2) { putchar(0x22); putchar(0x0a); putchar(0x22); continue; } for(int i=0; i < nspaces; i++) { putchar(0x20); } } putchar(0x22); putchar(0x0a); return src;}int main() { char flag[] = {105,110,116,32,102,32,61,32,48,59,10,0}; flag[8] = (f ? 48 : 49); printf(flag); char preamble[] = {99,104,97,114,32,42,108,32,61,32,92,10,0}; printf(preamble); char *p = l; p = printEntry(p,life,0); p = printEntry(p,life,f ? 1 : 3); p = printEntry(p,life,2); p = printEntry(p,life,f ? 3 : 1); p = printEntry(p,life,4); putchar(0x22);putchar(0x3b); putchar(0x0a); putchar(0x0a); char *ptr = l; while(*ptr != 0x00) { char c = *ptr++; if (c != 0x20) putchar((c == 0x23) ? 0x20 : c); } /*................................-C_S_*/}

It consists of a multi-line string that also seems to contain code, and a really long line of code on the bottom. If your squint your eyes a bit, you can see that the multi-line string reads “Life Imitates Art”. Also, when looking closely, some parts of the line of code at the bottom reappear in the string. So, what could this program do? Let us compile it and find out.

$ gcc program.c -o program
$ ./program
int f = 1;
char *l = \
"int#life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3"
",4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3"
",32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2"
",3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,"
"4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,"
"3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,"
"3,4,3,2,3,                                                                               5,3,5,3,3,"
"3,6,3,5,7,                         2,7,4   ,10,-1,-  2,10,79,10,                         -1,10,25,5"
",3,8,2,11,                        25, 10,  -1,10,24, 3,1,3,2,9,1                         ,11,25,10,"
"-1,10,23,3                       ,3,   3,1 ,3,   3,3     ,5,                             3,29,10,-1"
",10,23,9,1                       ,3,2,3,6, 3,2  9,1      0,-                             1,10,23,9,"
"1,8,6,3,29                       ,10,-1,10 ,23,3,3,      3,1                             ,3,3,3,5,3"
",29,10,-1,                       10,   23, 3,3   ,3,     1,3                             ,4,3,4,3,2"
"9,10,-1,10                       ,79   ,10 ,-1    ,-2    ,99                             ,-1,99,-1,"
"99,-1,99,-                                                                               1,99,-1,99"
",-1,99,-1,   -1,  -2,    -2}  ;in t#putchar(i   nt);i    nt#printf(c onst#ch  ar*,...)   ;char#*ski"
"pWhitespac   e(c  har    #*p  tr) #{#while#(*  ptr #==   #0x20#||#*p tr#     ==#0x0a#|   |#*ptr#==#"
"0x09#||#*p   tr#  ==#0  x0d)  #{#     ptr     ++;   #}#      ret     urn#pt  r;          }char#*pri"
"ntEntry(ch   ar#  *bu f, int  #*s     pac     es,int#of      fse     t)#{#w   hile#(o    ffset#!=#0"
")#{#if#(*s   pac  es#    ==#  -2)     #of     fset--;#s      pac     es+            +;   #}#putchar"
"(0x22);#ch   ar#  *sr    c#=  #bu     f;#     whi   le(      src     #!=#0#&  &#spaces   #!=#0)#{#i"
"nt#ncharac   ter  s#=    #*s  pac     es+     +;#   if#      (nc     haracte  rs#==#-    2)#break;#"
"int#nspace                                                                               s#=#*space"
"s++;#for(i                         nt#    i=0  ;#i#<# ncharac                            ters;#i++)"
"#{#if(*src                         #==    #0)  #retur n#0                                ;#src#=#sk"
"ipWhitespa                         ce(    src  );     #char#                             c#=#*src++"
";#putchar(                         c);    #}#  if#(n  spaces                             #==#-1#&&#"
"*spaces#!=                         #-2    )#{  #putc  har                                (0x22);#pu"
"tchar(0x0a                         );#put cha  r(     0x22);#                            continue;#"
"}#for(int#                         i=0;#i #<#  ns     paces;#                            i++)#{#put"
"char(0x20)                                                                               ;#}#}#putc"
"har(0x22);#putchar(0x0a);#return#src;}int#main()#{#char#flag[]#=#{105,110,116,32,102,32,61,32,48,59"
",10,0};#flag[8]#=#(f#?#48#:#49);#printf(flag);#char#preamble[]#=#{99,104,97,114,32,42,108,32,61,32,"
"92,10,0};#printf(preamble);#char#*p#=#l;#p#=#printEntry(p,life,0);#p#=#printEntry(p,life,f#?#1#:#3)"
";#p#=#printEntry(p,life,2);#p#=#printEntry(p,life,f#?#3#:#1);#p#=#printEntry(p,life,4);#putchar(0x2"
"2);putchar(0x3b);#putchar(0x0a);#putchar(0x0a);#char#*ptr#=#l;#while(*ptr#!=#0x00)#{#char#c#=#*ptr+"
"+;#if#(c#!=#0x20)#putchar((c#==#0x23)#?#0x20#:#c);#}#/*................................-C_S_*/}";

int life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3,4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3,32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2,3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,7,4,10,-1,-2,10,79,10,-1,10,25,5,3,8,2,11,25,10,-1,10,24,3,1,3,2,9,1,11,25,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,9,1,3,2,3,6,3,29,10,-1,10,23,9,1,8,6,3,29,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,3,3,3,1,3,4,3,4,3,29,10,-1,10,79,10,-1,-2,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-1,-2,-2};int putchar(int);int printf(const char*,...);char *skipWhitespace(char *ptr) { while (*ptr == 0x20 || *ptr == 0x0a || *ptr == 0x09 || *ptr == 0x0d) { ptr++; } return ptr;}char *printEntry(char *buf,int *spaces,int offset) { while (offset != 0) { if (*spaces == -2) offset--; spaces++; } putchar(0x22); char *src = buf; while(src != 0 && spaces != 0) { int ncharacters = *spaces++; if (ncharacters == -2) break; int nspaces = *spaces++; for(int i=0; i < ncharacters; i++) { if(*src == 0) return 0; src = skipWhitespace(src); char c = *src++; putchar(c); } if (nspaces == -1 && *spaces != -2) { putchar(0x22); putchar(0x0a); putchar(0x22); continue; } for(int i=0; i < nspaces; i++) { putchar(0x20); } } putchar(0x22); putchar(0x0a); return src;}int main() { char flag[] = {105,110,116,32,102,32,61,32,48,59,10,0}; flag[8] = (f ? 48 : 49); printf(flag); char preamble[] = {99,104,97,114,32,42,108,32,61,32,92,10,0}; printf(preamble); char *p = l; p = printEntry(p,life,0); p = printEntry(p,life,f ? 1 : 3); p = printEntry(p,life,2); p = printEntry(p,life,f ? 3 : 1); p = printEntry(p,life,4); putchar(0x22);putchar(0x3b); putchar(0x0a); putchar(0x0a); char *ptr = l; while(*ptr != 0x00) { char c = *ptr++; if (c != 0x20) putchar((c == 0x23) ? 0x20 : c); } /*................................-C_S_*/}

Woah, the output looks like source code, really similar to the one we just compiled! The string now displays “Art Imitates Life”. Ok, let us feed the output of our first program to compiler, and see what kind of output it generates:

$ ./program | gcc -x c - -o program2
$ ./program2
int f = 0;
char *l = \
"int#life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3"
",4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3"
",32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2"
",3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,"
"4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,"
"3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,"
"3,4,3,2,3,                                                                               5,3,5,3,3,"
"3,6,3,5,7,                         2,7    ,4,  10,-1, -2,10,7                            9,10,-1,10"
",25,5,3,8,                         2,1    1,2  5,10,- 1,1                                0,24,3,1,3"
",2,9,1,11,                         25,    10,  -1     ,10,23                             ,3,3,3,1,3"
",3,3,5,3,2                         9,1    0,-  1,10,  23,9,1                             ,3,2,3,6,3"
",29,10,-1,                         10,    23,  9,1,8  ,6,                                3,29,10,-1"
",10,23,3,3                         ,3,1,3 ,3,  3,     5,3,29,                            10,-1,10,2"
"3,3,3,3,1,                         3,4,3, 4,3  ,2     9,10,-1                            ,10,79,10,"
"-1,-2,99,-                                                                               1,99,-1,99"
",-1,99,-1,   99,  -1,    99,  -1, 99,-1,-1,-2   ,-2};    int#putchar (int);i  nt#print   f(const#ch"
"ar*,...);c   har  #*s    kip  Whi tespace(cha  r#* ptr   )#{#while#( *pt     r#==#0x20   #||#*ptr#="
"=#0x0a#||#   *pt  r#==  #0x0  9#|     |#*     ptr   #==      #0x     0d)#{#  pt          r++;#}#ret"
"urn#ptr;}c   har  #*p ri ntE  ntr     y(c     har#*buf,      int     #*spac   es,int#    offset)#{#"
"while#(off   set  #!=    #0)  #{#     if#     (*spaces#      ==#     -2)            #o   ffset--;#s"
"paces++;#}   #pu  tch    ar(  0x2     2);     #ch   ar#      *sr     c#=#buf  ;#while(   src#!=#0#&"
"&#spaces#!   =#0  )#{    #in  t#n     cha     rac   ter      s#=     #*space  s++;#if    #(ncharact"
"ers#==#-2)                                                                               #break;#in"
"t#nspaces#                         =#*sp   aces++;#  for(int#i=0                         ;#i#<#ncha"
"racters;#i                        ++) #{#  if(*src#= =#0)#return                         #0;#src#=#"
"skipWhites                       pac   e(s rc)   ;#c     har                             #c#=#*src+"
"+;#putchar                       (c);#}#if #(n  spa      ces                             #==#-1#&&#"
"*spaces#!=                       #-2)#{#pu tchar(0x      22)                             ;#putchar("
"0x0a);#put                       cha   r(0 x22   );#     con                             tinue;#}#f"
"or(int#i=0                       ;#i   #<# nsp    ace    s;#                             i++)#{#put"
"char(0x20)                                                                               ;#}#}#putc"
"har(0x22);#putchar(0x0a);#return#src;}int#main()#{#char#flag[]#=#{105,110,116,32,102,32,61,32,48,59"
",10,0};#flag[8]#=#(f#?#48#:#49);#printf(flag);#char#preamble[]#=#{99,104,97,114,32,42,108,32,61,32,"
"92,10,0};#printf(preamble);#char#*p#=#l;#p#=#printEntry(p,life,0);#p#=#printEntry(p,life,f#?#1#:#3)"
";#p#=#printEntry(p,life,2);#p#=#printEntry(p,life,f#?#3#:#1);#p#=#printEntry(p,life,4);#putchar(0x2"
"2);putchar(0x3b);#putchar(0x0a);#putchar(0x0a);#char#*ptr#=#l;#while(*ptr#!=#0x00)#{#char#c#=#*ptr+"
"+;#if#(c#!=#0x20)#putchar((c#==#0x23)#?#0x20#:#c);#}#/*................................-C_S_*/}";

int life[]={99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-2,10,79,10,-1,10,25,3,4,3,2,6,1,7,28,10,-1,10,25,3,4,3,2,6,1,3,32,10,-1,10,25,3,4,3,2,2,5,6,29,10,-1,10,25,3,4,3,2,5,2,6,29,10,-1,10,25,3,4,3,2,5,2,3,32,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,25,6,1,3,2,2,5,7,28,10,-1,10,79,10,-1,-2,10,3,3,2,3,4,3,2,3,1,11,3,5,4,11,1,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,1,11,2,3,1,3,3,11,1,3,5,9,3,10,-1,10,3,3,2,4,2,4,2,3,5,3,5,3,3,3,6,3,5,6,2,2,10,10,-1,10,3,3,2,3,1,2,1,3,2,3,5,3,5,9,6,3,5,6,3,7,4,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,9,6,3,5,3,12,2,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,8,3,10,-1,10,3,3,2,3,4,3,2,3,5,3,5,3,3,3,6,3,5,7,2,7,4,10,-1,-2,10,79,10,-1,10,25,5,3,8,2,11,25,10,-1,10,24,3,1,3,2,9,1,11,25,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,9,1,3,2,3,6,3,29,10,-1,10,23,9,1,8,6,3,29,10,-1,10,23,3,3,3,1,3,3,3,5,3,29,10,-1,10,23,3,3,3,1,3,4,3,4,3,29,10,-1,10,79,10,-1,-2,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,99,-1,-1,-2,-2};int putchar(int);int printf(const char*,...);char *skipWhitespace(char *ptr) { while (*ptr == 0x20 || *ptr == 0x0a || *ptr == 0x09 || *ptr == 0x0d) { ptr++; } return ptr;}char *printEntry(char *buf,int *spaces,int offset) { while (offset != 0) { if (*spaces == -2) offset--; spaces++; } putchar(0x22); char *src = buf; while(src != 0 && spaces != 0) { int ncharacters = *spaces++; if (ncharacters == -2) break; int nspaces = *spaces++; for(int i=0; i < ncharacters; i++) { if(*src == 0) return 0; src = skipWhitespace(src); char c = *src++; putchar(c); } if (nspaces == -1 && *spaces != -2) { putchar(0x22); putchar(0x0a); putchar(0x22); continue; } for(int i=0; i < nspaces; i++) { putchar(0x20); } } putchar(0x22); putchar(0x0a); return src;}int main() { char flag[] = {105,110,116,32,102,32,61,32,48,59,10,0}; flag[8] = (f ? 48 : 49); printf(flag); char preamble[] = {99,104,97,114,32,42,108,32,61,32,92,10,0}; printf(preamble); char *p = l; p = printEntry(p,life,0); p = printEntry(p,life,f ? 1 : 3); p = printEntry(p,life,2); p = printEntry(p,life,f ? 3 : 1); p = printEntry(p,life,4); putchar(0x22);putchar(0x3b); putchar(0x0a); putchar(0x0a); char *ptr = l; while(*ptr != 0x00) { char c = *ptr++; if (c != 0x20) putchar((c == 0x23) ? 0x20 : c); } /*................................-C_S_*/}

Hey, I have seen this one! It’s the source code our first program again. Is it magic? No, it is a special type of self-reproducing computer program called a Quine.

Two snakes biting each other's tails
Twin-ouroboros

The relation can be visualized with above (adapted) graphic of the ancient Ouroboros, the self-consuming snake. In our case, we have two snakes, and eating (compiling and running) the others tail (output) yields an endless loop.

What are Quines?

I first came across the topic of Quines (the programs, not the philosopher) in Douglas R. Hofstadter’s Book: “Gödel, Escher, Bach”. In it, he introduces it as a concept where a sentence is preceeded by itself inside quotation marks:

“yields falsehood when preceded by its quotation” yields falsehood when preceded by its quotation Quine’s paradox

In this case, a sentence refers to itself and becomes paradoxical. Part of the sentence is treated operatively, that is we are reading it as a statement, but the part in quotes is treated as an object the statement is about. This is really similar to the program above: we also have the same source code twice, once with quotation marks and once without.

This is necessary, because when a program normally runs, it does not know anything about its source code. It exists as a sequence of actions carried out by the instructions in the source code, but it does not know about them. In the case of compiled languages like C, it might even be impossible to recover the source code from a compiled program. We can work around that by including the source twice: once as data used for self-reproduction, and once as a list of instructions. If you are into biological analogies, one could relate the string l to the DNA of an organism and the instructions to its proteins. The structure of the proteins is encoded in the DNA, and certain proteins, such as the enzyme polymerase, read from it in order to reproduce the information.

They both have to exist at the same time, otherwise our organism is not viable: without the DNA, the polymerase has nothing to read from, and without polymerase nothing is there to actually read and replicate the DNA. Similarily, our program needs both the data to output and the instructions to output the data. This is the reason for the apparant duplication in the initial program: they encode the same information in different ways.

How does it work?

This section is rather technical, so you can also jump to conclusion(s).

If we take a look at the non-minified version of our program, we have a function that prints our source code string l and wraps it in quotation marks, that is it passes the data on to the next program. Then, another part reads the string l again, this time with out encoding it:

char *ptr = l;
while(*ptr != 0x00) {
    char c = *ptr++;
    // Decode pound sign to space
    if (c != 0x20) putchar((c == 0x23) ? 0x20 : c);
}

It looks a bit cryptic through all the pointer arithmetic, but essentially it just outputs our string l and undoes any encoding we previously did.

But what about the ASCII art? Well, it is defined in the array life, which contains a run-length encoding generated by a Haskell program. Two consecutive numbers x and y specify how many characters are followed by how many spaces respectively. Since we can’t arbitrarily place spaces in a C-program, we encode all spaces of the source code with a pound sign, which will be decoded into a space as we have seen above. The function printEntry then spits out the source code characters interleaved with spaces exactly as specified. We also can specify an offset to reuse the spacing data for the two slogans (Life Imitates Art, Art Imitates Life).

char *printEntry(char *buf, int *spaces, int offset) {
    // Seek to correct position
    while (offset != 0) {
        if (*spaces == -2) offset--;
        spaces++;
    }

    putchar(0x22);
    char *src = buf;
    while(src != 0 && spaces != 0) {
        int ncharacters = *spaces++;gg
        if (ncharacters == -2) break;

        int nspaces = *spaces++;
        
        for(int i=0; i < ncharacters; i++) {
            if(*src == 0) return 0;
            src = skipWhitespace(src);
            char c = *src++;
            // Use pound sign instead of space
            putchar(c);
        }

        if (nspaces == -1 && *spaces != -2) {
            putchar(0x22);
            putchar(0x0a);
            putchar(0x22);
            continue;
        }

        for(int i=0; i < nspaces; i++) {
            putchar(0x20);
        }
    }
    putchar(0x22);
    putchar(0x0a);

    return src;
}

The putchar function which outputs a single character is used quite a lot. It is convenient to pass the ASCII characters as hexadecimal numbers, because if we were to use quotes, we would need to escape those in the source code string which makes the whole endeavour more complicated.

In order to write a 2-Quine that has two states and reproduces itself every other execution, we define a flag at the beginning whose value is inverted each execution. This is the code that outputs the flag code:

// ASCII for "int f = 0;\n"
char flag[] = {105, 110, 116, 32, 102, 32, 61, 32, 48, 59, 10, 0};

// Toggle flag each run, ASCII 48 = '0' and ASCII 49 = '1'
flag[8] = (f ? 48 : 49);

// Output flag code
printf(flag);

A wrapper script then strips the code of any unnecessary whitespace and comments and concatenates it into the source-code-string-source-code format. Then we “feed the snake to itself” by compiling and running a few times and check whether the output remains the same:

#!/bin/bash
sed -e 's#^.*//.*$##' < main.c |  # Strip comments
     tr -d '\n' |  # Delete newlines
     sed -e 's/ \+/ /g' |  # Squash spaces
     sed -e 's/, /,/g' |  # Remove argument spaces
     tr ' ' '#' | # Translate spaces
     awk 'BEGIN {print "int f = 1;\nchar *l = \\"} { print "\"" $0 "\";\n\n" }' > minify.c

cat minify.c main.c > packed.c

gcc packed.c -o run1 && 
./run1 > run1.out.c &&
gcc run1.out.c -o run2 &&
./run2 > run2.out.c &&
gcc run2.out.c -o run3 &&
./run3 > run3.out.c

echo Executing similarity check....

diff run{1,3}.out.c

if [[ $? -eq 0 ]]; then
    echo Passed successful
    exit 0
else
    echo "Error: Differences detected."
    exit -1
fi

And that’s basically it, sadly (gladly) there is no magic involved, just a bunch of hacks.

Conclusion

A program was presented that can reproduce its own source code in a two-step process while entertaining the user with some ASCII-Art. Does this have any practical uses? No, not really. In reality, when a computer program wants to duplicate, it forks itself with help of the operating system, although this is a rather different kind of reproduction.

This was more about the brain teaser of writing such a program, and let me tell you: most of the time I was as confused when writing the code as you probably are when reading it. The Wikipedia article on quines is a good starting point. Dylan Beattie gave a really enticing talk about the The Art of Code that talks about Quines at around the 00:27:00 mark, among other absolutely mind blowing things.

I would not be surprised if someone has already done a similar 2-Quine, if you know about that please let me know!

You can find the complete source code on Github and as ZIP.

Comments

To add a comment, write an email with this link.