// 481 code
#include "stdafx.h"

#include <io.h>
#include <stdio.h>
#include <windows.h>
#include <imagehlp.h>
//#include <Dbghelp.h> //ImageRvaToVa

#include<CString>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include<string>
#include <vector>

#include "mysql.h"

#pragma comment (lib, "imagehlp.lib")
using namespace std;
// ---- File-scope state shared by getapi() and populateTable() ----

// Spare strings; nothing in this listing reads or writes them.
std::string s1;
std::string s2;

// Points at the import name getapi() looked at most recently.
char *cc;

// Import names gathered by getapi(); entries 0 .. snum-1 are valid.
char *c2[5000000];

// Counter for total API calls; used as the ID of the next API_CALLS row.
int myID = 1;

// Fixed table of string pointers; not referenced elsewhere in this listing.
struct aa
{
    char *ch;
};
struct aa aa[500];

// Number of valid entries in c2.
int snum = 0;
void getapi(char* szStr)
{
int i,j;
//Allocate memory
WCHAR wszClassName[256];
memset(wszClassName, 0, sizeof(wszClassName));
MultiByteToWideChar(CP_ACP, 0, szStr, strlen(szStr) + 1, wszClassName,
(sizeof(wszClassName) / sizeof(wszClassName[0])));
//Attempt to create file
LPCWSTR ss = wszClassName;
HANDLE hFile = CreateFile(ss, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
//If we couldn't create the file then return
if(hFile == INVALID_HANDLE_VALUE) {
printf("Create File Failed, skipping this file.\n");
return;
}
//Attempt file mapping
HANDLE hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
//If file mapping failed then return
if (hFileMapping == NULL || hFileMapping == INVALID_HANDLE_VALUE) {
printf("Could not create file mapping object (%d), skipping this file.\n",
GetLastError());
return;
}
//Attempt to set the starting point - base address
LPBYTE lpBaseAddress = (LPBYTE)MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0,
0);
//If we don't have the base address then return
if (lpBaseAddress == NULL) {
printf("Could not map view of file (%d), skipping this file.\n",
GetLastError());
return;
}
//If we get to here then there were no problems with the file or mapping and we're
good to go
PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)lpBaseAddress;
PIMAGE_NT_HEADERS pNtHeaders = (PIMAGE_NT_HEADERS)(lpBaseAddress + pDosHeader>e_lfanew);
//Point to the import table
DWORD Rva_import_table = pNtHeaders>OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress;
//If we can't find the import table then get out of here
if(Rva_import_table == 0) {
printf("no import table, skipping this file.\n");
goto UNMAP_AND_EXIT;
}
PIMAGE_IMPORT_DESCRIPTOR pImportTable =
(PIMAGE_IMPORT_DESCRIPTOR)ImageRvaToVa(pNtHeaders, lpBaseAddress, Rva_import_table,
NULL);
//Subtract base address to get the actual address of the import table
//
printf("FileAddress Of ImportTable: %p\n", ((DWORD)pImportTable (DWORD)lpBaseAddress));
IMAGE_IMPORT_DESCRIPTOR null_iid;
IMAGE_THUNK_DATA null_thunk;
memset(&null_iid, 0, sizeof(null_iid));
memset(&null_thunk, 0, sizeof(null_thunk));
//Loop through import table
for(i = 0; memcmp(pImportTable + i, &null_iid, sizeof(null_iid)) != 0; i++) {
LPCSTR szDllName = (LPCSTR)ImageRvaToVa(pNtHeaders, lpBaseAddress,
pImportTable[i].Name, NULL);
string s123 = szDllName;
if (s123 =="MSVBVM60.DLL") return;
if (s123 =="MSVCP110D.DLL") return;
PIMAGE_THUNK_DATA32 pThunk = (PIMAGE_THUNK_DATA32)ImageRvaToVa(pNtHeaders,
lpBaseAddress, pImportTable[i].FirstThunk, NULL);
for(j = 0; memcmp(pThunk + j, &null_thunk, sizeof(null_thunk)) != 0; j++) {
if(pThunk[j].u1.AddressOfData & IMAGE_ORDINAL_FLAG32) {
//printf("\t [%d] \t %ld \t \n", j, pThunk[j].u1.AddressOfData
& 0xffff);
}
else {
PIMAGE_IMPORT_BY_NAME pFuncName =
(PIMAGE_IMPORT_BY_NAME)ImageRvaToVa(pNtHeaders, lpBaseAddress,
pThunk[j].u1.AddressOfData, NULL);
if (snum == 646)
cc = cc;
//setting cc = API call
cc = (char *)pFuncName->Name;
if (cc == "_CIcos") return;
if (cc == "SOFTWARE\\Borland\\Delphi\\RTL") return;
//If we see any of the following characters then the file we
are looking at is packed or encrypted and we will disregard
if(cc[0] == '?') return;
if(cc[0] == '@') return;
c2[snum] = cc;
//increment snum for use in loop and again further below
snum++;
}
}
}
//
printf("\n\nSNUM: %d\n\n", snum);
UNMAP_AND_EXIT:
CloseHandle(hFileMapping);
CloseHandle(hFile);
return;
}
// Prints a two-line notice to standard output for a file reported as malicious.
//   filepath - text printed after "In Directory:"
//   name     - text printed after "Discovered Malicious File:"
void identifyMalicious(char filepath[], char name[]){
    fprintf(stdout, "Discovered Malicious File: %s\n", name);
    fprintf(stdout, "In Directory: %s\n\n", filepath);
}
//This method is used to populate the API table as well as the File table and takes a
different integer parameter depending on which it is doing
void populateTable(MYSQL *ptr, int table, int ref, char filepath[]){
//This will contain our directory path
char str[100];
//Location of directory to be scanned. This will eventually be a variable that the
user selects from the interface
strcpy_s (str,filepath);
long Handle;
struct _finddata_t fileinfo;
std::string files = filepath;
files.append("*");
//If we can't find the directory
if((Handle = _findfirst(files.c_str(), &fileinfo))==-1L)
printf("didnt find files\n");
//If we find the directory
else {
//find the first file
_findnext(Handle,&fileinfo);
//until there are no more files
while(_findnext(Handle,&fileinfo) == 0) {
//This will contain the signature if we are populating the File
table
char signature[200000];
signature[0] = '\0';
//Allocate memory
memset(str, 0, sizeof(str));
//Set str back to the directory location
strcpy_s(str,filepath);
//Then concatenate the actual file name to the end of the directory
strcat_s(str,fileinfo.name);
//reset snum
snum = 0;
if(table == 1){
printf("Extracting API calls from file: %s\n", fileinfo.name);
}
else if(table == 2){
printf("Creating API signature for file: %s\n",
fileinfo.name);
}
else{
printf("Scanning file: %s\n", fileinfo.name);
}
//Use the getapi method to populate c2 with all of the APIs
getapi(str);
//loop through every API call in the file.
for (int i = 0; i < snum; i++) {
//This will contain our MySQL query
char query[600];
//If we're populating the API_CALL table
if(table == 1){
//MySQL query to insert ID and API call
//
printf("File: %s\n",
fileinfo.name);
sprintf_s(query, "insert into API_CALLS (ID, API)
values (%d,'%s')", myID, c2[i]);
//Print API to console for demonstration and testing
purposes
//
printf("%d.) %s ",
i, query);
//Attempt to insert API into database and increment
total API counter if successful
if(mysql_query(ptr, query) == 0){
myID++;
}
//handles line spacing for printing to console
//
cout<<c2[i]<<endl;
}
//Generate API signature
if(table == 2 || table == 3){
//
printf("File Name: %s\n", fileinfo.name);
char *API_ID;
MYSQL_RES *result;
MYSQL_ROW row;
//Get the ID of the API we are currently looking at
//Need to consider case where API is not in the
table!!!!!!!
sprintf_s(query, "select ID from API_CALLS where API =
'%s'", c2[i]);
if(mysql_real_query(ptr, query, (UINT)strlen(query)) !=
0){
printf("Query failed\n");
}
if(!(result = mysql_use_result(ptr))){
printf("Fetch failed\n");
}
//Append the ID to the string to create a list of ID
numbers
while(row = mysql_fetch_row(result)){
API_ID = row[0];
string temp = signature;
if(temp.find(API_ID) == std::string::npos){
if(signature[0] == '\0'){
strcat_s(signature, API_ID);
}
else{
strcat_s(signature, ",");
strcat_s(signature, API_ID);
}
}
}
}
}
//Once we're out of the for loop we need to add this signature to
the File_DB table
if(table == 2 && snum != 0){
//This will contain our MySQL query
char query[200000];
//MySQL query to insert File name, ref, and signature
if(ref == 0) {
sprintf_s(query, "insert into File_DB (File_name, ref,
signature) values ('%s', 0, '%s')", fileinfo.name, signature);
}
else {
sprintf_s(query, "insert into File_DB (File_name, ref,
signature) values ('%s', -1, '%s')", fileinfo.name, signature);
}
//
printf("%s", query);
//Attempt to insert file signature into database
if(mysql_query(ptr, query) != 0){
printf("signature insert failed");
}
}
//Classification with Naive Bayes
if(table == 3 && snum != 0){
char query[200000];
std::string s(signature);
s = s + '\0';
std::string delimiter = ",";
MYSQL_RES *result;
MYSQL_ROW row;
//arrays of probabilities
double mal_prob[2000];
double ben_prob[2000];
int counter = 0;
double total_id;
double malicious_id;
//set = 1 so when we multiply by probability array we don't
get null * x
double prob_mal = 1;
double prob_ben = 1;
size_t pos = 0;
std::string ID;
//loop through every ID in signature and create probability
arrays for classification based on the FILE_DB table
while((pos = s.find(delimiter)) != std::string::npos){
//
printf("POS: %d\n",
pos);
ID = s.substr(0, pos);
//
printf("ID: %s\n",
ID.c_str());
//Now select row count from file_db where signature
contains ID
sprintf_s(query, "select count(*) from FILE_DB where
signature LIKE '%%%s%%'", ID.c_str());
//
printf("Query:
%s\n", query);
if(mysql_real_query(ptr, query, (UINT)strlen(query)) !=
0){
printf("Query failed\n");
}
if(!(result = mysql_use_result(ptr))){
printf("Fetch failed\n");
}
while(row = mysql_fetch_row(result)){
//convert string to int
total_id = atoi(row[0]);
//
printf("Total ID: %f\n", total_id);
//
printf("Total count: %s\n", row[0]);
//
printf("Total count: %f\n", total_id);
}
//Now select row count from file_db where signature
contains ID and is malicious
sprintf_s(query, "select count(*) from FILE_DB where
ref != 0 and signature LIKE '%%%s%%'", ID.c_str());
//
printf("Query:
%s\n", query);
if(mysql_real_query(ptr, query, (UINT)strlen(query)) !=
0){
printf("Query failed\n");
}
if(!(result = mysql_use_result(ptr))){
printf("Fetch failed\n");
}
while(row = mysql_fetch_row(result)){
//convert string to int
malicious_id = atoi(row[0]);
//
printf("Malicious ID: %f\n", malicious_id);
//
printf("Malicious count: %s\n", row[0]);
//
printf("Malicious count: %f\n", malicious_id);
}
mal_prob[counter] = (malicious_id / total_id);
ben_prob[counter] = ((total_id - malicious_id) /
total_id);
//
printf("Mal Prob: %f\n",
mal_prob[counter]);
//
printf("Ben Prob: %f\n",
ben_prob[counter]);
counter = counter + 1;
//erase this part of the string to be able to move
forward
s.erase(0, pos + delimiter.length());
}
//loop through both probability arrays and find the total
probability
for(int x = 0; x < counter; x++){
if(mal_prob[x] == 0) {
mal_prob[x] = 0.00001;
ben_prob[x] = 0.99999;
}
if(ben_prob[x] == 0) {
ben_prob[x] = 0.00001;
mal_prob[x] = 0.99999;
}
prob_mal = (prob_mal * mal_prob[x]);
prob_ben = (prob_ben * ben_prob[x]);
//
printf("Prob Mal: %f\n",
prob_mal);
//
printf("Prob Ben: %f\n",
prob_ben);
//
printf("Mal Prob: %f\n",
(malicious_id/total_id));
//
printf("Ben Prob: %f\n",
((total_id-malicious_id)/total_id));
}
//
printf("Prob Mal: %f\n", prob_mal);
//
printf("Prob Ben: %f\n", prob_ben);
if(prob_mal >= prob_ben) {
identifyMalicious(filepath, fileinfo.name);
}
//
printf("Malicious Probability:
%f\n", prob_mal);
//
printf("Benign Probability: %f\n",
prob_ben);
}
}
_findclose(Handle);
}
}
int _tmain(int argc, _TCHAR* argv[]) {
//MySQL connection stuff
MYSQL *conn_ptr;
MYSQL_RES *res;
MYSQL_ROW row;
char query[1024];
char API[20];
string ss_mysql;
conn_ptr = mysql_init(NULL);
//If MySQL initiation fails then return
if(!conn_ptr){
printf("mysql_init_failed\n");
return EXIT_FAILURE;
}
//Attempt to connect to the local MySQL server with the credentials passed as
parameters
conn_ptr = mysql_real_connect(conn_ptr,"localhost","root","1234","test",0,NULL,0);
//If the connection was made we're golden
if(conn_ptr){
printf("Connection success\n");
}
//Otherwise we have a problem. Consider returning if this happens, but for now
we'll just let it play out to test the other components
else{
printf("Error connecting to database: %s\n", mysql_error(conn_ptr));
}
//End of MySQL connection stuff
//For demonstrational purposes we will start with empty tables and populate them
//Populate API_CALL Table with all APIs found
populateTable(conn_ptr, 1, 0, "C:\\481_malicious\\");
populateTable(conn_ptr, 1, 0, "C:\\481_benign\\");
populateTable(conn_ptr, 1, 0, "C:\\Test_Files\\");
//Populate FILE_DB Table with malicious signature and benign signatures
populateTable(conn_ptr, 2, -1, "C:\\481_malicious\\");
populateTable(conn_ptr, 2, 0, "C:\\481_benign\\");
//Scan a directory and find all malicious files
populateTable(conn_ptr, 3, 999, "C:\\Test_Files\\");
system("pause");
return 0;
}
// End of listing.