当前位置 博文首页 > 闫玉林的博客:使用libsvm对政府网站访问数据的分析和预测
选取题材,政府网站访问数据。想通过使用人工智能的方法,根据已有数据建立模型,对新的访问数据进行鉴别,区分网站访问是否为正常访问,从而进行一些业务处理。更进一步,预测哪些是危险访问(网络攻击),从而能及时做出一些策略应对,保障政府网站的安全,保障正常网民的访问。
-- 根据统计出的一月访问量超过3000的认为是垃圾数据,对数据加标签
UPDATE cms_site_chart_main_copy1 set IS_GOOD = 1;
UPDATE cms_site_chart_main_copy1 set IS_GOOD = 0 where ip in ('45.194.128.134','220.178.96.24','121.41.29.248','183.166.59.35','120.78.131.13','59.111.104.146','183.166.61.6','183.166.61.1','203.207.121.14','60.171.44.29','58.242.245.178','59.111.104.158','183.166.61.13','203.207.121.3','203.207.121.13','218.22.156.219','183.166.61.2','203.207.121.8','59.111.104.147','112.26.205.196','43.249.175.170','114.255.59.101','218.22.163.50','183.166.61.7','203.207.121.9','203.207.121.1','119.23.27.3','59.111.104.49','210.45.123.30','183.166.61.10','203.207.121.5','203.207.121.2','59.111.104.115','59.111.104.137','183.166.59.149','60.166.58.227','59.111.104.134','183.166.61.3','60.21.221.203','119.23.40.231','59.111.104.138','59.111.104.75','203.207.121.11','183.166.61.14','59.111.104.135','59.111.104.44','183.166.61.5','220.178.199.10','59.111.104.132','120.77.240.164','59.111.104.14','59.111.104.148','221.231.138.216','183.166.79.204','203.207.121.7','58.242.245.179','183.166.61.8','58.242.245.180','220.178.96.26','183.166.24.222','220.178.96.25','218.22.161.170','111.39.184.24','59.111.104.127');
-- 处理is_pc字段,转为0,1
UPDATE cms_site_chart_main_copy1 set IS_PC = 1 where IS_PC = 'true';
UPDATE cms_site_chart_main_copy1 set IS_PC = 0 where IS_PC = 'false' or IS_PC is null or IS_PC = '';
-- 处理国家字段,转为数字
SELECT COUNTRY,count(1) FROM cms_site_chart_main_copy1 GROUP BY COUNTRY;
UPDATE cms_site_chart_main_copy1 set COUNTRY = '-1' where COUNTRY is null or COUNTRY = '';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '0' where COUNTRY = '未分配或者内网IP';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '1' where COUNTRY = '中国';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '2' where COUNTRY = '俄罗斯';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '3' where COUNTRY = '加拿大';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '4' where COUNTRY = '印度';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '5' where COUNTRY = '巴拿马';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '6' where COUNTRY = '新加坡';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '7' where COUNTRY = '日本';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '8' where COUNTRY = '柬埔寨';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '9' where COUNTRY = '比利时';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '10' where COUNTRY = '沙特阿拉伯';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '11' where COUNTRY = '泰国';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '12' where COUNTRY = '爱尔兰';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '13' where COUNTRY = '美国';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '14' where COUNTRY = '英国';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '15' where COUNTRY = '荷兰';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '16' where COUNTRY = '菲律宾';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '17' where COUNTRY = '越南';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '18' where COUNTRY = '韩国';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '19' where COUNTRY = '香港';
UPDATE cms_site_chart_main_copy1 set COUNTRY = '20' where COUNTRY = '马来西亚';
-- 处理语言
SELECT `LANGUAGE`,count(1) FROM cms_site_chart_main_copy1 GROUP BY `LANGUAGE` HAVING count(1) > 10;
UPDATE cms_site_chart_main_copy1 set `LANGUAGE` = '-1' where `LANGUAGE` is null or `LANGUAGE` = '' or `LANGUAGE` not in ('en-US','zh-CN','zh-Hans-CN');
UPDATE cms_site_chart_main_copy1 set `LANGUAGE` = '0' where `LANGUAGE` = 'en-US';
UPDATE cms_site_chart_main_copy1 set `LANGUAGE` = '1' where `LANGUAGE` = 'zh-CN';
UPDATE cms_site_chart_main_copy1 set `LANGUAGE` = '2' where `LANGUAGE` = 'zh-Hans-CN';
-- 处理省份
SELECT PROVINCE,count(1) FROM cms_site_chart_main_copy1 GROUP BY `PROVINCE` HAVING count(1) > 20;
UPDATE cms_site_chart_main_copy1 set PROVINCE = '-1' where PROVINCE is null or PROVINCE = '';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '0' where PROVINCE = '上海市';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '2' where PROVINCE = '内蒙古自治区';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '2' where PROVINCE = '北京市';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '3' where PROVINCE = '四川省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '4' where PROVINCE = '天津市';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '5' where PROVINCE = '安徽省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '6' where PROVINCE = '山东省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '7' where PROVINCE = '山西省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '8' where PROVINCE = '广东省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '9' where PROVINCE = '江苏省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '10' where PROVINCE = '江西省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '11' where PROVINCE = '河北省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '12' where PROVINCE = '河南省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '13' where PROVINCE = '浙江省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '14' where PROVINCE = '湖北省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '15' where PROVINCE = '湖南省';
UPDATE cms_site_chart_main_copy1 set PROVINCE = '16'