珂珂的个人博客 - 一个程序猿的个人网站

用 WinForms 做的一个采集器

    功能不算多,但根据接口配置可以实现任意来源的采集。

    下面是添加文章所用的接口,作用就是往数据库里插入数据:


    public interface IGather
    {
        FYJ.Data.IDbHelper Db { get; set; }
        ////// 表名前缀
        ///string TablePre { get; set; }

        ////// 添加
        /////////MessageEx Insert(Post model);

        ////// 获取分类
        //////CateDataSource GetCate();

        ////// 产生一个新的ID
        //////string GetNewID();
    }


    下面是分析采集源所用的接口,成员全是只读属性,主要用来获取来源、作者和分析后的内容——采集别人的文章,总要给出原文链接和来源。


 public interface IAnalyze
    {
        string Url { get; }
        string Html { get;}
        string Title { get; }
        string Author { get;}
        string Content { get; }
        string FromSource { get;}

        string Keywords { get;}

        string Description { get;}
    }


    下面贴上部分源码,有点复杂:


using GatherTool.Analyze;
using GatherTool.Discuz;
using GatherTool.Util;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Configuration;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;
using System.Xml;

namespace GatherTool
{
    public partial class MainForm : Form
    {
        /// <summary>
        /// Creates the main form and runs the designer-generated initialization.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();
            // Disables the cross-thread access check for controls. Several handlers in this
            // class set control properties from ThreadPool callbacks, so this must stay off
            // unless those handlers are changed to marshal their updates to the UI thread.
            Control.CheckForIllegalCrossThreadCalls = false;
        }

        #region  初始化
        protected override void OnLoad(EventArgs e)
        {
            base.OnLoad(e);
            LoadClassConfig();
            LoadAtt();
            LoadAnalyzeClassConfig();
            this.AllowDrop = true;
            this.toolStripProgressBar1.Visible = false;
        }

        private ListdbList;
        private ListtablePreList;
        //上传文件后fileID为键名,objectTag为键值
        private DictionaryfileDic;
        //网页html中下载远程图片后保存的fileID
        private ListfileIDList;
        private void LoadClassConfig()
        {
            dbList = new List();
            tablePreList = new List();
            Listlist = new List();
            XmlDocument doc = new XmlDocument();
            doc.Load(Path.Combine(Application.StartupPath, "Gather.xml"));
            foreach (XmlNode node in doc.GetElementsByTagName("Gather"))
            {
                list.Add(node.Attributes["type"].Value);
                tablePreList.Add(node.Attributes["tablePre"].Value);
                FYJ.Data.IDbHelper db = FYJ.Data.DbFactory.CreateIDbHelper(node.Attributes["providerName"].Value, node.Attributes["connectionString"].Value);
                dbList.Add(db);
            }
            this.cbConfigName.DataSource = list;
        }

        private void LoadAnalyzeClassConfig()
        {
            Listlist = new List();
            XmlDocument doc = new XmlDocument();
            doc.Load(Path.Combine(Application.StartupPath, "Gather.xml"));
            foreach (XmlNode node in doc.GetElementsByTagName("Analyze"))
            {
                list.Add(node.Attributes["type"].Value);
            }
            this.cbAnalyze.DataSource = list;
        }

        //附件权限列表框
        private void LoadAtt()
        {
            //允许下载 0  需要回复下载1  禁止下载2
            DataTable dt = new DataTable();
            dt.Columns.Add("name");
            dt.Columns.Add("value");
            dt.Rows.Add(new object[] { "允许下载", "0" });
            dt.Rows.Add(new object[] { "需要回复下载", "1" });
            dt.Rows.Add(new object[] { "禁止下载", "2" });
            this.cbAttachmentLimit.DataSource = dt;
            this.cbAttachmentLimit.DisplayMember = "name";
            this.cbAttachmentLimit.ValueMember = "value";
        }
        #endregion

        private IGather Gather
        {
            get
            {
                if (cbConfigName.Items.Count > 0)
                {
                    IGather gather = (IGather)Activator.CreateInstance(Type.GetType(this.cbConfigName.Text));
                    FYJ.Data.IDbHelper db = this.dbList[this.cbConfigName.SelectedIndex];
                    gather.Db = db;
                    gather.TablePre = this.tablePreList[this.cbConfigName.SelectedIndex];

                    return gather;
                }

                return null;
            }
        }

        private void cbConfigName_SelectedIndexChanged(object sender, EventArgs e)
        {
            if (cbConfigName.Items.Count > 0)
            {
                FYJ.Data.IDbHelper db = this.dbList[this.cbConfigName.SelectedIndex];
                this.txtConnectionString.Text = db.ConnectionString;
            }
        }

        private void btnTest_Click(object sender, EventArgs e)
        {
            if (cbConfigName.Items.Count > 0)
            {
                FYJ.Data.IDbHelper db = this.dbList[this.cbConfigName.SelectedIndex];
                if (db.TestCanConnectionOpen())
                {
                    MessageBox.Show("连接成功");
                }
                else
                {
                    MessageBox.Show("连接失败");
                }
            }
        }

        private void discuz用户ToolStripMenuItem_Click(object sender, EventArgs e)
        {
            if (!String.IsNullOrEmpty(this.cbConfigName.Text))
            {
                DisUserForm frm = new DisUserForm();
                FYJ.Data.IDbHelper db = this.dbList[this.cbConfigName.SelectedIndex];
                frm.Db = db;
                frm.TablePre = this.tablePreList[this.cbConfigName.SelectedIndex];
                frm.Show();
            }
        }

        private void comboBox_category_DropDown(object sender, EventArgs e)
        {
            if (!String.IsNullOrEmpty(this.cbConfigName.Text))
            {
                ThreadPool.QueueUserWorkItem((o) =>{
                    try
                    {
                        this.toolStripStatusLabel1.Text = "正在获取板块/分类列表...";
                        CateDataSource data = Gather.GetCate();
                        this.comboBox_category.DataSource = data.DataSource;
                        this.comboBox_category.DisplayMember = data.DisplayMember;
                        this.comboBox_category.ValueMember = data.ValueMember;
                        this.toolStripStatusLabel1.Text = "就绪";
                    }
                    catch (Exception ex)
                    {
                        this.toolStripStatusLabel1.Text = ex.Message;
                    }
                });
            }
        }

        #region 分析
        private void btnAnalyze_Click(object sender, EventArgs e)
        {
            if (this.txtFromUrl.Text.Trim() == "")
            {
                MessageBox.Show("请输入url");
                return;
            }
            if (cbAnalyze.Items.Count == 0)
            {
                MessageBox.Show("没有分析类");
                return;
            }
            fileIDList = new List();
            IAnalyze ana = (IAnalyze)Activator.CreateInstance(Type.GetType(this.cbAnalyze.Text), this.txtFromUrl.Text.Trim());
            this.txtSource.Text = ana.FromSource;
            this.txtTitle.Text = ana.Title;
            this.txtFromAuthor.Text = ana.Author;
            this.richTextBox1.Text = ana.Content;

            ThreadPool.QueueUserWorkItem((o) =>{
                try
                {
                    string url = this.txtFromUrl.Text.Trim();
                    string content = this.richTextBox1.Text;

                    #region 下载html中的图片并上传到百度空间
                    if (this.checkBox_下载图片.Checked)
                    {
                        HttpHelper helper = new HttpHelper();
                        ListimageList = helper.GetTagAttribute(content, "img", "src");//图片列表
                        string html = string.Empty;
                        UploadHelper up = new UploadHelper();
                        this.toolStripProgressBar1.Visible = true;
                        this.toolStripProgressBar1.Maximum = imageList.Count;
                        for (int i = 0; i < imageList.Count; i++)
                        {
                            try
                            {
                                string imageRealPath = helper.GetRealPath(imageList[i], url);
                                string fileName = Path.GetFileName(imageRealPath.IndexOf("?") == -1 ? imageRealPath : imageRealPath.Substring(0, imageRealPath.IndexOf("?")));
                                this.logList1.AddItem("分析到远程图片:" + imageRealPath);
                                this.toolStripStatusLabel1.Text = ("正在从" + imageRealPath + "下载图片..." + (i + 1) + "/" + imageList.Count);
                                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(imageRealPath);
                                req.Proxy = null; //解决.net 4 第一次请求慢的问题
                                req.KeepAlive = true;
                                req.Method = "GET";
                                req.AllowAutoRedirect = true;
                                req.ContentType = "application/x-www-form-urlencoded";
                                req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                                req.Timeout = 10000;
                                HttpWebResponse res = (HttpWebResponse)req.GetResponse();
                                Stream stream = res.GetResponseStream();
                                string str = up.UploadToBaidu(stream, Path.GetFileName(imageList[i]));
                                FYJ.Common.JsonHelper json = new FYJ.Common.JsonHelper(str);
                                if (json.GetValue("code") != "1")
                                {
                                    this.logList1.AddErrorItem("下载" + imageRealPath + "失败," + str);
                                }

                                if (!fileIDList.Contains(json.GetValue("fileID")))
                                {
                                    fileIDList.Add(json.GetValue("fileID"));
                                }
                                //替换img标签的图片
                                content = content.Replace(imageList[i], json.GetValue("url"));
                            }
                            catch (Exception ex)
                            {
                                this.logList1.AddErrorItem("下载" + imageList[i] + "失败," + ex.Message);
                            }

                            this.toolStripProgressBar1.Value = i + 1;
                        }

                        this.richTextBox1.Text = content;
                    }
                    #endregion
                }
                catch (Exception ex)
                {
                    this.logList1.AddErrorItem("分析" + this.txtFromUrl.Text.Trim() + "失败," + ex.Message);
                }

                this.toolStripStatusLabel1.Text = "就绪";
                this.toolStripProgressBar1.Visible = false;
            });
        }
        #endregion

        #region 上传附件
        private void btnUploadAttachment_Click(object sender, EventArgs e)
        {
            if (this.listBoxFile.Items.Count == 0)
            {
                MessageBox.Show("没有附件供上传");
                return;
            }
            fileDic = new Dictionary();
            ThreadPool.QueueUserWorkItem((o) =>{
                UploadHelper up = new UploadHelper();
                this.toolStripProgressBar1.Visible = true;
                this.toolStripProgressBar1.Maximum = this.listBoxFile.Items.Count;
                for (int i = 0; i < this.listBoxFile.Items.Count; i++)
                {
                    string filePath = this.listBoxFile.Items[i].ToString();
                    try
                    {
                        this.toolStripStatusLabel1.Text = ("正在上传文件..." + (i + 1) + "/" + this.listBoxFile.Items.Count);
                        Stream stream = new FileStream(filePath, FileMode.Open);
                        string str = up.UploadToBaidu(stream, Path.GetFileName(filePath));
                        FYJ.Common.JsonHelper json = new FYJ.Common.JsonHelper(str);
                        if (json.GetValue("code") != "1")
                        {
                            this.logList1.AddErrorItem("上传文件" + filePath + "失败," + str);
                        }

                        if (!fileDic.ContainsKey(json.GetValue("fileID")))
                        {
                            fileDic.Add(json.GetValue("fileID"), "attachment");
                        }
                    }
                    catch (Exception ex)
                    {
                        this.logList1.AddErrorItem("上传文件" + filePath + "失败," + ex.Message);
                    }

                    this.toolStripProgressBar1.Value=i+1;
                }
                this.toolStripProgressBar1.Visible = false;
                this.toolStripStatusLabel1.Text = "就绪";
            });
        }
        #endregion

        #region 发布
        private void btnAdd_Click(object sender, EventArgs e)
        {
            if (this.txtTitle.Text.Trim() == "")
            {
                MessageBox.Show("标题不能为空");
                return;
            }
            if (this.richTextBox1.Text.Trim() == "")
            {
                MessageBox.Show("内容不能为空");
                return;
            }

            ThreadPool.QueueUserWorkItem((o) =>{
                try
                {
                    this.toolStripStatusLabel1.Text = "正在发布...";
                    string postID = Gather.GetNewID();

                    this.toolStripStatusLabel1.Text = "正在保存文件关系...";
                    UploadHelper up = new UploadHelper();
                    this.toolStripProgressBar1.Visible = true;
                    this.toolStripProgressBar1.Maximum = 0;
                    if(fileDic!=null)
                    {
                        this.toolStripProgressBar1.Maximum += fileDic.Count;
                    }

                    if (fileIDList != null)
                    {
                        this.toolStripProgressBar1.Maximum += fileIDList.Count;
                    }

                    if(fileDic!=null)
                    {
                        foreach (string fileID in fileDic.Keys)
                        {
                            up.SaveFileRelationDb(fileID, postID, fileDic[fileID]);
                            this.toolStripProgressBar1.Value++;
                        }
                    }
                 
                    if(fileIDList!=null)
                    {
                        foreach (string fileID in fileIDList)
                        {
                            up.SaveFileRelationDb(fileID, postID, "");
                            this.toolStripProgressBar1.Value++;
                        }
                    }
                
                    this.toolStripProgressBar1.Visible = false;

                    this.toolStripStatusLabel1.Text = "正在发布...";
                    GatherTool.Util.Post model = new GatherTool.Util.Post();
                    model.PostID = postID;
                    model.AuthorID = this.textBox_发帖用户Id.Text;
                    if (this.comboBox_category.SelectedValue != null)
                    {
                        model.CateID = this.comboBox_category.SelectedValue.ToString();
                    }
                    model.AuthorUserName = this.textBox1_发帖用户名.Text.Trim();
                    model.Title = this.txtTitle.Text.Trim();
                    model.Content = this.richTextBox1.Text.Trim();
                    model.AttachmentLimit = Convert.ToInt32(this.cbAttachmentLimit.SelectedValue);
                    model.FromAuthor = this.txtFromAuthor.Text.Trim();
                    model.FromSourceUrl = this.txtFromUrl.Text.Trim();
                    model.FromSource = this.txtSource.Text.Trim();

                    GatherTool.Util.MessageEx message = Gather.Insert(model);
                    if (message.Code > 0)
                    {
                        this.logList1.AddItem(message.Message);
                    }
                    else
                    {
                        this.logList1.AddErrorItem(message.Message);
                    }
                }
                catch (Exception ex)
                {
                    this.logList1.AddErrorItem(ex.Message);
                }

                this.toolStripStatusLabel1.Text = "就绪";
            });
        }
        #endregion

        #region 拖动
        private void listBox1_DragEnter(object sender, DragEventArgs e)
        {
            e.Effect = DragDropEffects.Move;
        }

        private void listBox1_DragDrop(object sender, DragEventArgs e)
        {
            if (e.Data.GetDataPresent(DataFormats.FileDrop, false))
            {
                String[] files = (String[])e.Data.GetData(DataFormats.FileDrop);

                ListtempList = new List();
                foreach (String s in files)
                {
                    if (File.Exists(s))
                    {

                        return;
                    }

                    //if (Directory.Exists(s))
                    //{
                    //    foreach (String f in Directory.GetFiles(s))
                    //    {
                    //        tempList.Add(f);
                    //    }
                    //}
                }
            }
        }
        #endregion

        #region 添加删除附件
        private void btnFileAdd_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog of = new OpenFileDialog())
            {
                of.Multiselect = true;
                if (of.ShowDialog() == DialogResult.OK)
                {
                    foreach(string file in of.FileNames)
                    {
                        foreach (object item in this.listBoxFile.Items)
                        {
                            if (item.ToString() == file)
                            {
                                break;
                            }
                        }

                        this.listBoxFile.Items.Add(file);
                    }
                }
            }
        }

        private void btnFileDelete_Click(object sender, EventArgs e)
        {
            this.listBoxFile.Items.Remove(this.listBoxFile.SelectedItem);
        }
        #endregion

    }
}



上一篇:jUploader上传

下一篇:个人代码全部开源


0 评论

查看所有评论

给个评论吧