一年半未更新技术博客了...最近收藏夹实在太乱,一千多个收藏未整理,加上360浏览器和QQ浏览器都在用,收藏夹一直未统一。于是想把它们统一起来,开始研究它们的收藏夹,发现格式是差不多的JSON。QQ浏览器的收藏夹是路径C:\Users\用户\AppData\Local\Tencent\QQBrowser\User Data\Default\QQ号 下的Bookmarks 文件,360浏览器的在路径C:\Users\用户\AppData\Local\360Chrome\Chrome\User Data\Default\360UID436447721_V8下,也是Bookmarks 文件。IE 的收藏夹不一样,在路径C:\Users\用户\Favorites下,每个收藏以单个文件的形式存在。
为什么做这个东西呢?不是有自带的收藏夹管理吗?没错,但没有我想要的一些功能,比如我想找出哪些网页已经失效,我想查看收藏日期,我想清除重复URL,这些都没有。有了自己的管理工具,我还可以把网页依次抓取一遍,保存HTML,避免网页失效以后不知道是什么了,还可以将不必要的暂时删掉。
浏览器自带的管理界面

我自己做的网页管理界面

所有收藏夹全部以自己服务器上的为准,所以还需要有一个生成收藏夹的功能,用来替换QQ浏览器的Bookmarks。不管其它语言吹得如何好,我还是喜欢C# 做界面,说命令行操作比GUI方便我是不太同意的...

首先来分析Bookmarks的json格式,将代码放入http://www.bejson.com/jsonviewernew/ 这个网站,就可以直观看到视图了。

360 的差不多,不过也还是有少许差异。最开始我没想到读取这个JSON文件来导入自己数据库,我是先将收藏夹导出为HTML 文件,再分析这个HTML文件导入数据库,做了太多无用功。这个HTML 好像是统一的格式,IE导出也是这个,但不符合HTML规范,标签是大写的,而且没有闭合标签...还要分析其中的关系树,很费了一番功夫。

定义两个列表,一个用来存储导入的分类,另一个存储导入的收藏夹,由于以前可能导入过,所以还需要定义两个列表用来查询数据库已存在的分类和收藏。
// Import bookmarks from an exported bookmark HTML file whose path is taken from txtFilePath.
// The export leaves its tags unclosed, so a closing </DT> is inserted after every folder
// heading (</H3>) and every link (</A>), and the stray <p> markers are removed, before parsing.
string html = System.IO.File.ReadAllText(txtFilePath.Text);
html = html.Replace("<p>", "");
html = html.Replace("</H3>", "</H3></DT>");
html = html.Replace("</A>", "</A></DT>");

HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
HtmlNode node = doc.DocumentNode;

// The top-level <dl> is the root of the bookmark tree. Fail with a clear message when it is
// missing instead of letting Load crash on a null node.
HtmlNode root = node.SelectSingleNode("dl");
if (root == null)
{
    throw new InvalidOperationException("No <DL> bookmark list was found in " + txtFilePath.Text);
}

// Categories and favorites collected by this import run.
List<my_tb_favorite_cate> cateList = new List<my_tb_favorite_cate>();
List<my_tb_favorite> favoriteList = new List<my_tb_favorite>();

// Rows that already exist in the database, passed along so Load can consult them.
// If this query is ever narrowed from "select *", never UPDATE my_tb_favorite from these
// models: the columns that were left out for speed would not be populated.
string sql = "select * from my_tb_favorite";
DataTable dt = FYJ.Data.Min.DbHelper.Instance.GetDataTable(sql);
List<my_tb_favorite> dbFavoriteList = ObjectHelper.DataTableToModel<my_tb_favorite>(dt);

sql = "select * from my_tb_favorite_cate";
dt = FYJ.Data.Min.DbHelper.Instance.GetDataTable(sql);
List<my_tb_favorite_cate> dbCateList = ObjectHelper.DataTableToModel<my_tb_favorite_cate>(dt);

Load(root, dbCateList, dbFavoriteList, null, cateList, favoriteList);
// The Load method (recursive) follows.
// Recursively walks one <DL> list of the exported bookmark HTML.
// Folder headings (<DT><H3>) are resolved through GetCate and appended to cateList; links
// (<DT><A>) are handed to AddFavorite; a nested <DL> is walked with the most recent folder
// heading of this level as its parent.
//   node           - the <DL> element whose children are processed; must not be null
//   dbCateList     - categories already in the database (forwarded to GetCate)
//   dbFavoriteList - favorites already in the database (forwarded to AddFavorite)
//   parentID       - ID of the category owning this list; null for the top-level list
//   cateList       - receives every category returned by GetCate
//   favoriteList   - forwarded to AddFavorite (defined elsewhere; presumably receives new favorites)
// Throws ArgumentNullException when node is null.
private void Load(HtmlNode node, List<my_tb_favorite_cate> dbCateList, List<my_tb_favorite> dbFavoriteList, string parentID, List<my_tb_favorite_cate> cateList, List<my_tb_favorite> favoriteList)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }
    if (node.ChildNodes == null)
    {
        return;
    }

    // Most recent folder heading seen at this level; the <DL> that follows it holds its contents.
    my_tb_favorite_cate cate = null;
    foreach (HtmlNode b in node.ChildNodes)
    {
        // Skip whitespace-only and empty nodes.
        if (b.InnerHtml.Trim() == "")
        {
            continue;
        }

        // Tag names are compared case-insensitively so lowercase exports are handled as well.
        if (string.Equals(b.OriginalName, "DT", StringComparison.OrdinalIgnoreCase))
        {
            HtmlNode h3 = b.SelectSingleNode("h3");
            if (h3 != null)
            {
                // A folder heading: remember it so the following <DL> is attached to it.
                string add_date = h3.Attributes["ADD_DATE"] == null ? null : h3.Attributes["ADD_DATE"].Value;
                string update_date = h3.Attributes["LAST_MODIFIED"] == null ? null : h3.Attributes["LAST_MODIFIED"].Value;
                cate = GetCate(dbCateList, parentID, h3.InnerText, add_date, update_date);
                cateList.Add(cate);
                continue;
            }

            HtmlNodeCollection alist = b.SelectNodes("a");
            if (alist != null)
            {
                foreach (HtmlNode a in alist)
                {
                    AddFavorite(a, dbFavoriteList, parentID, favoriteList);
                }
            }
        }
        else if (string.Equals(b.OriginalName, "DL", StringComparison.OrdinalIgnoreCase))
        {
            // A nested list normally follows its folder heading. When no heading has been seen
            // at this level, keep its entries under the current parent instead of crashing on
            // a null category.
            string ownerID = cate == null ? parentID : cate.cateID;
            Load(b, dbCateList, dbFavoriteList, ownerID, cateList, favoriteList);
        }
    }
}
// End of method. Once parsing is complete, the two lists can be saved to the database.
分析JSON明显简单多了...当然用Newtonsoft.Json 这个了,同样定义4个List
// Import bookmarks from the QQ browser's "Bookmarks" JSON file located in the folder named
// by BookmarksPathQQ.
// Rows that already exist in the database are loaded first so the importer can consult them.
// If this query is ever narrowed from "select *", never UPDATE my_tb_favorite from these
// models: the columns that were left out for speed would not be populated.
string sql = "select * from my_tb_favorite";
DataTable dt = FYJ.Data.Min.DbHelper.Instance.GetDataTable(sql);
List<my_tb_favorite> dbFavoriteList = ObjectHelper.DataTableToModel<my_tb_favorite>(dt);

sql = "select * from my_tb_favorite_cate";
dt = FYJ.Data.Min.DbHelper.Instance.GetDataTable(sql);
List<my_tb_favorite_cate> dbCateList = ObjectHelper.DataTableToModel<my_tb_favorite_cate>(dt);

// Path.Combine copes with a folder path that does or does not end in a separator.
string json = System.IO.File.ReadAllText(System.IO.Path.Combine(BookmarksPathQQ.Text, "Bookmarks"));

// Categories and favorites collected by this import run.
List<my_tb_favorite_cate> cateList = new List<my_tb_favorite_cate>();
List<my_tb_favorite> favoriteList = new List<my_tb_favorite>();

JObject jo = (JObject)JsonConvert.DeserializeObject(json);
JToken roots = jo["roots"];
JToken bookmark_bar = roots["bookmark_bar"];
JToken other = roots["other"];

// Only the "bookmark_bar" and "other" roots are imported; "checksum", "version" and the
// "synced" root are not needed here, so they are no longer read.
LoadQQBookmarks(bookmark_bar, dbCateList, dbFavoriteList, null, cateList, favoriteList);
LoadQQBookmarks(other, dbCateList, dbFavoriteList, null, cateList, favoriteList);
LoadQQBookmarks 方法,也有一个递归
// Recursively imports one node of the browser's Bookmarks JSON tree.
// A "folder" node with children becomes a category (via GetCate) and its children are
// imported beneath it; a "url" node becomes a new favorite unless its URL is already known.
//   obj            - the JSON node to import; a null node is ignored
//   dbCateList     - categories already in the database (forwarded to GetCate)
//   dbFavoriteList - favorites already in the database, used to skip known URLs
//   parentID       - ID of the category that owns this node; null for a root
//   cateList       - receives every category created for a non-empty folder
//   favoriteList   - receives every newly created favorite
private void LoadQQBookmarks(JToken obj, List<my_tb_favorite_cate> dbCateList, List<my_tb_favorite> dbFavoriteList, string parentID, List<my_tb_favorite_cate> cateList, List<my_tb_favorite> favoriteList)
{
    if (obj == null)
    {
        // A root that is absent from the file has nothing to import.
        return;
    }

    var children = obj["children"];
    string date_added = obj["date_added"] == null ? null : obj["date_added"].ToString();
    string date_modified = obj["date_modified"] == null ? null : obj["date_modified"].ToString();
    string name = obj["name"].ToString();
    string type = obj["type"].ToString();

    if (type == "folder")
    {
        // Empty folders, and folders with no "children" entry at all, are skipped.
        if (children == null || !children.HasValues)
        {
            return;
        }

        my_tb_favorite_cate cate = GetCate(dbCateList, parentID, name, date_added, date_modified);
        cateList.Add(cate);
        foreach (var item in children)
        {
            LoadQQBookmarks(item, dbCateList, dbFavoriteList, cate.cateID, cateList, favoriteList);
        }
    }
    else if (type == "url")
    {
        string url = obj["url"].ToString();

        // Do nothing when the URL is already in the database or was already collected earlier
        // in this import (duplicate URL in the file); the first occurrence decides the category.
        if (dbFavoriteList.Any(x => x.favoriteUrl == url) || favoriteList.Any(x => x.favoriteUrl == url))
        {
            return;
        }

        DateTime now = DateTime.Now;
        my_tb_favorite favorite = new my_tb_favorite();
        favorite.favoriteID = Guid.NewGuid().ToString("N");
        favorite.cateID = parentID;
        // Keep the date the bookmark was originally added; fall back to the import time when
        // the file carries no usable value (missing, "0" or unparseable).
        favorite.ADD_DATE = BookmarkTimestampToLocalTime(date_added) ?? now;
        favorite.UPDATE_DATE = now;
        favorite.IsAdd = true;
        favorite.favoriteTitle = name;
        favorite.favoriteUrl = url;
        favorite.favoriteFrom = "bookmarks";
        favoriteList.Add(favorite);
    }
}

// Converts a Bookmarks-file timestamp (microseconds since 1601-01-01 00:00:00 UTC, stored as a
// decimal string) to local time. Returns null for a missing, non-positive, unparseable or
// out-of-range value.
private static DateTime? BookmarkTimestampToLocalTime(string timestamp)
{
    long microseconds;
    if (!long.TryParse(timestamp, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out microseconds) || microseconds <= 0)
    {
        return null;
    }

    DateTime epoch = new DateTime(1601, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    // One microsecond is 10 ticks; reject values that would overflow DateTime.
    long maxMicroseconds = (DateTime.MaxValue.Ticks - epoch.Ticks) / 10;
    if (microseconds > maxMicroseconds)
    {
        return null;
    }

    return epoch.AddTicks(microseconds * 10).ToLocalTime();
}
// Importing IE favorites is also fairly simple: only the folder hierarchy has to be analysed.
// Recursively imports an Internet Explorer favorites folder.
// Every *.url shortcut directly inside folder becomes a favorite (title = file name, address
// = the BASEURL entry or, failing that, the URL entry), unless its address is already known.
// Every sub-directory becomes a category (via GetCate) and is imported beneath it.
//   folder         - directory to scan
//   dbCateList     - categories already in the database (forwarded to GetCate)
//   dbFavoriteList - favorites already in the database, used to skip known URLs
//   parentID       - ID of the category that corresponds to folder; null for the root
//   cateList       - receives the category created for each sub-directory
//   favoriteList   - receives every newly created favorite
private void LoadIEBookmarks(string folder, List<my_tb_favorite_cate> dbCateList, List<my_tb_favorite> dbFavoriteList, string parentID, List<my_tb_favorite_cate> cateList, List<my_tb_favorite> favoriteList)
{
    // Built once per call instead of once per file.
    Regex baseUrlPattern = new Regex("BASEURL=(.*)");
    Regex urlPattern = new Regex("URL=(.*)");
    Regex iconPattern = new Regex("IconFile=(.*)");

    foreach (string file in System.IO.Directory.GetFiles(folder, "*.url"))
    {
        string title = System.IO.Path.GetFileNameWithoutExtension(file);
        // ReadAllText closes the file even when reading fails.
        string src = System.IO.File.ReadAllText(file);

        // BASEURL takes precedence; URL is only consulted when BASEURL is absent.
        Match urlMatch = baseUrlPattern.Match(src);
        if (!urlMatch.Success)
        {
            urlMatch = urlPattern.Match(src);
        }
        string url = urlMatch.Success ? urlMatch.Groups[1].Value.Trim() : "";
        if (url == "")
        {
            // A shortcut without an address cannot be a favorite; skip it instead of storing
            // an empty URL.
            continue;
        }

        // Do nothing when the URL is already in the database or was already collected earlier
        // in this import; the first occurrence decides the category.
        if (dbFavoriteList.Any(x => x.favoriteUrl == url) || favoriteList.Any(x => x.favoriteUrl == url))
        {
            continue;
        }

        my_tb_favorite favorite = new my_tb_favorite();
        favorite.favoriteID = Guid.NewGuid().ToString("N");
        favorite.cateID = parentID;
        favorite.ADD_DATE = DateTime.Now;
        favorite.UPDATE_DATE = DateTime.Now;
        favorite.IsAdd = true;
        favorite.favoriteTitle = title;
        favorite.favoriteUrl = url;
        favorite.favoriteFrom = "bookmarks";

        Match iconMatch = iconPattern.Match(src);
        favorite.Icon = iconMatch.Success ? iconMatch.Groups[1].Value.Trim() : "";
        favoriteList.Add(favorite);
    }

    foreach (string s in System.IO.Directory.GetDirectories(folder))
    {
        // GetFileName keeps the full directory name; GetFileNameWithoutExtension would cut a
        // name such as "ASP.NET" down to "ASP".
        my_tb_favorite_cate cate = GetCate(dbCateList, parentID, System.IO.Path.GetFileName(s), "", "");
        cateList.Add(cate);
        LoadIEBookmarks(s, dbCateList, dbFavoriteList, cate.cateID, cateList, favoriteList);
    }
}
// Generating the Bookmarks file
// Export: read the non-deleted favorites (only the columns the export needs) and all
// categories from the database, then hand both lists to Browser to write the Bookmarks file.
DataTable favoriteTable = FYJ.Data.Min.DbHelper.Instance.GetDataTable(
    "select favoriteID,favoriteTitle,ADD_DATE,favoriteUrl,cateID from my_tb_favorite where favoriteIsDelete=false");
List<my_tb_favorite> favorites = FYJ.ObjectHelper.DataTableToModel<my_tb_favorite>(favoriteTable);

DataTable categoryTable = FYJ.Data.Min.DbHelper.Instance.GetDataTable(
    "select * from my_tb_favorite_cate");
List<my_tb_favorite_cate> categories = FYJ.ObjectHelper.DataTableToModel<my_tb_favorite_cate>(categoryTable);

Browser(favorites, categories);
Browser 方法
// Builds the browser's Bookmarks JSON document from the given favorites and categories and,
// after the user confirms a location in a save dialog, writes it to disk.
//   list     - favorites to export
//   catelist - categories to export; the first one without a parent becomes the bookmark bar
// Throws InvalidOperationException (from First) when catelist has no parentless category.
private void Browser(List<my_tb_favorite> list, List<my_tb_favorite_cate> catelist)
{
    Roots roots = new Roots();

    // --- bookmark_bar: the root category and everything below it ---
    Bookmark_bar barNode = new Bookmark_bar();
    my_tb_favorite_cate rootCategory = catelist.First(x => string.IsNullOrEmpty(x.parentID));
    // Rename the root to "书签栏" (bookmark bar); the 360 browser calls it "收藏栏".
    rootCategory.cateDisplay = "书签栏";
    barNode.date_added = todate(rootCategory.ADD_DATE);
    barNode.date_modified = todate(rootCategory.UPDATE_DATE);
    barNode.id = rootCategory.cateID;
    barNode.name = rootCategory.cateDisplay;
    barNode.type = "folder";
    LoadBrowser(barNode, list, catelist);
    roots.bookmark_bar = barNode;

    // --- synced: an empty folder with fixed id and dates ---
    roots.synced = new Bookmark_bar
    {
        children = new List<Bookmark_bar>(),
        date_added = "13138209447997937",
        date_modified = "13138209447997921",
        id = "1957",
        name = "移动设备书签", // the 360 browser calls it "移动设备收藏"
        type = "folder"
    };

    // --- other: every favorite that has no category ---
    Bookmark_bar otherNode = new Bookmark_bar
    {
        children = new List<Bookmark_bar>(),
        date_added = "13138209448006030",
        date_modified = "13138209448006029",
        id = "1922",
        name = "其它书签", // the 360 browser calls it "其它收藏"
        type = "folder"
    };
    otherNode.children.AddRange(
        list.Where(f => string.IsNullOrEmpty(f.cateID))
            .Select(f => new Bookmark_bar
            {
                date_added = todate(f.ADD_DATE),
                id = f.favoriteID,
                name = f.favoriteTitle,
                type = "url",
                url = f.favoriteUrl
            }));
    roots.other = otherNode;

    // Wrap the three roots in the top-level document, using a fixed checksum and version 1.
    Bookmarks document = new Bookmarks
    {
        checksum = "1447540acb8cdbc0686acc5444766b73",
        version = 1,
        roots = roots
    };
    string json = Newtonsoft.Json.JsonConvert.SerializeObject(document);

    // Ask where to save, defaulting to the browser's own Bookmarks file.
    using (System.Windows.Forms.SaveFileDialog diag = new System.Windows.Forms.SaveFileDialog())
    {
        diag.InitialDirectory = BookmarksPathQQ.Text;
        diag.FileName = "Bookmarks";
        if (diag.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        System.IO.File.WriteAllText(diag.FileName, json);
        MessageBox.Show("生成成功");
    }
}
// Populates bar.children with the sub-folders and links that belong to bar.
// Categories whose parentID equals bar.id come first, each filled recursively before it is
// appended; favorites whose cateID equals bar.id follow.
//   bar      - folder node to fill; its id selects the matching categories and favorites
//   list     - all favorites being exported
//   catelist - all categories being exported
private void LoadBrowser(Bookmark_bar bar, List<my_tb_favorite> list, List<my_tb_favorite_cate> catelist)
{
    List<Bookmark_bar> nodes = new List<Bookmark_bar>();

    foreach (my_tb_favorite_cate category in catelist)
    {
        if (category.parentID != bar.id)
        {
            continue;
        }

        Bookmark_bar folderNode = new Bookmark_bar
        {
            date_added = todate(category.ADD_DATE),
            date_modified = todate(category.UPDATE_DATE),
            id = category.cateID,
            name = category.cateDisplay,
            type = "folder"
        };
        LoadBrowser(folderNode, list, catelist);
        nodes.Add(folderNode);
    }

    foreach (my_tb_favorite favorite in list)
    {
        // Favorites without a category are never attached to a folder here.
        bool belongsHere = favorite.cateID == bar.id && !string.IsNullOrEmpty(favorite.cateID);
        if (!belongsHere)
        {
            continue;
        }

        nodes.Add(new Bookmark_bar
        {
            date_added = todate(favorite.ADD_DATE),
            id = favorite.favoriteID,
            name = favorite.favoriteTitle,
            type = "url",
            url = favorite.favoriteUrl
        });
    }

    bar.children = nodes;
}
// Converts a timestamp to the string form used for date_added / date_modified in the
// Bookmarks file: whole microseconds elapsed since 1601-01-01 00:00:00 UTC (the same scale as
// the literal values such as "13138209447997937" used for the fixed folders).
//   time - the moment to convert; values whose Kind is not Utc are treated as local time
// Returns the microsecond count as a decimal string, or "" when time is null.
private string todate(DateTime? time)
{
    if (time == null)
    {
        return "";
    }

    DateTime epoch = new DateTime(1601, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    // ToUniversalTime replaces the former fixed 8-hour shift, so the result is right in any
    // time zone; one microsecond is 10 ticks.
    long microseconds = (time.Value.ToUniversalTime().Ticks - epoch.Ticks) / 10;
    return microseconds.ToString(System.Globalization.CultureInfo.InvariantCulture);
}
// The JSON import was finished in a single evening; the earlier detour through the HTML import cost far too much time...
珂珂的个人博客 - 一个程序猿的个人网站