{"id":26420,"date":"2017-07-04T23:49:24","date_gmt":"2017-07-04T14:49:24","guid":{"rendered":"http:\/\/www.sejuku.net\/blog\/?p=26420"},"modified":"2024-05-06T11:50:28","modified_gmt":"2024-05-06T02:50:28","slug":"26420","status":"publish","type":"post","link":"https:\/\/www.sejuku.net\/blog\/26420","title":{"rendered":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01"},"content":{"rendered":"<p>\u6587\u66f8\u5206\u985e\u306a\u3069\u306b\u304a\u3044\u3066\u3001\u6587\u66f8\u306e\u7279\u5fb4\u91cf\u3068\u3057\u3066\u4f7f\u308f\u308c\u308b\u53e4\u5178\u7684\u306a\u6307\u6a19\u306bTF-IDF\u304c\u3042\u308a\u307e\u3059\u3002TF-IDF\u306f\u6587\u66f8\u3060\u3051\u3067\u306a\u304f\u3001\u69d8\u3005\u306a\u30c7\u30fc\u30bf\u306b\u9069\u7528\u3067\u304d\u3066\u30b7\u30f3\u30d7\u30eb\u3060\u3051\u3069\u975e\u5e38\u306b\u4f7f\u3044\u3084\u3059\u3044\u7279\u5fb4\u91cf\u3067\u3059\u3002<\/p>\n<p>\u3053\u306e\u8a18\u4e8b\u3067\u306f<br \/>\n<div class=\"box01\"><\/p>\n<ul>\n<li><b>TF-IDF\u306e\u8a08\u7b97\u5f0f<\/b><\/li>\n<li><b>TF-IDF\u306ePython\u5b9f\u88c5<\/b><\/li>\n<\/ul>\n<\/div>\n<p>\u306b\u3064\u3044\u3066\u7d39\u4ecb\u3057\u307e\u3059\u3002\u30b3\u30fc\u30c9\u306fJupyter 
Notebook\u5f62\u5f0f\u3067\u914d\u5e03\u3057\u307e\u3059\u306e\u3067\u3001\u662f\u975e\u624b\u5143\u3067\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u306d\u3002<\/p>\n<h2>TF-IDF\u3068\u306f<\/h2>\n<p>TF-IDF\u3068\u306f\u3001\u6587\u66f8\u5185\u306e\u5358\u8a9e\u306e\u91cd\u8981\u5ea6\uff08\u91cd\u307f\uff09\u3092\u793a\u3059\u624b\u6cd5\u306e\u4e00\u3064\u3067\u3059\u3002\u5404\u30c7\u30fc\u30bf\u304c\u6587\u66f8\u3067\u3001\u305d\u306e\u7279\u5fb4\u304c\u5358\u8a9e\u306b\u306a\u3063\u3066\u3044\u308b\u3068\u304d\u3092\u8003\u3048\u307e\u3059\u3002\u3053\u306e\u3068\u304d\u7279\u5fb4\u91cf\u3068\u3057\u3066\u9078\u3079\u308b\u306e\u306f\u3001\u5358\u8a9e\u306e\u51fa\u73fe\u56de\u6570\u304b\u3053\u306eTF-IDF\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u3055\u3066\u3001\u91cd\u8981\u5ea6\u306e\u5927\u304d\u3044\u5358\u8a9e\u306f\u3001\u305d\u306e\u6587\u66f8\u306e\u7279\u5fb4\u8a9e\u3068\u3057\u3066\u6271\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u9006\u306b\u3001\u91cd\u8981\u5ea6\u304c\u5c0f\u3055\u3044\u5358\u8a9e\u306f\u3001\u305d\u306e\u6587\u66f8\u5185\u3067\u306f\u30af\u30e9\u30b9\u5206\u985e\u306a\u3069\u306b\u5927\u304d\u306a\u5f71\u97ff\u3092\u53ca\u307c\u3055\u306a\u3044\u3082\u306e\u3060\u3068\u8003\u3048\u308b\u4e8b\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>TF-IDF\u306fTF\u3068IDF\u3068\u3044\u3046\u4e8c\u3064\u306e\u6307\u6a19\u3092\u304b\u3051\u5408\u308f\u305b\u305f\u5024\u3067\u3059\u3002\u305d\u308c\u305e\u308c\u306e\u8a08\u7b97\u65b9\u6cd5\u3092\u898b\u3066\u307f\u307e\u3057\u3087\u3046\u3002<\/p>\n<h3>TF<\/h3>\n<p><img decoding=\"async\" class=\"aligncenter size-large wp-image-67457\" src=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tf-640x360.png\" alt=\"\" width=\"640\" height=\"360\" srcset=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tf-640x360.png 640w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tf-150x84.png 150w, 
https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tf-300x169.png 300w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tf.png 700w\" sizes=\"(max-width: 640px) 100vw, 640px\" \/><\/p>\n<p>TF\u306f\u300c\u5358\u8a9e\u306e\u51fa\u73fe\u983b\u5ea6\u300d\u3092\u8868\u3057\u307e\u3059\u3002\u3053\u308c\u306f\u5358\u8a9e\u306e\u51fa\u73fe\u56de\u6570\u3092\u305d\u306e\u6587\u66f8\u5185\u306b\u3042\u308b\u5358\u8a9e\u306e\u7dcf\u5408\u8a08\u6570\u3067\u5272\u3063\u305f\u3060\u3051\u306e\u7c21\u5358\u306a\u6307\u6a19\u3067\u3059\u3002\u3053\u306e\u5024\u304c\u5927\u304d\u3044\u3068\u91cd\u8981\u3067\u3001\u5c0f\u3055\u3044\u3068\u305d\u3093\u306a\u306b\u91cd\u8981\u3067\u306f\u306a\u3044\u3068\u8003\u3048\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<h3>IDF<\/h3>\n<p><img decoding=\"async\" class=\"aligncenter size-large wp-image-67460\" src=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/idf-640x360.jpg\" alt=\"\" width=\"640\" height=\"360\" srcset=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/idf-640x360.jpg 640w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/idf-150x84.jpg 150w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/idf-300x169.jpg 300w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/idf.jpg 700w\" sizes=\"(max-width: 640px) 100vw, 640px\" 
\/><\/p>\n<p>IDF\u306f\u300c\u9006\u6587\u66f8\u983b\u5ea6\u300d\u3068\u8a00\u3044\u307e\u3059\u3002\u3053\u308c\u306fthis\u3084is\u3001a\u3001am\u306a\u3069\u306e\u3088\u3046\u306a\u3001\u4e00\u822c\u8a9e\uff08\u3069\u3093\u306a\u6587\u66f8\u306b\u3082\u51fa\u3066\u304f\u308b\u3088\u3046\u306a\u5358\u8a9e\uff09\u306e\u30d5\u30a3\u30eb\u30bf\u3068\u3057\u3066\u6a5f\u80fd\u3057\u307e\u3059\u3002IDF\u5024\u304c\u9ad8\u3044\u3068\u91cd\u8981\u3067\u3001\u5c0f\u3055\u3044\u3068\u305d\u3093\u306a\u306b\u91cd\u8981\u3067\u306f\u306a\u3044\u3068\u8003\u3048\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u5f0f\u3092\u307f\u3066\u304f\u3060\u3055\u3044\u3002IDF\u304c\u5c0f\u3055\u3044\u3068\u3044\u3046\u3053\u3068\u306f\u3001df(t)\u306e\u5024\u304c\u5927\u304d\u3044\u3068\u3044\u3046\u3053\u3068\u3067\u3059\u306d\u3002\u3064\u307e\u308a<b>\u3044\u308d\u3093\u306a\u6587\u66f8\u306b\u51fa\u3066\u304f\u308b\u3088\u304f\u898b\u308b\u5358\u8a9e\u306f\u305d\u3053\u307e\u3067\u91cd\u8981\u3067\u306f\u306a\u3044<\/b>\u3068\u8a55\u4fa1\u3057\u3066\u3044\u308b\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<h3>TF-IDF<\/h3>\n<p><img decoding=\"async\" class=\"aligncenter size-large wp-image-67461\" src=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf2-640x360.png\" alt=\"\" width=\"640\" height=\"360\" srcset=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf2-640x360.png 640w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf2-150x84.png 150w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf2-300x169.png 300w, https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf2.png 700w\" sizes=\"(max-width: 640px) 100vw, 640px\" 
\/><\/p>\n<p>TF-IDF\u306f\u4e0a\u8a18\u306eTF\u3068IDF\u3092\u304b\u3051\u5408\u308f\u305b\u305f\u3060\u3051\u306e\u6307\u6a19\u3067\u3059\u3002\u3053\u306e\u5024\u304c\u5927\u304d\u3044\u3068\u91cd\u8981\u3067\u3001\u5c0f\u3055\u3044\u3068\u91cd\u8981\u3067\u306f\u306a\u3044\u3068\u898b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30ab\u30a6\u30f3\u30c8\u30d9\u30fc\u30b9\u3067\u6587\u66f8\u30af\u30e9\u30b9\u30bf\u30ea\u30f3\u30b0\u3092\u884c\u3063\u305f\u5834\u5408\u3001this\u3084is\u306e\u3088\u3046\u306a\u4e00\u822c\u8a9e\u306f\u51fa\u73fe\u56de\u6570\u304c\u591a\u3044\u306e\u3067\u7d50\u679c\u306b\u5927\u304d\u304f\u5f71\u97ff\u3092\u4e0e\u3048\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u3067\u3059\u304cTF-IDF\u3092\u4f7f\u3063\u305f\u5834\u5408\u306f\u3069\u3046\u306a\u308b\u3067\u3057\u3087\u3046\u304b\u3002\u305d\u306e\u5834\u5408\u306f\u3001\u4e00\u822c\u8a9e\u306fTF-IDF\u5024\u304c\u5c0f\u3055\u3044\u306e\u3067\u305d\u3053\u307e\u3067\u5f71\u97ff\u3092\u4e0e\u3048\u307e\u305b\u3093\u3002\u305d\u3057\u3066\u9006\u306b\u3001\u305d\u308c\u4ee5\u5916\u306eTF-IDF\u304c\u5927\u304d\u304b\u3063\u305f\u5024\u304c\u6587\u66f8\u30af\u30e9\u30b9\u30bf\u30ea\u30f3\u30b0\u306b\u5927\u304d\u306a\u5f71\u97ff\u3092\u4e0e\u3048\u308b\u53ef\u80fd\u6027\u304c\u51fa\u3066\u304f\u308b\u3068\u8a00\u3046\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<h2>Python\u5b9f\u88c5<\/h2>\n<p>TF-IDF\u306e\u30ca\u30a4\u30fc\u30d6\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306f\u4e0a\u8a18\u306e\u901a\u308a\u3067\u3059\u304c\u3001\u540c\u3058\u30b3\u30f3\u30bb\u30d7\u30c8\u3067\u5c11\u3057\u9055\u3046\u6570\u5f0f\u306e\u3082\u306e\u304c\u591a\u6570\u3042\u308a\u307e\u3059\u3002\u4eca\u56de\u306f\u4e0a\u8a18\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u30b7\u30f3\u30d7\u30eb\u306bPython\u3060\u3051\u3067\u5b9f\u88c5\u3057\u3066\u307f\u307e\u3057\u305f\u3002<\/p>\n<h3>\u6587\u5b57\u5217\u3092\u7528\u610f<\/h3>\n<p>\u307e\u305a\u306f\u6587\u5b57\u
5217\u3092\u7528\u610f\u3057\u3066\u5358\u8a9e\u3054\u3068\u306b\u5206\u5272\u3057\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \">from math import log\r\nimport pandas as pd\r\n\r\ndocs = [\r\n    [\"\u72ac\", \"\u53ef\u611b\u3044\", \"\u72ac\", \"\u5927\u304d\u3044\"],\r\n    [\"\u732b\", \"\u5c0f\u3055\u3044\", \"\u732b\", \"\u53ef\u611b\u3044\", \"\u53ef\u611b\u3044\"],\r\n    [\"\u866b\", \"\u5c0f\u3055\u3044\", \"\u53ef\u611b\u304f\u306a\u3044\"]\r\n]\r\n\r\nwords = list(set(w for doc in docs for w in doc))\r\nwords.sort()\r\nwords<\/pre>\n<p>Out:<\/p>\n<pre class=\"lang:default decode:true\">['\u53ef\u611b\u3044', '\u53ef\u611b\u304f\u306a\u3044', '\u5927\u304d\u3044', '\u5c0f\u3055\u3044', '\u72ac', '\u732b', '\u866b']<\/pre>\n<h3>TF-IDF\u306e\u5b9f\u88c5<\/h3>\n<p>TF-IDF\u306e\u5b9f\u88c5\u306f\u4ee5\u4e0b\u306e\u901a\u308a\u3001\u6570\u5f0f\u901a\u308a\u3067\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \">N = len(docs)\r\n\r\ndef tf(t, d):\r\n    return d.count(t)\/len(d)\r\n\r\ndef idf(t):\r\n    df = 0\r\n    for doc in docs:\r\n        df += t in doc\r\n    \r\n    return log(N\/df)+1\r\n\r\ndef tfidf(t, d):\r\n    return tf(t,d)* idf(t)\r\n<\/pre>\n<p>\u3053\u308c\u3092\u4f7f\u3063\u3066\u5148\u7a0b\u306e\u30c7\u30fc\u30bf\u306eTF-IDF\u5024\u3092\u8a08\u7b97\u3057\u3066\u307f\u307e\u3059\u3002<\/p>\n<p>\u307e\u305a\u306fTF\u3002<\/p>\n<pre class=\"lang:default decode:true \">result = []\r\nfor i in range(N):\r\n    result.append([])\r\n    d = docs[i]\r\n    for j in range(len(words)):\r\n        t = words[j]\r\n        \r\n        result[-1].append(tf(t,d))\r\n        \r\ntf_ = pd.DataFrame(result, columns=words)\r\ntf_<\/pre>\n<div class=\"cell border-box-sizing code_cell rendered\">\n<div class=\"output_wrapper\">\n<div class=\"output\">\n<div class=\"output_area\">\n<div class=\"prompt output_prompt\">Out:<\/div>\n<div class=\"output_html rendered_html output_subarea output_execute_result\">\n<div>\n<table 
class=\"dataframe\" border=\"1\">\n<thead>\n<tr>\n<th><\/th>\n<th>\u53ef\u611b\u3044<\/th>\n<th>\u53ef\u611b\u304f\u306a\u3044<\/th>\n<th>\u5927\u304d\u3044<\/th>\n<th>\u5c0f\u3055\u3044<\/th>\n<th>\u72ac<\/th>\n<th>\u732b<\/th>\n<th>\u866b<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>0.25<\/td>\n<td>0.000000<\/td>\n<td>0.25<\/td>\n<td>0.000000<\/td>\n<td>0.5<\/td>\n<td>0.0<\/td>\n<td>0.000000<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>0.40<\/td>\n<td>0.000000<\/td>\n<td>0.00<\/td>\n<td>0.200000<\/td>\n<td>0.0<\/td>\n<td>0.4<\/td>\n<td>0.000000<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>0.00<\/td>\n<td>0.333333<\/td>\n<td>0.00<\/td>\n<td>0.333333<\/td>\n<td>0.0<\/td>\n<td>0.0<\/td>\n<td>0.333333<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"prompt input_prompt\">\u6b21\u306bIDF\u3067\u3059\u3002<\/div>\n<div>\n<pre class=\"lang:default decode:true \">result = []\r\nfor j in range(len(words)):\r\n    t = words[j]\r\n    result.append(idf(t))\r\n\r\nidf_ = pd.DataFrame(result, index=words, columns=[\"IDF\"])\r\nidf_\r\n<\/pre>\n<div class=\"prompt output_prompt\">Out:<\/div>\n<div class=\"output_html rendered_html output_subarea output_execute_result\">\n<div>\n<table class=\"dataframe\" border=\"1\">\n<thead>\n<tr>\n<th><\/th>\n<th>IDF<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>\u53ef\u611b\u3044<\/th>\n<td>1.405465<\/td>\n<\/tr>\n<tr>\n<th>\u53ef\u611b\u304f\u306a\u3044<\/th>\n<td>2.098612<\/td>\n<\/tr>\n<tr>\n<th>\u5927\u304d\u3044<\/th>\n<td>2.098612<\/td>\n<\/tr>\n<tr>\n<th>\u5c0f\u3055\u3044<\/th>\n<td>1.405465<\/td>\n<\/tr>\n<tr>\n<th>\u72ac<\/th>\n<td>2.098612<\/td>\n<\/tr>\n<tr>\n<th>\u732b<\/th>\n<td>2.098612<\/td>\n<\/tr>\n<tr>\n<th>\u866b<\/th>\n<td>2.098612<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<div>\u305d\u3057\u3066TF-IDF\u3067\u3059\u3002<\/div>\n<div>\n<pre class=\"lang:default decode:true \">result = []\r\nfor i in range(N):\r\n    result.append([])\r\n    d = 
docs[i]\r\n    for j in range(len(words)):\r\n        t = words[j]\r\n        \r\n        result[-1].append(tfidf(t,d))\r\n\r\ntfidf_ = pd.DataFrame(result, columns=words)\r\ntfidf_\r\n<\/pre>\n<p>Out:<\/p>\n<table class=\"dataframe\" border=\"1\">\n<thead>\n<tr>\n<th><\/th>\n<th>\u53ef\u611b\u3044<\/th>\n<th>\u53ef\u611b\u304f\u306a\u3044<\/th>\n<th>\u5927\u304d\u3044<\/th>\n<th>\u5c0f\u3055\u3044<\/th>\n<th>\u72ac<\/th>\n<th>\u732b<\/th>\n<th>\u866b<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>0.351366<\/td>\n<td>0.000000<\/td>\n<td>0.524653<\/td>\n<td>0.000000<\/td>\n<td>1.049306<\/td>\n<td>0.000000<\/td>\n<td>0.000000<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>0.562186<\/td>\n<td>0.000000<\/td>\n<td>0.000000<\/td>\n<td>0.281093<\/td>\n<td>0.000000<\/td>\n<td>0.839445<\/td>\n<td>0.000000<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>0.000000<\/td>\n<td>0.699537<\/td>\n<td>0.000000<\/td>\n<td>0.468488<\/td>\n<td>0.000000<\/td>\n<td>0.000000<\/td>\n<td>0.699537<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<p>TF\u306e\u5024\u3001IDF\u306e\u5024\u3001TF-IDF\u306e\u5024\u3092\u8868\u793a\u3059\u308b\u30bb\u30eb\u3092\u5b9f\u969b\u306b\u52d5\u304b\u3057\u3066\u5024\u3092\u78ba\u304b\u3081\u3066\u304f\u3060\u3055\u3044\u3002\u524d\u306e\u30bb\u30af\u30b7\u30e7\u30f3\u3067\u7d39\u4ecb\u3057\u305f\u30b3\u30f3\u30bb\u30d7\u30c8\u901a\u308a\u306e\u7d50\u679c\u304c\u51fa\u3066\u3044\u308b\u3053\u3068\u304c\u308f\u304b\u308b\u3068\u601d\u3044\u307e\u3059\u3002\u307e\u305f\u3001\u5b9f\u969b\u306bTF-IDF\u3092\u4f7f\u3046\u3068\u304d\u306f\u3001<a href=\"http:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.feature_extraction.text.TfidfVectorizer.html\">sklearn\u306a\u3089 tfidfvectorizer<\/a>\u3001<a 
href=\"https:\/\/radimrehurek.com\/gensim\/models\/tfidfmodel.html\">gensim\u306a\u3089models.tfidfmodel<\/a>\u306e\u3088\u3046\u306a\u5b9f\u88c5\u6e08\u307f\u306e\u30af\u30e9\u30b9\u3092\u4f7f\u3044\u307e\u3057\u3087\u3046\u3002<\/p>\n<p>\u305d\u306e\u65b9\u304c\u901f\u304f\u3066\u78ba\u5b9f\u3067\u3059\u3002<\/p>\n<h2>\u3082\u3063\u3068\u52c9\u5f37\u3059\u308b\u306b\u306f<\/h2>\n<p>TF-IDF\u4ee5\u5916\u306b\u3082\u3001\u30c7\u30fc\u30bf\u306e\u7a2e\u985e\u3084\u76ee\u7684\u306b\u3088\u3063\u3066\u69d8\u3005\u306a\u7279\u5fb4\u91cf\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30fc\u30bf\u89e3\u6790\u306b\u304a\u3044\u3066\u3001\u9069\u5207\u306a\u7279\u5fb4\u91cf\u3092\u9078\u3076\u3068\u3044\u3046\u306e\u306f\u975e\u5e38\u306b\u91cd\u8981\u306a\u8981\u7d20\u306b\u306a\u308a\u307e\u3059\u3002\u3069\u3093\u306a\u30c7\u30fc\u30bf\u306b\u3069\u3093\u306a\u7279\u5fb4\u91cf\u3092\u4f7f\u3046\u304b\u306a\u3069\u3001\u5b9f\u8df5\u7684\u306a\u30c7\u30fc\u30bf\u89e3\u6790\u306e\u52c9\u5f37\u3092\u3057\u305f\u3044\u306a\u3089\u3070\u3001kaggle\u306a\u3069\u306e\u30b3\u30f3\u30da\u30c6\u30a3\u30b7\u30e7\u30f3\u306b\u53c2\u52a0\u3057\u306a\u304c\u3089\u5b9f\u6226\u7d4c\u9a13\u3092\u7a4d\u3080\u306e\u304c\u4e00\u756a\u3060\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001<a 
href=\"https:\/\/lp.sejuku.net\/lp1_blog_01\/?cid=ai_btn2_26420\">\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u306e\u30de\u30f3\u30fb\u30c4\u30fc\u30fb\u30de\u30f3\u3067\u30c7\u30fc\u30bf\u89e3\u6790\u306e\u624b\u6cd5\u3092\u5b66\u3076<\/a>\u306e\u3082\u304a\u3059\u3059\u3081\u306e\u3067\u3001\u662f\u975e\u8003\u3048\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n<h2>\u307e\u3068\u3081<\/h2>\n<p>\u3053\u306e\u8a18\u4e8b\u3067\u306f\u3001TF-IDF\u306b\u3064\u3044\u3066\u307e\u3068\u3081\u307e\u3057\u305f\u3002TF-IDF\u3067\u6587\u66f8\u30c7\u30fc\u30bf\u3092\u8868\u73fe\u3057\u3066k-means\u306a\u3069\u3067\u30af\u30e9\u30b9\u30bf\u30ea\u30f3\u30b0\u3092\u884c\u3063\u305f\u308a\u3068\u3001\u69d8\u3005\u306a\u6a5f\u68b0\u5b66\u7fd2\u30e2\u30c7\u30eb\u306e\u5165\u529b\u30c7\u30fc\u30bf\u3068\u3057\u3066\u4f7f\u3046\u3053\u3068\u304c\u3067\u304d\u308b\u306e\u3067\u3001\u662f\u975e\u899a\u3048\u3066\u4f7f\u3063\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u306d\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u66f8\u5206\u985e\u306a\u3069\u306b\u304a\u3044\u3066\u3001\u6587\u66f8\u306e\u7279\u5fb4\u91cf\u3068\u3057\u3066\u4f7f\u308f\u308c\u308b\u53e4\u5178\u7684\u306a\u6307\u6a19\u306bTF-IDF\u304c\u3042\u308a\u307e\u3059\u3002TF-IDF\u306f\u6587\u66f8\u3060\u3051\u3067\u306a\u304f\u3001\u69d8\u3005\u306a\u30c7\u30fc\u30bf\u306b\u9069\u7528\u3067\u304d\u3066\u30b7\u30f3\u30d7\u30eb\u3060\u3051\u3069\u975e\u5e38\u306b\u4f7f\u3044\u3084\u3059\u3044\u7279\u5fb4\u91cf\u3067\u3059\u3002 \u3053\u306e\u8a18\u4e8b\u3067\u306f TF-IDF\u306e\u8a08\u7b97\u5f0f  
[&hellip;]<\/p>\n","protected":false},"author":4,"featured_media":67456,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"inline_featured_image":false,"swell_btn_cv_data":"","footnotes":""},"categories":[1],"tags":[1281,49],"class_list":["post-26420","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-others","tag-ai","tag-python"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.3 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | \u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0<\/title>\n<meta name=\"description\" content=\"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 \u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.sejuku.net\/blog\/26420\" \/>\n<meta property=\"og:locale\" content=\"ja_JP\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | 
\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0\" \/>\n<meta property=\"og:description\" content=\"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 \u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.sejuku.net\/blog\/26420\" \/>\n<meta property=\"og:site_name\" content=\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/sejuku2013\" \/>\n<meta property=\"article:author\" content=\"https:\/\/www.facebook.com\/sejuku2013\/\" \/>\n<meta property=\"article:published_time\" content=\"2017-07-04T14:49:24+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-05-06T02:50:28+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png\" \/>\n\t<meta property=\"og:image:width\" content=\"700\" \/>\n\t<meta property=\"og:image:height\" content=\"394\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@https:\/\/twitter.com\/samuraijuku\" \/>\n<meta name=\"twitter:site\" content=\"@samuraijuku\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#article\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420\"},\"author\":{\"name\":\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#\\\/schema\\\/person\\\/e8ca7fd09857a736a25e6b4455a3ab61\"},\"headline\":\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01\",\"datePublished\":\"2017-07-04T14:49:24+00:00\",\"dateModified\":\"2024-05-06T02:50:28+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420\"},\"wordCount\":72,\"publisher\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#organization\"},\"image\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2017\\\/07\\\/tfidf.png\",\"keywords\":[\"AI\",\"python\"],\"articleSection\":[\"\u305d\u306e\u4ed6\"],\"inLanguage\":\"ja\"},{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420\",\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420\",\"name\":\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | \u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2017\\\/07\\\/tfidf.png\",\"datePublished\":\"2017-07-04T14:49:24+00:00\",\"dateModified\":\"2024-05-06T02:50:28+00:00\",\"description\":\"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c 
\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 \u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#breadcrumb\"},\"inLanguage\":\"ja\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"ja\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#primaryimage\",\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2017\\\/07\\\/tfidf.png\",\"contentUrl\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2017\\\/07\\\/tfidf.png\",\"width\":700,\"height\":394},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/26420#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#website\",\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/\",\"name\":\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0\",\"description\":\"\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u5b66\u7fd2\u306e\u3059\u3079\u3066\u304c\u30b3\u30b3\u306b\u3002\",\"publisher\":{\"@id\":\"https:\\\/\\\/www.s
ejuku.net\\\/blog\\\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"ja\"},{\"@type\":\"Organization\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#organization\",\"name\":\"\u682a\u5f0f\u4f1a\u793eSAMURAI\",\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"ja\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#\\\/schema\\\/logo\\\/image\\\/\",\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2023\\\/07\\\/logo.png\",\"contentUrl\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/wp-content\\\/uploads\\\/2023\\\/07\\\/logo.png\",\"width\":600,\"height\":600,\"caption\":\"\u682a\u5f0f\u4f1a\u793eSAMURAI\"},\"image\":{\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#\\\/schema\\\/logo\\\/image\\\/\"},\"sameAs\":[\"https:\\\/\\\/www.facebook.com\\\/sejuku2013\",\"https:\\\/\\\/x.com\\\/samuraijuku\",\"https:\\\/\\\/www.youtube.com\\\/channel\\\/UCCFOQO5aDK0xXam4cUQXT8g\\\/featured\"]},{\"@type\":\"Person\",\"@id\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/#\\\/schema\\\/person\\\/e8ca7fd09857a736a25e6b4455a3ab61\",\"name\":\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"ja\",\"@id\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g\",\"url\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g\",\"contentUrl\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g\",\"caption\":\"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\
u90e8\"},\"description\":\"\u3010\u30d7\u30ed\u30d5\u30a3\u30fc\u30eb\u3011 DX\u8a8d\u5b9a\u53d6\u5f97\u4e8b\u696d\u8005\u306b\u9078\u5b9a\u3055\u308c\u3066\u3044\u308b\u682a\u5f0f\u4f1a\u793eSAMURAI\u306e\u30de\u30fc\u30b1\u30c6\u30a3\u30f3\u30b0\u30fb\u30b3\u30df\u30e5\u30cb\u30b1\u30fc\u30b7\u30e7\u30f3\u90e8\u304c\u904b\u55b6\u3002\u300c\u8cea\u306e\u9ad8\u3044IT\u6559\u80b2\u3092\u3001\u3059\u3079\u3066\u306e\u4eba\u306b\u300d\u3092\u30df\u30c3\u30b7\u30e7\u30f3\u306b\u3001IT\u30fb\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u3092\u5b66\u3073\u59cb\u3081\u305f\u521d\u5b66\u8005\u306e\u65b9\u306b\u5411\u3051\u8a18\u4e8b\u3092\u57f7\u7b46\u3002 \u7d2f\u8a08\u6307\u5c0e\u8005\u65704\u4e075,000\u540d\u4ee5\u4e0a\u306e\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\u300c\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u300d\u3001\u7d2f\u8a08\u767b\u9332\u8005\u65701\u4e078,000\u4eba\u4ee5\u4e0a\u306e\u30aa\u30f3\u30e9\u30a4\u30f3\u5b66\u7fd2\u30b5\u30fc\u30d3\u30b9\u300c\u4f8d\u30c6\u30e9\u30b3\u30e4\u300d\u3067\u6271\u3046\u6559\u6750\u958b\u767a\u306e\u30ce\u30a6\u30cf\u30a6\u30012013\u5e74\u306e\u5275\u696d\u304b\u3089\u904b\u55b6\u3067\u5f97\u305f\u77e5\u898b\u306b\u57fa\u3065\u304d\u3001\u8a18\u4e8b\u306e\u57f7\u7b46\u3060\u3051\u3067\u306a\u304f\u7de8\u96c6\u30fb\u76e3\u4fee\u3082\u62c5\u5f53\u3057\u3066\u3044\u307e\u3059\u3002 \u3010\u5c02\u9580\u5206\u91ce\u3011 IT\\\/Web\u958b\u767a\\\/AI\u30fb\u30ed\u30dc\u30c3\u30c8\u958b\u767a\\\/\u30a4\u30f3\u30d5\u30e9\u958b\u767a\\\/\u30b2\u30fc\u30e0\u958b\u767a\\\/AI\\\/Web\u30c7\u30b6\u30a4\u30f3\",\"sameAs\":[\"https:\\\/\\\/www.sejuku.net\\\/\",\"https:\\\/\\\/www.facebook.com\\\/sejuku2013\\\/\",\"https:\\\/\\\/www.instagram.com\\\/samuraiengineer_official\\\/\",\"https:\\\/\\\/x.com\\\/https:\\\/\\\/twitter.com\\\/samuraijuku\",\"https:\\\/\\\/www.youtube.com\\\/channel\\\/UCCFOQO5aDK0xXam4cUQXT8g\"],\"url\":\"https:\\\/\\\/www.sejuku.net\\\/blog\\\/author\\\/samurai-blog\"}]}<\/script>\n<!-- \/ Yoast SEO 
plugin. -->","yoast_head_json":{"title":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | \u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0","description":"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 \u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.sejuku.net\/blog\/26420","og_locale":"ja_JP","og_type":"article","og_title":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | \u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0","og_description":"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 
\u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002","og_url":"https:\/\/www.sejuku.net\/blog\/26420","og_site_name":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0","article_publisher":"https:\/\/www.facebook.com\/sejuku2013","article_author":"https:\/\/www.facebook.com\/sejuku2013\/","article_published_time":"2017-07-04T14:49:24+00:00","article_modified_time":"2024-05-06T02:50:28+00:00","og_image":[{"width":700,"height":394,"url":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png","type":"image\/png"}],"author":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8","twitter_card":"summary_large_image","twitter_creator":"@https:\/\/twitter.com\/samuraijuku","twitter_site":"@samuraijuku","schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/www.sejuku.net\/blog\/26420#article","isPartOf":{"@id":"https:\/\/www.sejuku.net\/blog\/26420"},"author":{"name":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8","@id":"https:\/\/www.sejuku.net\/blog\/#\/schema\/person\/e8ca7fd09857a736a25e6b4455a3ab61"},"headline":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01","datePublished":"2017-07-04T14:49:24+00:00","dateModified":"2024-05-06T02:50:28+00:00","mainEntityOfPage":{"@id":"https:\/\/www.sejuku.net\/blog\/26420"},"wordCount":72,"publisher":{"@id":"https:\/\/www.sejuku.net\/blog\/#organization"},"image":{"@id":"https:\/\/www.sejuku.net\/blog\/26420#primaryimage"},"thumbnailUrl":"ht
tps:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png","keywords":["AI","python"],"articleSection":["\u305d\u306e\u4ed6"],"inLanguage":"ja"},{"@type":"WebPage","@id":"https:\/\/www.sejuku.net\/blog\/26420","url":"https:\/\/www.sejuku.net\/blog\/26420","name":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 | \u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0","isPartOf":{"@id":"https:\/\/www.sejuku.net\/blog\/#website"},"primaryImageOfPage":{"@id":"https:\/\/www.sejuku.net\/blog\/26420#primaryimage"},"image":{"@id":"https:\/\/www.sejuku.net\/blog\/26420#primaryimage"},"thumbnailUrl":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png","datePublished":"2017-07-04T14:49:24+00:00","dateModified":"2024-05-06T02:50:28+00:00","description":"\u3053\u306e\u8a18\u4e8b\u3067\u306f\u300c \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01 
\u300d\u306b\u3064\u3044\u3066\u3001\u8ab0\u3067\u3082\u7406\u89e3\u3067\u304d\u308b\u3088\u3046\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3081\u3070\u3001\u3042\u306a\u305f\u306e\u60a9\u307f\u304c\u89e3\u6c7a\u3059\u308b\u3060\u3051\u3058\u3083\u306a\u304f\u3001\u65b0\u305f\u306a\u6c17\u4ed8\u304d\u3082\u767a\u898b\u3067\u304d\u308b\u3053\u3068\u3067\u3057\u3087\u3046\u3002\u304a\u60a9\u307f\u306e\u65b9\u306f\u305c\u3072\u3054\u4e00\u8aad\u304f\u3060\u3055\u3044\u3002","breadcrumb":{"@id":"https:\/\/www.sejuku.net\/blog\/26420#breadcrumb"},"inLanguage":"ja","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.sejuku.net\/blog\/26420"]}]},{"@type":"ImageObject","inLanguage":"ja","@id":"https:\/\/www.sejuku.net\/blog\/26420#primaryimage","url":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png","contentUrl":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2017\/07\/tfidf.png","width":700,"height":394},{"@type":"BreadcrumbList","@id":"https:\/\/www.sejuku.net\/blog\/26420#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.sejuku.net\/blog\/"},{"@type":"ListItem","position":2,"name":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u790e\u6280\u8853\uff01tf-idf\u3092\u7c21\u5358\u306b\u89e3\u8aac\uff01"}]},{"@type":"WebSite","@id":"https:\/\/www.sejuku.net\/blog\/#website","url":"https:\/\/www.sejuku.net\/blog\/","name":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u30d6\u30ed\u30b0","description":"\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u5b66\u7fd2\u306e\u3059\u3079\u3066\u304c\u30b3\u30b3\u306b\u3002","publisher":{"@id":"https:\/\/www.sejuku.net\/blog\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.sejuku.net\/blog\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"ja"},{"@type":
"Organization","@id":"https:\/\/www.sejuku.net\/blog\/#organization","name":"\u682a\u5f0f\u4f1a\u793eSAMURAI","url":"https:\/\/www.sejuku.net\/blog\/","logo":{"@type":"ImageObject","inLanguage":"ja","@id":"https:\/\/www.sejuku.net\/blog\/#\/schema\/logo\/image\/","url":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2023\/07\/logo.png","contentUrl":"https:\/\/www.sejuku.net\/blog\/wp-content\/uploads\/2023\/07\/logo.png","width":600,"height":600,"caption":"\u682a\u5f0f\u4f1a\u793eSAMURAI"},"image":{"@id":"https:\/\/www.sejuku.net\/blog\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/sejuku2013","https:\/\/x.com\/samuraijuku","https:\/\/www.youtube.com\/channel\/UCCFOQO5aDK0xXam4cUQXT8g\/featured"]},{"@type":"Person","@id":"https:\/\/www.sejuku.net\/blog\/#\/schema\/person\/e8ca7fd09857a736a25e6b4455a3ab61","name":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8","image":{"@type":"ImageObject","inLanguage":"ja","@id":"https:\/\/secure.gravatar.com\/avatar\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/507c280c5c67d2c11fec4fdba20e5bf1ec2fe91f9deb42d2ec50382778b311bf?s=96&d=mm&r=g","caption":"\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u7de8\u96c6\u90e8"},"description":"\u3010\u30d7\u30ed\u30d5\u30a3\u30fc\u30eb\u3011 
DX\u8a8d\u5b9a\u53d6\u5f97\u4e8b\u696d\u8005\u306b\u9078\u5b9a\u3055\u308c\u3066\u3044\u308b\u682a\u5f0f\u4f1a\u793eSAMURAI\u306e\u30de\u30fc\u30b1\u30c6\u30a3\u30f3\u30b0\u30fb\u30b3\u30df\u30e5\u30cb\u30b1\u30fc\u30b7\u30e7\u30f3\u90e8\u304c\u904b\u55b6\u3002\u300c\u8cea\u306e\u9ad8\u3044IT\u6559\u80b2\u3092\u3001\u3059\u3079\u3066\u306e\u4eba\u306b\u300d\u3092\u30df\u30c3\u30b7\u30e7\u30f3\u306b\u3001IT\u30fb\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u3092\u5b66\u3073\u59cb\u3081\u305f\u521d\u5b66\u8005\u306e\u65b9\u306b\u5411\u3051\u8a18\u4e8b\u3092\u57f7\u7b46\u3002 \u7d2f\u8a08\u6307\u5c0e\u8005\u65704\u4e075,000\u540d\u4ee5\u4e0a\u306e\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30b9\u30af\u30fc\u30eb\u300c\u4f8d\u30a8\u30f3\u30b8\u30cb\u30a2\u300d\u3001\u7d2f\u8a08\u767b\u9332\u8005\u65701\u4e078,000\u4eba\u4ee5\u4e0a\u306e\u30aa\u30f3\u30e9\u30a4\u30f3\u5b66\u7fd2\u30b5\u30fc\u30d3\u30b9\u300c\u4f8d\u30c6\u30e9\u30b3\u30e4\u300d\u3067\u6271\u3046\u6559\u6750\u958b\u767a\u306e\u30ce\u30a6\u30cf\u30a6\u30012013\u5e74\u306e\u5275\u696d\u304b\u3089\u904b\u55b6\u3067\u5f97\u305f\u77e5\u898b\u306b\u57fa\u3065\u304d\u3001\u8a18\u4e8b\u306e\u57f7\u7b46\u3060\u3051\u3067\u306a\u304f\u7de8\u96c6\u30fb\u76e3\u4fee\u3082\u62c5\u5f53\u3057\u3066\u3044\u307e\u3059\u3002 \u3010\u5c02\u9580\u5206\u91ce\u3011 
IT\/Web\u958b\u767a\/AI\u30fb\u30ed\u30dc\u30c3\u30c8\u958b\u767a\/\u30a4\u30f3\u30d5\u30e9\u958b\u767a\/\u30b2\u30fc\u30e0\u958b\u767a\/AI\/Web\u30c7\u30b6\u30a4\u30f3","sameAs":["https:\/\/www.sejuku.net\/","https:\/\/www.facebook.com\/sejuku2013\/","https:\/\/www.instagram.com\/samuraiengineer_official\/","https:\/\/x.com\/https:\/\/twitter.com\/samuraijuku","https:\/\/www.youtube.com\/channel\/UCCFOQO5aDK0xXam4cUQXT8g"],"url":"https:\/\/www.sejuku.net\/blog\/author\/samurai-blog"}]}},"_links":{"self":[{"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/posts\/26420","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/users\/4"}],"replies":[{"embeddable":true,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/comments?post=26420"}],"version-history":[{"count":0,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/posts\/26420\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/media\/67456"}],"wp:attachment":[{"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/media?parent=26420"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/categories?post=26420"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.sejuku.net\/blog\/wp-json\/wp\/v2\/tags?post=26420"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}