Web + Life Hack

〜True But Useless〜

【ruby】【rails】【API】じゃらんAPIでせこせこホテル情報を取得する方法


みんな大好きAPI


Web系の仕事をするようになってから、APIを使ったデータ取得(スクレイピング)をよく利用するようになりました。
例を挙げると

楽天の商品検索API

http://webservice.rakuten.co.jp/api/itemsearch/



などです。
今回は


じゃらんAPI

http://www.jalan.net/jw/jwp0000/jww0001.do

をスクレイピングしました。

大まかな流れとしては


  1. マイグレーションファイル作成
  2. APIを作成し、レスポンスフィールドをDBに挿入


になります。
また今回は多くの人に利用して頂けるように

  • 都道府県コード別
  • 大エリアコード別

に分けました。ぜひ、参考にして頂けたらと思います!


前提条件

g08m11% ruby -v
ruby 1.9.3p125 (2012-02-16 revision 34643) [x86_64-darwin11.4.0]

g08m11% rails -v
Rails 4.0.1

g08m11% gem list | grep nokogiri
nokogiri (1.6.1, 1.6.0)

g08m11% gem list | grep pp
activesupport (4.0.2, 4.0.1, 4.0.0, 3.2.15, 3.1.0)
gem-wrappers (1.2.4)
rspec-support (3.0.0.beta1)


1・マイグレーションファイル作成


都道府県コード別

# Migration for the prefecture-code variant of the import task.
#
# Creates the `hotels` table with one string column per value stored from the
# Jalan API response. Column names are the lower-cased response field names;
# `plancheckOut` keeps its mixed case because the import task assigns to
# `hotel.plancheckOut`.
#
# `facility` and `stock` are not populated by the prefecture-code import task
# shown in this article.
class CreateHotels < ActiveRecord::Migration
  def change
    create_table :hotels do |t|
      # Plan-level fields
      t.string :planname
      t.string :roomname
      t.string :plandetailurl
      t.string :facility
      t.string :plancheckin
      t.string :plancheckOut
      t.string :splyperiodstrday
      t.string :splyperiodendday
      t.string :planpictureurl
      t.string :planpicturecaption
      t.string :meal
      t.string :ratetype
      t.string :samplerate
      t.string :servicechargerate
      t.string :stock

      # Hotel-level fields
      t.string :hotelid
      t.string :hotelname
      t.string :postcode
      t.string :hoteladdress
      t.string :region
      t.string :prefecture
      t.string :largearea
      t.string :smallarea
      t.string :hoteltype
      t.string :hoteldetailurl
      t.string :hotelcatchcopy
      t.string :hotelcaption
      t.string :pictureurl
      t.string :picturecaption
      t.string :x
      t.string :y
      t.string :hotelnamekana
      t.string :numberofratings
      t.string :rating

      t.timestamps
    end
  end
end



大エリアコード別

# Migration for the large-area-code variant of the import task.
#
# Creates the `hotels` table with one string column per value stored from the
# Jalan API response, including the per-night stay columns (`date`, `month`,
# `year`, `rate`, `stock`). `plancheckOut` keeps its mixed case because the
# import task assigns to `hotel.plancheckOut`.
#
# `facility` and `stay` are not populated by the large-area import task shown
# in this article.
#
# NOTE: this is an alternative to the prefecture-code migration; both are
# named CreateHotels and create the same `hotels` table, so use only one.
class CreateHotels < ActiveRecord::Migration
  def change
    create_table :hotels do |t|
      # Plan-level fields
      t.string :planname
      t.string :roomname
      t.string :plandetailurl
      t.string :facility
      t.string :plancheckin
      t.string :plancheckOut
      t.string :splyperiodstrday
      t.string :splyperiodendday
      t.string :planpictureurl
      t.string :planpicturecaption
      t.string :meal
      t.string :ratetype
      t.string :samplerate
      t.string :servicechargerate

      # Stay (per-night) fields
      t.string :stay
      t.string :date
      t.string :month
      t.string :year
      t.string :rate
      t.string :stock

      # Hotel-level fields
      t.string :hotelid
      t.string :hotelname
      t.string :postcode
      t.string :hoteladdress
      t.string :region
      t.string :prefecture
      t.string :largearea
      t.string :smallarea
      t.string :hoteltype
      t.string :hoteldetailurl
      t.string :hotelcatchcopy
      t.string :hotelcaption
      t.string :pictureurl
      t.string :picturecaption
      t.string :x
      t.string :y
      t.string :hotelnamekana
      t.string :numberofratings
      t.string :rating

      t.timestamps
    end
  end
end


2・APIを作成し、レスポンスフィールドをDBに挿入


都道府県コード別

require 'active_support'
require "open-uri"
require "rubygems"
require "nokogiri"
require 'nkf'
require 'pp'

# -*- encoding: utf-8 -*-

# Imports hotel plans from the Jalan StockSearch API, issuing one request per
# prefecture code, and stores every plan that has a picture URL as a Hotel
# record.
class Tasks::InsertContentsTask
  # StockSearch endpoint, including the API key used in the article.
  # NOTE(review): the key is hard-coded; consider loading it from configuration.
  PREF_ENDPOINT = 'http://jws.jalan.net/APIAdvance/StockSearch/V1/?key=aqr1442b809a0e&'

  # Search conditions appended to every prefecture request.
  PREF_CONDITIONS = '&stay_count=2&adult_num=2&min_rate=10000&max_rate=50000&count=100'

  # Hotel column => response field, for values directly under <Plan>.
  PREF_PLAN_FIELDS = {
    'planname'           => 'PlanName',
    'roomname'           => 'RoomName',
    'plandetailurl'      => 'PlanDetailURL',
    'plancheckin'        => 'PlanCheckIn',
    'plancheckOut'       => 'PlanCheckOut',
    'splyperiodstrday'   => 'SplyPeriodStrDay',
    'splyperiodendday'   => 'SplyPeriodEndDay',
    'planpictureurl'     => 'PlanPictureURL',
    'planpicturecaption' => 'PlanPictureCaption',
    'meal'               => 'Meal',
    'ratetype'           => 'RateType',
    'samplerate'         => 'SampleRate',
    'servicechargerate'  => 'ServiceChargeRate'
  }.freeze

  # Hotel column => response field, for values under <Plan><Hotel>.
  PREF_HOTEL_FIELDS = {
    'hotelid'         => 'HotelID',
    'hotelname'       => 'HotelName',
    'postcode'        => 'PostCode',
    'hoteladdress'    => 'HotelAddress',
    'hoteltype'       => 'HotelType',
    'hoteldetailurl'  => 'HotelDetailURL',
    'hotelcatchcopy'  => 'HotelCatchCopy',
    'hotelcaption'    => 'HotelCaption',
    'pictureurl'      => 'PictureURL',
    'picturecaption'  => 'PictureCaption',
    'x'               => 'X',
    'y'               => 'Y',
    'hotelnamekana'   => 'HotelNameKana',
    'numberofratings' => 'NumberOfRatings',
    'rating'          => 'Rating'
  }.freeze

  # Hotel column => response field, for values under <Plan><Hotel><Area>.
  PREF_AREA_FIELDS = {
    'region'     => 'Region',
    'prefecture' => 'Prefecture',
    'largearea'  => 'LargeArea',
    'smallarea'  => 'SmallArea'
  }.freeze

  # Fetches plans for prefecture codes 010000..470000 and saves them.
  #
  # Prints the list of request URLs once, then performs one HTTP request per
  # URL. Plans without a PlanPictureURL are skipped.
  #
  # @return [Array<String>] the request URLs that were processed
  def self.execute_api
    apis = pref_api_urls
    pp apis

    apis.each do |api|
      pref_plans(api).each do |plan|
        next unless plan['PlanPictureURL'].present?

        save_pref_plan(plan)
      end
    end
  end

  # Builds one StockSearch URL per prefecture (two-digit number + "0000").
  def self.pref_api_urls
    (1..47).map do |number|
      "#{PREF_ENDPOINT}pref=#{'%02d' % number}0000#{PREF_CONDITIONS}"
    end
  end

  # Downloads one response and returns its plans as an Array of Hashes.
  #
  # Hash.from_xml yields a Hash (not an Array) when an element occurs only
  # once, so the value is normalized with Array.wrap; a response without
  # <Results> or <Plan> yields an empty list.
  #
  # NOTE(review): on Ruby 3.0+ Kernel#open no longer handles URLs; switch to
  # URI.open when upgrading.
  def self.pref_plans(api)
    response = Hash.from_xml(open(api).read)
    results = response['Results']
    return [] unless results.is_a?(Hash)

    Array.wrap(results['Plan']).select { |plan| plan.is_a?(Hash) }
  end

  # Copies the mapped response fields of a single plan into a new Hotel and
  # saves it. Missing <Hotel> or <Area> nodes leave those columns nil.
  def self.save_pref_plan(plan)
    hotel_node = plan['Hotel'].is_a?(Hash) ? plan['Hotel'] : {}
    area_node = hotel_node['Area'].is_a?(Hash) ? hotel_node['Area'] : {}

    hotel = Hotel.new
    assign_pref_fields(hotel, PREF_PLAN_FIELDS, plan)
    assign_pref_fields(hotel, PREF_HOTEL_FIELDS, hotel_node)
    assign_pref_fields(hotel, PREF_AREA_FIELDS, area_node)
    hotel.save
  end

  # Calls `hotel.<column> = node[<field>]` for every pair in +fields+.
  def self.assign_pref_fields(hotel, fields, node)
    fields.each do |column, field|
      hotel.public_send("#{column}=", node[field])
    end
  end

  private_class_method :pref_api_urls, :pref_plans, :save_pref_plan,
                       :assign_pref_fields
end



大エリアコード別

require 'active_support'
require "open-uri"
require "rubygems"
require "nokogiri"
require 'nkf'
require 'pp'

# -*- encoding: utf-8 -*-

# Imports hotel plans from the Jalan StockSearch API, issuing one request per
# large-area code taken from the published area master XML, and stores every
# plan that has a picture URL as a Hotel record.
class Tasks::InsertContentsTask
  # Area master XML from which the large-area codes are read.
  LARGE_AREA_MASTER_URL = 'http://www.jalan.net/jalan/doc/jws/data/area.xml'

  # StockSearch endpoint, including the API key used in the article.
  # NOTE(review): the key is hard-coded; consider loading it from configuration.
  LARGE_AREA_ENDPOINT = 'http://jws.jalan.net/APIAdvance/StockSearch/V1/?key=aqr1442b809a0e&'

  # Search conditions appended to every request; the stay date follows them.
  LARGE_AREA_CONDITIONS = '&stay_count=2&adult_num=2&min_rate=10000&max_rate=50000&count=100&'

  # Hotel column => response field, for values directly under <Plan>.
  LARGE_AREA_PLAN_FIELDS = {
    'planname'           => 'PlanName',
    'roomname'           => 'RoomName',
    'plandetailurl'      => 'PlanDetailURL',
    'plancheckin'        => 'PlanCheckIn',
    'plancheckOut'       => 'PlanCheckOut',
    'splyperiodstrday'   => 'SplyPeriodStrDay',
    'splyperiodendday'   => 'SplyPeriodEndDay',
    'planpictureurl'     => 'PlanPictureURL',
    'planpicturecaption' => 'PlanPictureCaption',
    'meal'               => 'Meal',
    'ratetype'           => 'RateType',
    'samplerate'         => 'SampleRate',
    'servicechargerate'  => 'ServiceChargeRate'
  }.freeze

  # Hotel column => key of the first <Stay><Date> entry (stored via #to_s).
  LARGE_AREA_STAY_FIELDS = {
    'date'  => 'date',
    'month' => 'month',
    'year'  => 'year',
    'rate'  => 'Rate',
    'stock' => 'Stock'
  }.freeze

  # Hotel column => response field, for values under <Plan><Hotel>.
  LARGE_AREA_HOTEL_FIELDS = {
    'hotelid'         => 'HotelID',
    'hotelname'       => 'HotelName',
    'postcode'        => 'PostCode',
    'hoteladdress'    => 'HotelAddress',
    'hoteltype'       => 'HotelType',
    'hoteldetailurl'  => 'HotelDetailURL',
    'hotelcatchcopy'  => 'HotelCatchCopy',
    'hotelcaption'    => 'HotelCaption',
    'pictureurl'      => 'PictureURL',
    'picturecaption'  => 'PictureCaption',
    'x'               => 'X',
    'y'               => 'Y',
    'hotelnamekana'   => 'HotelNameKana',
    'numberofratings' => 'NumberOfRatings',
    'rating'          => 'Rating'
  }.freeze

  # Hotel column => response field, for values under <Plan><Hotel><Area>.
  LARGE_AREA_AREA_FIELDS = {
    'region'     => 'Region',
    'prefecture' => 'Prefecture',
    'largearea'  => 'LargeArea',
    'smallarea'  => 'SmallArea'
  }.freeze

  # Fetches plans for every large-area code, with today's date as the stay
  # date, and saves them.
  #
  # Prints the list of request URLs once, then performs one HTTP request per
  # URL. Plans without a PlanPictureURL are skipped.
  #
  # @return [Array<String>] the request URLs that were processed
  def self.execute_test
    apis = large_area_api_urls
    pp apis

    apis.each do |api|
      large_area_plans(api).each do |plan|
        next unless plan['PlanPictureURL'].present?

        save_large_area_plan(plan)
      end
    end
  end

  # Reads the `cd` attribute of every LargeArea in the area master.
  #
  # Hash.from_xml yields a Hash (not an Array) when an element occurs only
  # once, so Prefecture and LargeArea are normalized with Array.wrap.
  #
  # NOTE(review): on Ruby 3.0+ Kernel#open no longer handles URLs; switch to
  # URI.open when upgrading.
  def self.large_area_codes
    master = Hash.from_xml(open(LARGE_AREA_MASTER_URL).read)
    codes = Array.wrap(master['Area']['Prefecture']).flat_map do |pref|
      Array.wrap(pref['LargeArea']).map { |area| area['cd'] }
    end
    codes.compact
  end

  # Builds one StockSearch URL per large-area code. The stay date is computed
  # once so that every request of a run uses the same day.
  def self.large_area_api_urls
    stay_date = "stay_date=#{Date.today.strftime('%Y%m%d')}"
    large_area_codes.map do |area_id|
      "#{LARGE_AREA_ENDPOINT}l_area=#{area_id}#{LARGE_AREA_CONDITIONS}#{stay_date}"
    end
  end

  # Downloads one response and returns its plans as an Array of Hashes; a
  # response without <Results> or <Plan> yields an empty list.
  def self.large_area_plans(api)
    response = Hash.from_xml(open(api).read)
    results = response['Results']
    return [] unless results.is_a?(Hash)

    Array.wrap(results['Plan']).select { |plan| plan.is_a?(Hash) }
  end

  # Copies the mapped response fields of a single plan into a new Hotel and
  # saves it. Stay values come from the first <Stay><Date> entry; missing
  # <Stay>, <Hotel> or <Area> nodes leave those columns empty.
  def self.save_large_area_plan(plan)
    stay_node = plan['Stay'].is_a?(Hash) ? plan['Stay'] : {}
    first_date = Array.wrap(stay_node['Date']).first
    first_date = {} unless first_date.is_a?(Hash)
    hotel_node = plan['Hotel'].is_a?(Hash) ? plan['Hotel'] : {}
    area_node = hotel_node['Area'].is_a?(Hash) ? hotel_node['Area'] : {}

    hotel = Hotel.new
    assign_large_area_fields(hotel, LARGE_AREA_PLAN_FIELDS, plan)
    LARGE_AREA_STAY_FIELDS.each do |column, field|
      hotel.public_send("#{column}=", first_date[field].to_s)
    end
    assign_large_area_fields(hotel, LARGE_AREA_HOTEL_FIELDS, hotel_node)
    assign_large_area_fields(hotel, LARGE_AREA_AREA_FIELDS, area_node)
    hotel.save
  end

  # Calls `hotel.<column> = node[<field>]` for every pair in +fields+.
  def self.assign_large_area_fields(hotel, fields, node)
    fields.each do |column, field|
      hotel.public_send("#{column}=", node[field])
    end
  end

  private_class_method :large_area_codes, :large_area_api_urls,
                       :large_area_plans, :save_large_area_plan,
                       :assign_large_area_fields
end



冗長な書き方であまりイケてはいないのですが、
APIのレスポンスフィールドをすぐにマスタデータ化したいエンジニアの方は、ぜひ
ご利用ください!
あと、大エリアコード別の方がより多くの情報を取得できます。

個人的な写真:

過去最大の豪雪になったのは過去最大に幸せになってしまった
僕のせいかも知れない。。。
でもやっぱり、ハッピーバレンタイン!
http://instagram.com/p/kd6Z7bwYmb/


http://instagram.com/p/kd6gAhQYmh/