-->

基于Python爬虫应用开发系列教学之泰国狮子航空航班查询爬虫

Hi,欢迎回来参阅廉航爬虫系列教学,上一期我们给大家分享了东南亚最热门的航空网站(亚航)的爬虫, 按照约定,本期将给大家分享另一个位于泰国的热门航空(泰国狮子航空)
廉航 – 泰国狮子航空(SL)
泰国狮子航空是泰国一家廉价航空,并以廊曼国际机场作为枢纽机场。泰狮航是印尼狮子航空与泰国的合作伙伴以合资的方式成立。(资料摘自:wiki自由百科)
Wiki 维基百科全书
喜欢出游东南亚的朋友应该对泰国不陌生吧,泰国一直是我国出游人数最高的国家(没有之一),这得益于当地消费低廉且环境优美等因素,当然还有一个原因是签证的成本低(目前支持落地签)。还有就是相对国内旅游景点来说,去泰国旅游还比较优惠,花费比国内还低,其中最大的原因莫过于机票的廉价:从国内始发到达泰国的机票,200到500RMB(含税)的比比皆是;远的不说,就国内飞往热门的城市最低也要500起步吧?单是交通费用就已经算是一笔不菲的支出了,还有吃的呢?花的呢?好吧,扯远啦,拉回来;总的来说,泰国狮子航空(SL)也算是一个价格相对较为廉价的航司;虽没亚航知名度大,但也不失为一个找廉价机票的好地方!
泰国(图片摘自网络)
好啦,接下来该开始今天的主题啦,泰国狮子航空(SL)官网:https://www.lionairthai.com/cn/cn/ 经研究分析,该站没有加密方式,同一IP频繁访问则会触发谷歌验证,登录情况下有个字母加数字的验证码,而我们这里仅仅只实现查询的模拟,所以后续的扣位步骤不再阐述。该站的查询接口由两个请求组成:第一个请求返回一串不规则的字符(通常3到4位字符);使用该字符串请求第二个接口便能返回数据,在这过程中没有任何加密。并且经过多次测试,该站可以通过在请求头中伪造IP(X-Forwarded-For)达到不被封的目的(即:无需使用代理也能进行并发查询)
代码片段
def Get_hangban_data(self,timeout):
        """Request the search landing page for the configured one-way query.

        Args:
            timeout: Timeout in seconds passed to the session's ``get`` call.

        Returns:
            The response body as text. If either airport code is missing from
            ``self.place_list`` no request is sent and a result dict with an
            empty ``pricedItineraries`` list is returned instead.
        """
        # Unknown route: answer locally without touching the network.
        if self.start_place not in self.place_list or self.end_place not in self.place_list:
            return {
                "sessionId": self.sessionId,
                "status": "SUCCESS",
                "msg": "This airlines has not scheduled flight from {} to {} The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                    self.start_place, self.end_place, threading.active_count(), self.proxyInfo,
                    round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": [],
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
        # self.fakeIP lists the excluded first octets as strings, so the
        # comparison is done on strings; testing the drawn int directly
        # against that list never matches anything.
        excluded = {str(octet) for octet in self.fakeIP}
        candidates = [octet for octet in range(1, 256) if str(octet) not in excluded]
        # Fall back to any octet instead of looping forever when all are excluded.
        first_octet = random.choice(candidates) if candidates else random.randint(1, 255)
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': self.ua,
            'X-Forwarded-For': '{}.{}.{}.{}'.format(first_octet, random.randint(0, 255), random.randint(0, 255), random.randint(0, 248)),
        }
        # child1 carries the child count and infant1 the infant count.
        url = f"https://search.lionairthai.com/default.aspx?aid=207&depCity={self.start_place}&arrCity={self.end_place}&Jtype=1&depDate={self.depDate}&adult1={self.adtCount}&child1={self.chdCount}&infant1={self.infCount}&culture=zh-CHS"
        options = {
            "headers": headers,
            "timeout": timeout,
            "allow_redirects": False,
            "stream": False,
        }
        # Only hand over a proxy mapping when one is configured.
        if self.proxy is not None:
            options["proxies"] = self.proxy
        return self.session.get(url, **options).text
代码片段二
def GetFlightSearch(self,Referer,key,timeout):
        """POST the search token to the GetFlightSearch endpoint.

        Args:
            Referer: Path (relative to https://search.lionairthai.com/) that
                is sent in the ``Referer`` header; it is formatted into the
                header as-is, so pass a string.
            key: Token sent as ``{"t": key}`` in the JSON body.
            timeout: Timeout in seconds passed to the session's ``post`` call.

        Returns:
            The response body as text.
        """
        # self.fakeIP lists the excluded first octets as strings, so the
        # comparison is done on strings; testing the drawn int directly
        # against that list never matches anything.
        excluded = {str(octet) for octet in self.fakeIP}
        candidates = [octet for octet in range(1, 256) if str(octet) not in excluded]
        # Fall back to any octet instead of looping forever when all are excluded.
        first_octet = random.choice(candidates) if candidates else random.randint(1, 255)
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': self.ua,
            'X-Forwarded-For': '{}.{}.{}.{}'.format(first_octet, random.randint(0, 255), random.randint(0, 255), random.randint(0, 248)),
            'Origin': 'https://search.lionairthai.com',
            'Referer': f'https://search.lionairthai.com/{Referer}',
        }
        url = 'https://search.lionairthai.com/SL/Flight.aspx/GetFlightSearch'
        options = {
            "json": {'t': key},
            "headers": headers,
            "timeout": timeout,
            "allow_redirects": False,
            "stream": False,
        }
        # Only hand over a proxy mapping when one is configured.
        if self.proxy is not None:
            options["proxies"] = self.proxy
        return self.session.post(url, **options).text
完整代码
import random
import re
import time
import asyncio
import aiohttp
import threading
import requests
import json
import datetime
import requests
import traceback

class SL(object):
    def __init__(self, depCode, arrCode, depDate, adtCount, chdCount, infCount,sessionId):
        self.timeout = 10
        self.time_start = time.perf_counter()
        self.fakeIP = ['1', '14', '27', '36', '39', '40', '42', '43', '45', '47', '49', '52', '54', '58', '59', '60', '61', '62', '68', '69', '71', '81', '82', '91', '94', '101', '103', '106', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '128', '129', '132', '134', '137', '139', '140', '144', '146', '148', '150', '152', '153', '154', '157', '159', '160', '161', '162', '163', '164', '166', '167', '168', '170', '171', '172', '175', '180', '182', '183', '185', '188', '192', '193', '198', '199', '202', '203', '204', '210', '211', '212', '218', '219', '220', '221', '222', '223']
        self.ua_list = [
            "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 5 Build/LMY48B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.78 Mobile Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
            "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
            "Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1",
            "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; U; Android 8.0.0; zh-CN; MHA-AL00 Build/HUAWEIMHA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/12.1.4.994 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            "Mozilla/5.0(iPad;U;CPUOS4_3_3likeMacOSX;en-us)AppleWebKit/533.17.9(KHTML,likeGecko)Version/5.0.2Mobile/8J2Safari/6533.18.5",
            "Mozilla/5.0(iPhone;U;CPUiPhoneOS4_3_3likeMacOSX;en-us)AppleWebKit/533.17.9(KHTML,likeGecko)Version/5.0.2Mobile/8J2Safari/6533.18.5",
            "MQQBrowser/26Mozilla/5.0(Linux;U;Android2.3.7;zh-cn;MB200Build/GRJ22;CyanogenMod-7)AppleWebKit/533.1(KHTML,likeGecko)Version/4.0MobileSafari/533.1",
            "Opera/9.80(Android2.3.4;Linux;OperaMobi/build-1107180945;U;en-GB)Presto/2.8.149Version/11.10",
            "Mozilla/5.0 (Mobile; rv:18.0) Gecko/18.0 Firefox/18.0",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
            "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
            "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)"
        ]
        requests.adapters.DEFAULT_RETRIES=1
        self.session = requests.session()
        self.session.keep_alive = False
        self.ua = random.choice(self.ua_list)
        self.start_place = depCode.upper()
        self.end_place = arrCode.upper()
        self.depDate = depDate
        self.adtCount = int(adtCount)
        self.chdCount = int(chdCount)
        self.infCount = int(infCount)
        self.sessionId = sessionId
        self.proxy = None
        self.proxyInfo = None
        index = random.randint(1,10)
        if(index > 0):
            socks5 = ['0.0.0.0.0:0000']
            self.proxyInfo = random.choice(socks5)
            self.proxy = {'https': f'socks5://{self.proxyInfo}@{self.proxyInfo}'}
        self.place_list = ['ADL', 'ARD', 'AOR', 'AMQ', 'VPM', 'ATQ', 'ABU', 'BJW', 'BPN', 'BTJ', 'TKG', 'BDO', 'BLR', 'DMK', 'BDJ', 'BWX', 'BTH', 'BTW', 'BUW', 'BKS', 'BMU', 'BDG', 'BOM', 'BNE', 'WUB', 'UOL', 'CSX', 'CZX', 'CTU', 'CNX', 'CEI', 'CKG', 'CMB', 'MEQ', 'DAD', 'DEL', 'DPS', 'DAC', 'DUM', 'ENE', 'FKQ', 'FUK', 'GLX', 'GTO', 'CAN', 'KWE', 'GNS', 'HAK', 'HGH', 'HAN', 'HDY', 'HFE', 'SGN', 'HKG', 'IPH', 'BXB', 'CGK', 'HLP', 'DJB', 'DJJ', 'JED', 'TNA', 'JHB', 'KTM', 'KAZ', 'KDI', 'KTE', 'KTG', 'KKC', 'COK', 'PUM', 'CCU', 'KBU', 'KBR', 'BKI', 'KBV', 'KUL', 'KNO', 'TGG', 'KCH', 'KMG', 'KOE', 'LBU', 'LBJ', 'LHE', 'LUV', 'LGK', 'LKA', 'SMG', 'LSW', 'LUW', 'UPG', 'MKZ', 'MLG', 'MJU', 'MDC', 'MKW', 'MOF', 'MKF', 'MED', 'MNA', 'MEL', 'MKQ', 'MYY', 'OTI', 'NBX', 'NGO', 'NST', 'KHN', 'NKG', 'NTX', 'NGB', 'KIX', 'PDG', 'AEG', 'PKY', 'PLM', 'LLO', 'PLW', 'PKN', 'PGK', 'PKU', 'PEN', 'PER', 'PHS', 'PNH', 'HKT', 'PNK', 'PSJ', 'LOP', 'RJM', 'RTI', 'SBG', 'SMQ', 'SYX', 'CTS', 'SXK', 'YKR', 'SRG', 'PVG', 'SZX', 'FLZ', 'DTB', 'SIN', 'SOQ', 'SZB', 'SWQ', 'SUB', 'SOC', 'URT', 'SYD', 'NAH', 'TPE', 'TXE', 'TMC', 'TNJ', 'BEJ', 'TJQ', 'TRK', 'TWU', 'TTE', 'TRV', 'TSN', 'TRZ', 'NRT', 'TLI', 'TST', 'UBP', 'UTH', 'UTP', 'KNG', 'VNS', 'WGP', 'WNI', 'WMX', 'WUH', 'XIY', 'DEX', 'RGN', 'JOG', 'CGO']
    """获取航班信息"""
    def Get_hangban_data(self,timeout):
        ip = 0
        while(True):
            ip = random.randint(1, 255)
            if(ip not in self.fakeIP):
                break
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': self.ua,
            'X-Forwarded-For': '{}.{}.{}.{}'.format(ip, random.randint(0, 255),random.randint(0, 255), random.randint(0, 248)),
        }
        if (self.start_place not in self.place_list or self.end_place not in self.place_list):
            dict_data = {
                "sessionId": self.sessionId,
                "status": "SUCCESS",
                "msg": "This airlines has not scheduled flight from {} to {} The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                    self.start_place, self.end_place,threading.activeCount(), self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": [],
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
            return dict_data
        # 获取航班信息
        url = f"https://search.lionairthai.com/default.aspx?aid=207&depCity={self.start_place}&arrCity={self.end_place}&Jtype=1&depDate={self.depDate}&adult1={self.adtCount}&child1={self.infCount}&infant1=0&culture=zh-CHS"
        if (self.proxy is None):
            return self.session.get(url, headers=headers, timeout=timeout, allow_redirects=False,stream=False).text
        else:
            return self.session.get(url, headers=headers, timeout=timeout, allow_redirects=False,proxies=self.proxy,stream=False).text
    def GetFlightSearch(self,Referer,key,timeout):
        ip = 0
        while (True):
            ip = random.randint(1, 255)
            if (ip not in self.fakeIP):
                break
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': self.ua,
            'X-Forwarded-For': '{}.{}.{}.{}'.format(ip, random.randint(0, 255),random.randint(0, 255), random.randint(0, 248)),
        }
        url = f'https://search.lionairthai.com/SL/Flight.aspx/GetFlightSearch'
        headers["Origin"] = 'https://search.lionairthai.com'
        headers["Referer"] = f'https://search.lionairthai.com/{Referer}'
        data = {
            't': key
        }
        if (self.proxy is None):
            return self.session.post(url, json=data, headers=headers, timeout=timeout, allow_redirects=False,stream=False).text
        else:
            return self.session.post(url, json=data, headers=headers, timeout=timeout, allow_redirects=False,proxies=self.proxy,stream=False).text
    def Prase(self,html):
        if('SegmentInformation' not in html):
            dict_data = {
                "sessionId": self.sessionId,
                "status": "SUCCESS",
                "msg": "The choose days has not any scheduled flight, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(threading.activeCount(),self.proxyInfo,round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": [],
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
            return dict_data
        D = json.loads(html)["d"]
        itemlist = []
        for i in D:
            TotalNoStops = i["TotalNoStops"]
            if(TotalNoStops != 0):
                continue
            item = {}
            item["depCode"] = self.start_place
            item["arrCode"] = self.end_place
            item["airline_code"] = i["MACode"]
            item["flight_number"] = i["FlightNo"]
            depTime = i["DepartureDate"]
            arrTime = i["ArrivalDate"]
            de = int(re.search('(\d+)',depTime).group(1))
            ae = int(re.search('(\d+)',arrTime).group(1))
            bb = (datetime.datetime.fromtimestamp(de / 1000) + datetime.timedelta(hours=-1))
            aa = (datetime.datetime.fromtimestamp(ae / 1000) + datetime.timedelta(hours=-1))
            item["departureDateTime"] = bb.strftime("%Y-%m-%dT%H:%M:%S")
            item["arrivalDateTime"] = aa.strftime("%Y-%m-%dT%H:%M:%S")
            item["elapsedTime"] = int(((ae - de)/1000)/60)
            PromoFlight = i["PromoFlight"]
            if(PromoFlight is None):
                PromoFlight = i["EconomyFlight"]
                if(PromoFlight is None):
                    PromoFlight = i["PremiumEconomyFlight"]
                if(PromoFlight is None):
                    continue
            item["total_fare"] = float(PromoFlight["priceInfo"]["TotalPrice"])/(self.adtCount + self.chdCount + self.infCount)
            item["tax"] = float(PromoFlight["priceInfo"]["TotalTax"])/(self.adtCount + self.chdCount + self.infCount)
            item["base_fare"] = item["total_fare"] - item["tax"]
            item["booking_classes"] = PromoFlight["outBoundFlights"][0]["Segments"][0]["BookingClass"]
            item["seatsRemaining"] = 4 if PromoFlight["outBoundFlights"][0]["Segments"][0]["AvailSeats"] == "" else int(PromoFlight["outBoundFlights"][0]["Segments"][0]["AvailSeats"])
            item["currency"] = PromoFlight["CurrencyCode"]
            item_data = {
                "gdsSource": None,
                "ipcc": None,
                "currencyCode": item["currency"],
                "pricingInfos": [{
                    "baseFare": item["base_fare"],
                    "baseFareCurrency": item["currency"],
                    "equivFare": item["base_fare"],
                    "equivFareCurrency": item["currency"],
                    "verifiedFare": None,
                    "verifiedFareCurrency": None,
                    "originalFare": item["base_fare"],
                    "originalFareCurrency": item["currency"],
                    "fareRuleFare": None,
                    "fareRuleFareCurrency": None,
                    "fareRuleId": None,
                    "disablePriceCheck": None,
                    "supplierMarkupFare": None,
                    "supplierMarkupFareCurrency": None,
                    "supplierMarkupInfo": None,
                    "tax": item["tax"],
                    "taxCurrency": item["currency"],
                    "verifiedTax": None,
                    "verifiedTaxCurrency": None,
                    "originalTax": item["tax"],
                    "originalTaxCurrency": item["currency"],
                    "fareRuleTax": None,
                    "fareRuleTaxCurrency": None,
                    "totalFare": item["total_fare"],
                    "totalFareCurrency": item["currency"],
                    "passengerType": "ADULT",
                    "passengerQuantity": self.adtCount,
                    "airlineCode": None,
                    "fareType": None,
                    "changeFare": None,
                    "changePercentage": None,
                    "changeFareCurrency": None,
                    "refundFare": None,
                    "refundFareCurrency": None,
                    "refundPercentage": None,
                    "fareBasisCodes": [],
                    "baggageInfos": [],
                    "replBgs": None,
                }],
                "airItinerary": {
                    "airTripType": "ONE_WAY",
                    "originDestinationOptions": [{
                        "elapsedTime": None,
                        "flightSegments": [{
                            "departureCode": self.start_place,
                            "departureName": None,
                            "departureTerminal": None,
                            "departureDateTime": item["departureDateTime"],
                            "departureTimeZone": None,
                            "arrivalCode": self.end_place,
                            "arrivalName": None,
                            "arrivalTerminal": None,
                            "arrivalDateTime": item["arrivalDateTime"],
                            "arrivalTimeZone": None,
                            "elapsedTime": item["elapsedTime"],
                            "cabin": item["booking_classes"],
                            "replCabin": None,
                            "replClass": None,
                            "cabinClass": None,
                            "airEquipType": None,
                            "marketingAirlineCode": item["airline_code"],
                            "marketingAirlineName": None,
                            "marketingFlightNumber": item["flight_number"],
                            "operatingAirlineCode": item["airline_code"],
                            "operatingFlightNumber": item["flight_number"],
                            "mealCode": None,
                            "stopQuantity": None,
                            "stopLocationCode": None,
                            "seatsRemaining": item["seatsRemaining"],
                            "codeShare": None,
                            "eTicket": None,
                            "onTimePercent": None,
                            "onTimeRate": None,
                            "marriageGrp": None,
                            "availabilitySource": None,
                            "eticket": None
                        }]
                    }]
                },
                "validatingCarrier": item["airline_code"],
                "validatingCarrierName": None,
                "lastTicketingDate": None,
                "data": None,
                "createTime": time.strftime("%m-%d %H:%M:%S", time.localtime()),
                "sk": None,
                "st": None
            }
            itemlist.append(item_data)
        if (len(itemlist) == 0):
            dict_data = {
                "sessionId": self.sessionId,
                "status": "SUCCESS",
                "msg": "The choose days has not any scheduled flight, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(threading.activeCount(),self.proxyInfo,round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": [],
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
            return dict_data
        else:
            dict_data = {
                "sessionId": self.sessionId,
                "status": "SUCCESS",
                "msg": "None, The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(threading.activeCount(),self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": itemlist,
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
            return dict_data

    def Run(self):
        try:
            SLMatch = None
            json_response = None
            num = time.time()
            timePath = self.timeout /2
            while(int(time.time() - num)<=self.timeout):
                try:
                    json_response = self.Get_hangban_data(timePath/2)
                    break
                except:
                    pass
            if(json_response == None):
                dict_data = {
                    "sessionId": self.sessionId,
                    "status": "FAILED",
                    "msg": "[in:Run] => Read Timeout with Line 1. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(threading.activeCount(), self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                    "pricedItineraries": [],
                    "validTime": None,
                    "needAdjust": True,
                    "needPushPrice": False
                }
                return dict_data
            if ("pricedItineraries" in json_response):
                return json_response
            if ("ShieldSquare Captcha" in json_response):
                dict_data = {
                    "sessionId": self.sessionId,
                    "status": "FAILED",
                    "msg": "[in:SLmatch] => The search for scheduled flight has been failed, beacuse that return the Captchs, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                        threading.activeCount(), self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                    "pricedItineraries": [],
                    "validTime": None,
                    "needAdjust": True,
                    "needPushPrice": False
                }
                return dict_data
            SLMatch = re.search(r"window.location.href = '(.+?)';", json_response, re.S)
            if (SLMatch is None):
                dict_data = {
                    "sessionId": self.sessionId,
                    "status": "FAILED",
                    "msg": "[in:SLmatch] => The search for scheduled flight has been failed, beacuse that fields is not in result, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                        threading.activeCount(),self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                    "pricedItineraries": [],
                    "validTime": None,
                    "needAdjust": True,
                    "needPushPrice": False
                }
                return dict_data
            t = re.search('&t=(.+?)$', SLMatch.group(1))
            if (t is None):
                dict_data = {
                    "sessionId": self.sessionId,
                    "status": "FAILED",
                    "msg": "[in:tVar] => The search for scheduled flight has been failed, beacuse that fields is not in result, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                        threading.activeCount(),self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                    "pricedItineraries": [],
                    "validTime": None,
                    "needAdjust": True,
                    "needPushPrice": False
                }
                return dict_data
            while(int(time.time() - num)<=self.timeout):
                try:
                    json_response = self.GetFlightSearch(SLMatch, t.group(1),timePath/2)
                    break
                except:
                    pass
            if (json_response == None):
                dict_data = {
                    "sessionId": self.sessionId,
                    "status": "FAILED",
                    "msg": "[in:Run] => Read Timeout with Line 2. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                        threading.activeCount(), self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                    "pricedItineraries": [],
                    "validTime": None,
                    "needAdjust": True,
                    "needPushPrice": False
                }
                return dict_data
            json_response = self.Prase(json_response)
            return json_response
        except Exception:
            e = traceback.format_exc().replace(' ','').replace('\n','')
            dict_data = {
                "sessionId": self.sessionId,
                "status": "FAILED",
                "msg": "[in:Run] => The search for scheduled flight has been failed, beacuse that {}, Please re-select. The current number of threads as:{} The Query Proxy as:{} The timeout as:{}".format(
                    e,threading.activeCount(),self.proxyInfo, round(time.perf_counter() - self.time_start, 2)),
                "pricedItineraries": [],
                "validTime": None,
                "needAdjust": True,
                "needPushPrice": False
            }
            return dict_data

    """实现代码逻辑"""
    def main(self):
        return self.Run()
if __name__ == '__main__':
    def randomdata():
        """Return a random date between 2019-09-01 and 2019-10-31 as YYYY-MM-DD."""
        # Bounds of the date window as struct_time-style tuples.
        window_start = (2019, 9, 1, 0, 0, 0, 0, 0, 0)
        window_end = (2019, 10, 31, 23, 59, 59, 0, 0, 0)
        # time.mktime returns floats; random.randint requires integers
        # (float arguments raise TypeError on Python 3.12+).
        start = int(time.mktime(window_start))
        end = int(time.mktime(window_end))
        # Pick a random timestamp inside the window and format it as a date.
        random_mktime = random.randint(start, end)
        return time.strftime("%Y-%m-%d", time.localtime(random_mktime))

    def Tes():
        """Run one demo query (CNX -> DMK on 2019-10-21) and print the result."""
        randomdatas = randomdata()
        # NOTE: the route pair and the random date are only printed next to
        # the result; the query itself uses the fixed values passed to SL.
        d = random.choice(
            ['SCLPMC', 'SCLCCP', 'SCLCPO', 'SCLPUQ', 'SCLLIM', 'SCLLSC', 'SCLCJC', 'SCLIQQ', 'SCLZCO', 'SCLBBA',
             'SCLAQP', 'SCLZAL', 'SCLARI', 'CCPLSC', 'CCPIQQ', 'PMCSCL', 'CCPSCL', 'CPOSCL', 'PUQSCL', 'LIMSCL',
             'LSCSCL', 'CJCSCL', 'IQQSCL', 'ZCOSCL', 'BBASCL', 'AQPSCL', 'ZALSCL', 'ARISCL', 'LSCSCL', 'IQQSCL'])
        Jp = SL("CNX", "DMK", "2019-10-21", 1, 0, 0, "SEARCH123338")
        dict_data = Jp.main()
        print(d[3:], d[:3], randomdatas, dict_data)

    now = time.time()
    # One worker thread per demo query; raise the range to query concurrently.
    workers = [threading.Thread(target=Tes) for _ in range(1)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print(f'总耗时:{int(time.time()-now)}秒')
温馨提示,上述代码使用了socks5代理协议,运行前请自行作调整,本文仅限于学术交流之用,如侵犯了您的权益,请与我联系,我将配合删除,转发本文请注明出处,有任何疑问可以加入我们的QQ技术交流群探讨:544185435 不便之处请谅解!
本文固定地址
本文的固定地址为: https://www.itmaohome.com/429.html 转发请注明出处!

0 评论:

发表评论