
I9b             M   @   s~  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l Z d  d l m Z d  d l m Z m Z m Z d  d l m Z d  d l m Z m Z d  d l m Z m Z m Z m Z d  d l Z d  d l Z d  d l m Z m Z m Z m  Z  m! Z! d d	 l" m# Z# d  d
 l$ m% Z% e j& j' j( e%  y& d  d l) m* Z* m+ Z+ m, Z, m- Z- Wn Yn Xy d  d l. m/ Z0 Wn d Z1 Yn Xd Z1 e  j2 j3 e  j4    e# j5 k re  j6 e# j5  d d l7 Td e  j8 k r&d Z9 n d Z9 y d  d l: m; Z; Wn" d  d l< m; Z; e= d  Yn Xe9 sue> d  d d l? m@ Z@ e9 se> d jA d e# jB d e# jC   e> d jA d e# jD   y d  d lE TWn eF d    Yn Xy d  d lG TWn eF d    Yn5 XeH jI d  jJ d d  jJ d d  ZH e> d  eH  e9 rd  d lK ZK eK jL eK jM d!   eK jL eK jM d"   d d lN Td d# lO mP ZP d d$ l" mQ ZQ eR ry# d d% lS mT ZT mU ZU eT   ZV Wn" e	 jW   eF d&  d ZR Yn Xe9 se> d'  eX eY  eZ re[ Z\ e] d k	 rQe[ d( e^ e]  7Z[ e e[  Z_ n e[ Z_ t` d k rig  a` ea d) d*   t` D  a` eb t` pg   ac tc j   ad td je eH  x( t` D]  Zf td je e d ef  jg  qWeb   Zh eh je eH  ea ei  Zi ei r;x( ei D]  Zf td je ef  eh je ef  qWei jj eH  n	 eH g Zi ek jJ d+ d,  Zl ek e[ Zm em jJ d+ d,  Zn eo ep  Zq d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; h Zr er js et  eu eH  d  Zv eu eH  d  Zw ex sf  Zy ez si  Z{ f  Z| f  Z} g  Z~ e e~ e  r#ea e~  Z~ ex s/d Z e s>e   Z e sPeb   Z n- x* e D]" Z e tc k rWe= d< e d=  qWWe rd Z e sd Z e rg  Z x* e D]" Z e jj e
 j e d> d  qWe e  Z x1 e D] Z e e d 7Z qWn d Z d Z d Z e r"e r"d Z e@   Z e j e e  a eP d?  Z d d@ dA g e dB <eP dC  Z d e eH <eP dD  Z dE e d <dH Z dI Z dJ Z e] d k	 rdK e j e\  e e j e^ e]   dL e j e\  dM Z n e j e[  Z e j dN dO dP dQ dR dS dT e j Z e j dU dT e j Z e j dV  Z e j dW dX dY jA dZ e  d[ e d\ e d] jA d^ e  d\ d_ jA d^ e  d` dT e j Z e j e  Z da db   Z e   a e j dc  Z e pi  Z xl e j   D]^ \ Z Z e s	qxF e D]> Z e j dd  d k r4	q	e j e dd dT e j e dd <q	WqWe e9 sk	e n de e^ e j    jJ df d  dg d dh d Z di dj   Z dk dl   Z d dm dn  Z d do dp  Z dq dr   Z ds dt   Z du dv   Z d dw dx  Z d dy dz  Z e Z d d d d{ d|  Z e Z d} d~   Z d d   Z d d d d d  Z d d d d d d  Z d d d  Z d d   Z e; d d  d d    Z d d d  Z d d d  Z d d d  Z d d   Z e; d d  d d    Z e; d d  d d    Z d d   Z d d   Z d d d  Z e; d d  d d    Z d d   Z d d   Z d d d  Z d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z d d d d  Z d d d d d d  Z d d   Z d d   Z d d   Z d d d  Z d d   Z d d   Z d d   Z d d   Z d d   Z d d   Z e j d d    Z e j d  d d    Z e j d d d d g d d    Z e j d+ d d d d d d d d g e j d d d d d d d d d g d+ d d    Z d+ d d  Z e j d  d d    Z e j d  d d    Z i  a x td D] Zf e ef  t ef <qWe re   Z n	 eb   Z y6 e9 rd  d lK ZK eK jL eK jM d   d  d l TWn Yn Xe rMy d  d l m Z Wn e= d    Yn Xe ry d  d l m Z Wn d Z e= d    Yn Xe ry d  d l m Z m Z Wn d Z eF d    Yn Xe r]xo e D]g Z y) e   e d e d <e e d d Wqe k
 r6Z z eF d e    WYd d Z [ XqXqWe j d e d d  Z e j  e d k rzeF d  e  d S)    N)fnmatch)timesleepprocess_time)escape)datetime	timedelta)urljoinurlsplit
urlunsplit
quote_plus)Flaskrequestmake_responseResponseredirect   )CONSTS)InsecureRequestWarning)UnionListAnyTuple)detectFT)*ZZMIRROR_UNITTEST)	lru_cachezpackage fastcache not found, fallback to stdlib lru_cache, no FUNCTION is effected, only maybe a bit slower. Considering install it using "pip3 install fastcache"z,lru_cache loaded successfully from fastcache)ZmirrorThreadLocalz+zmirror version: {version} author: {author}versionauthorzGithub: {site_url}Zsite_urlu   the config_default.py is missing, this program may not works normally
config_default.py 文件丢失, 这会导致配置文件不向后兼容, 请重新下载一份 config_default.pyu  the config_default.py is missing, fallback to default configs(if we can), please COPY the config_default.py to config.py, and change it's content, or use the configs in the more_configs folder
自定义配置文件 config.py 丢失或存在错误, 将使用默认设置, 请将 config_default.py 复制一份为 config.py, 并根据自己的需求修改里面的设置(或者使用 more_configs 中的配置文件)z./ 	zhttps:// zhttp://zconfig file found, mirroring: zzmirror.utilszzmirror.connection_pool)LRUDict)connection_pool)	FileCacheget_expire_from_mimezLCan Not Create Local File Cache, local file cache is disabled automatically.zLocal file cache enabled:c             C   s7   g  |  ]- } | j  d   j d d  j d d   q S)z./ 	zhttps://r   zhttp://)stripreplace).0d r)   #/var/www/zmirror/zmirror/zmirror.py
<listcomp>   s   	 r+   /z\/zcontent-typeZdateexpireszcache-controlzlast-modifiedZserverlocationzaccept-rangeszaccess-control-allow-originzaccess-control-allow-headerszaccess-control-allow-methodszaccess-control-expose-headerszaccess-control-max-agez access-control-allow-credentialsztiming-allow-originzAn isolated domain:zOwould not have effect because it did not appears in the `external_domains` liststricti   z	image/pngiB  z&www.fake-domain.com/folder/foo/bar.pngd   i  ztarget.domain.comexample.com/path/no/query/stringz(?::|%(?:25)?3[Aa])z3(?:\\*(?:/|x2[Ff])|%(?:(?:25)?5[Cc]%)*(?:25)?2[Ff])z2(?:\\*["']|%(?:(?:25)?5[Cc]%)*2(?:52)?[27]|&quot;)z(?:|)zN(?P<prefix>\b(?:(?:src|href|action)\s*=|url\s*\(|url\s*:|@import\s*|"\s*:)\s*)z(?P<quote_left>["'])?zn(?P<domain_and_scheme>(?P<scheme>(?:https?:)?\\?/\\?/)(?P<domain>(?:[-a-z0-9]+\.)+[a-z]+(?P<port>:\d{1,5})?))?z(?P<path>[^\s;:\\+$?#'"\{}]*?z!(?P<query_string>\?[^\s?#'"]*?)?)z*(?P<quote_right>["')])(?P<right_suffix>\W)flagsz\bdomain=(\.?([\w-]+\.)+\w+)\bz*(?P<prefix>[pP]ath)=(?P<path>[\w\._/-]+?;)z(?P<domain_prefix>z(?P<scheme>z#(?:https?(?P<colon>{REGEX_COLON}))?REGEX_COLONz%(?P<scheme_slash>%s)(?P=scheme_slash)z)?z<(?P<slash2>(?(scheme_slash)(?P=scheme_slash)|{REGEX_SLASH}))REGEX_SLASHzCextdomains(?(slash2)(?P=slash2)|{REGEX_SLASH})(?P<is_https>https-)?z#(?P<real_domain>(?:[\w-]+\.)+\w+)\bc                 s   d d l  m }  |  d d   t D    t t   j    d   f d d   d d	 } d
 d j |  d } t j d
 d d j	 d t
  d t d d d j	 d t  d d | d j	 d t  d  S)u   产生 regex_basic_mirrorlization
    用一个函数包裹起来是因为在 try_match_and_add_domain_to_rewrite_white_list()
    中需要动态修改 external_domains, 修改以后可能需要随之生成新的正则, 包裹一下比较容易调用
    r   )Counterc             s   s+   |  ]! } t  j | j d   d  Vq d S).r   N)rer   split)r'   xr)   r)   r*   	<genexpr>U  s    z8_regex_generate__basic_mirrorlization.<locals>.<genexpr>keyc                s     |  S)Nr)   )r=   )cr)   r*   <lambda>V  s    z7_regex_generate__basic_mirrorlization.<locals>.<lambda>reverseTz(?:r3   r4   z(?P<scheme>z#(?:https?(?P<colon>{REGEX_COLON}))?r6   z%(?P<scheme_slash>%s)(?P=scheme_slash)z(?P<quote>{REGEX_QUOTE})REGEX_QUOTEz&(?P<domain>([a-zA-Z0-9-]+\.){1,5}%s)\bz=(?P<suffix_slash>(?(scheme_slash)(?P=scheme_slash)|{SLASH}))?ZSLASHz(?(quote)(?P=quote)))collectionsr8   allowed_domains_setsortedlistkeysjoinr;   compileformatr6   r7   rC   )r8   Zregex_all_remote_tldr)   )r@   r*   %_regex_generate__basic_mirrorlizationM  s    0SrL   z!zmirror_verify=[a-zA-Z0-9]+\b;? ?	url_regexZunittestr9   Zstatic_folderZtemplate_folderc             C   s   d d   } t  j | |   S)u  
    response_text_basic_rewrite() 的实验性升级版本, 默认启用

    *v0.28.1.dev*
        之前版本是在正则中匹配所有允许的域名, 现在改为匹配所有可能允许的TLD,
        可以带来一些性能的提升, 并且容易进行动态域名添加和通配符支持

    *v0.28.2*
        进一步优化正则, 性能提升 47% 左右 (速度约为传统暴力替换的4.4倍)

    *v0.28.3*
        目前来看该功能工作得相当好, 由实验性特性改为正式使用
        移除旧版 response_text_basic_rewrite(), 只保留一个为了向下兼容的 alias

    :param text: 远程响应文本
    :type text: str
    :return: 重写后的响应文本
    :rtype: str
    c             S   s5  t  d |   } | t k r9 t s/ t |  r9 |  j   St  d |   } t  d |   p` | p` d } t  d |   p{ t |  } t r t j d |  n t } | r | t	 k r | | d | | | } n
 | | } t  d |   } | r | | | St  d |   r%t
 j d |  j d |  | S| d	 | Sd  S)
Ndomainsuffix_slashscheme_slashr,   colonr$   Z
extdomainsquote   )	get_grouprE   "enable_automatic_domains_whitelist.try_match_and_add_domain_to_rewrite_white_listgroupZguess_colon_from_slashmy_host_portmy_host_namer&   domain_alias_to_target_setmy_host_scheme)mremote_domainrO   ZslashrQ   Z_my_host_nameZcorerR   r)   r)   r*   regex_reassemble  s$    

 z<response_text_basic_mirrorlization.<locals>.regex_reassemble)regex_basic_mirrorlizationsub)textr^   r)   r)   r*   "response_text_basic_mirrorlization  s    !rb   c          
   C   sb   t  d k	 r t  St rJ x1 t D]) } y |  j d |  Wn Yq X| Sq Wt r^ t |   d Sd S)u   
    试图解析并返回二进制串的编码, 如果失败, 则返回 None
    :param byte_content: 待解码的二进制串
    :type byte_content: bytes
    :return: 编码类型或None
    :rtype: Union[str, None]
    Nencoding)Z force_decode_remote_using_encodeZpossible_charsetsdecodecchardet_available	c_chardet)Zbyte_contentZcharsetr)   r)   r*   encoding_detect  s    	rg   c          	   C   s   t  r t j d |   t r, t j d |   |  r y t j   t j   t	 j   t
 j   t j   t j   t j   t j   t j   t j   t j   t j   t j   Wn t d  t j   Yn Xd S)u  
    清理程序运行中产生的垃圾, 在程序运行期间会被自动定期调用
    包括各种重写缓存, 文件缓存等
    默认仅清理过期的
    :param is_force_flush: 是否无视有效期, 清理所有缓存
    :type is_force_flush: bool
    Zforce_flushZforce_flush_allZ!ErrorWhenCleaningFunctionLruCacheN)enable_connection_keep_aliver!   clearlocal_cache_enablecacheZcheck_all_expireurl_to_use_cdnis_domain_match_glob_whitelistcache_clearis_mime_streamed"extract_real_url_from_embedded_urlembed_real_url_to_embedded_urlcheck_global_ua_passis_mime_represents_textextract_mime_from_content_typeis_content_type_using_cdnis_ua_in_whitelistverify_ip_hash_cookieis_denied_because_of_spideris_ip_not_in_allow_rangeerrprint	traceback	print_exc)Zis_force_flushr)   r)   r*   cache_clean  s,    












r}   c             C   s  | s y t  d |  j d t |  d   d t |  d   |  j d  } | d k rm t d t |     | |  j d f   |  j d i     Wn t d	 |   t j   Yn Xt s t j	   t j
   k r t   n d St j |  j d
 d  |  j d d  t |  f  d S)u  
    定时任务容器. 调用目标函数, 并在运行结束后创建下一次定时

    :param task_dict: 定时任务的相关参数, dict
      { "target":目标函数(可调用的函数对象,不是函数名字符串) 必须,
        "iterval":任务延时(秒) 可选,
        "priority":优先级 可选,
        "name":定时任务别名 可选
        "args":位置型参数 (arg1,arg2) 可选,
        "kwargs":键值型参数 {key:value,} 可选,
      }
    :param add_task_only: 是否只添加定时任务而不执行
    z	CronTask:nametargetzTarget:Nztarget is not given in argskwargsZErrorWhenProcessingCronTasksZintervali,  Zpriorityi  )	infoprintgetstr
ValueErrorrz   r{   r|   enable_cron_tasks	threadingcurrent_threadmain_threadexittask_schedulerZentercron_task_container)Z	task_dictadd_task_onlyZtarget_funcr)   r)   r*   r     s*    3
r   c            	   C   sn   xg t  s/ t j   t j   k r+ t   n d St d  y t j   Wq t d  t	 j
   Yq Xq Wd S)uR   定时任务宿主, 每分钟检查一次列表, 运行时间到了的定时任务N<   ZErrorDuringExecutingCronTasks)r   r   r   r   r   r   r   runrz   r{   r|   r)   r)   r)   r*   cron_task_host5  s    


r   c             C   sc   t  j d k r g  t  _ n t t  j  t  _ t  j j |  | f  t d |  d | d t  j  d S)u  
    添加临时域名替换列表
    用于纯文本域名替换, 见 `plain_replace_domain_alias` 选项
    :param source_domain: 被替换的域名
    :param replaced_to_domain: 替换成这个域名
    :type source_domain: str
    :type replaced_to_domain: str
    NzA domaintozadded to temporary_domain_alias)parsetemporary_domain_aliasrG   appenddbgprint)Zsource_domainZreplaced_to_domainr)   r)   r*   add_temporary_domain_aliasG  s    	r   c             C   s
   |  t  k S)u   是否是外部域名)domains_alias_to_target_domain)rN   r)   r)   r*   is_external_domainZ  s    r   c             C   s   |  d k s |  r d S|  t  k r' d S| r? t |   r? d St d |  d  t t  } | j |   t |  a t j |   t  j |   t	 |   t
 |  <t   a y: t t d  d d d	  } | j |  d
  Wd QRXWn t j   Yn Xd S)u  
    若域名与`domains_whitelist_auto_add_glob_list`中的通配符匹配, 则加入 external_domains 列表
    被加入 external_domains 列表的域名, 会被应用重写机制
    用于在程序运行过程中动态添加域名到external_domains中
    也可在外部函数(custom_func.py)中使用
    关于 external_domains 更详细的说明, 请看 default_config.py 中对应的文档
    :type domain: str
    :type force_add: bool
    :rtype: bool
    NFTz	A domain:z"was added to external_domains listzautomatic_domains_whitelist.logarc   zutf-8
)rE   rm   r   rG   external_domainsr   tupleexternal_domains_setaddcalc_domain_replace_prefixprefix_buffrL   r_   openzmirror_rootwriter{   r|   )rN   Z	force_addZ_bufffpr)   r)   r*   rV   `  s(    	rV   c       	      C   s  d } d } i  } |  d k r* t    } nT d |  k rN d } |  j d d  }  d |  k rr d } |  j d d  }  t  |   } | d d  d	 k rt d
 | d d  j d   } | j } | j p d | j r d | j n d } | d d  d k r| d d  } d } n t |  } t |  } | rH| j d d  } | rZt	 |  } | | d <| | d <| | d <t | d  j | d <| St |  } | r| j d d  } | rt	 |  } t
 | d <t d k | d <| | d <t | d  j | d <| S)u  
    解析镜像url(可能含有extdomains), 并提取出原始url信息
    可以不是完整的url, 只需要有 path 部分即可(query_string也可以有)
    若参数留空, 则使用当前用户正在请求的url
    支持json (处理 \/ 和 \. 的转义)

    :rtype: dict[str, Union[str, bool]]
    :return: {'domain':str, 'is_https':bool, 'path':str, 'path_query':str}
    FNz\/Tr,   z\.r9      z/extdomains/z//?r      zhttps-rN   is_https
path_querypathzhttps://)extract_url_path_and_queryr&   r
   lstripnetlocr   queryis_target_domain_use_httpsclient_requests_text_rewrites_esctarget_domaintarget_scheme)	Z
mirror_urlZ_is_escaped_dotZ_is_escaped_slashresultZinput_path_queryr<   real_domainZreal_path_query	_is_httpsr)   r)   r*   decode_mirror_url  sP    
#	)	  


  

r   c       
      C   sy  | r |  j  d d  } n |  } t |  } d | j d d  k rJ |  S| pe | j pe t j pe t } | t k r t d | d  |  S| d k	 r | d d	  d
 k r d
 t	 } q | s | j
 r t } q d } n d } t |  r d | } n d } t d | d | d t d |  t | | d t |  j d   }	 | j rc|	 d | j 7}	 | rut |	  }	 |	 S)zconvert url from remote to mirror url
    :type raw_url_or_path: str
    :type remote_domain: str
    :type is_scheme: bool
    :type is_escape: bool
    :rtype: str
    zr\/r,   z/extdomains/Nr   zdomain:z is not in allowed_domains_setFrS   z//r   zraw_url_or_path=z
; domain: z;myurl_prefix:z; middle_part: #)r&   r
   r   r   r   r]   r   rE   r   rY   schememyurl_prefixr   r	   r   r   Zfragmentr   )
Zraw_url_or_pathr]   	is_schemeZ	is_escapeZ_raw_url_or_pathsprN   Z
our_prefixZmiddle_partr   r)   r)   r*   encode_mirror_url  s8    			&	r   c             C   s8   t  d k r d St  d k r  d S|  t  k r0 d Sd Sd S)u&   请求目标域名时是否使用httpsZNONEFZALLTN)Zforce_https_domains)rN   r)   r)   r*   r     s    r   c             C   s   t  j |   d S)uK   添加域名到ssrf白名单, 不支持通配符
    :type domain: str
    N)rE   r   )rN   r)   r)   r*   add_ssrf_allowed_domain  s    r   Z
error_dumpc       	      C   sA  d d l  } y"t j j t |    s: t j t |    t j   j d  } d d l	 } d t j   d t
 j   d | d t j   d t | d	 d
 d t t d	 d
 i } | d k	 r | j   | | d <t j j t j j t |   | d   } t | d   } | j | | | j  Wd QRX| SWn d SYn Xd S)u*  
    dump当前状态到文件
    :param folder: 文件夹名
    :type folder: str
    :param our_response: Flask返回对象, 可选
    :type our_response: Response
    :param msg: 额外的信息
    :type msg: str
    :return: dump下来的文件绝对路径
    :rtype: Union[str, None]
    r   Nzsnapshot_%Y-%m-%d_%H-%M-%Sr   r   msgr{   configZto_dictTZFlaskRequestZOurResponsez.dumpwb)pickleosr   existsr   mkdirr   nowstrftimer   r   dumpr{   
format_excZ
attributesr   ZfreezeabspathrI   r   ZHIGHEST_PROTOCOL)	Zfolderr   Zour_responser   Z	_time_strr   Zsnapshotdump_file_pathr   r)   r)   r*   dump_zmirror_snapshot  s*    

+r   zUnknown Errori  c             C   s  | r t  j   t |   t |  t  r5 |  j   }  t d |   } d } xT t d d   t t	   D]7 } | d j
 d | d t t t	 j |     7} qf Wd j
 d	 |  d
 | d | r t t  j    n d d | d t j d t j  } | st | j   |  S| Sd S)z

    :type content_only: bool
    :type errormsg: Union(str, bytes)
    :type error_code: int
    :type is_traceback: bool
    :rtype: Union[Response, str]
    r   r   c             S   s$   |  d d k o# |  d d   d k S)Nr   _rS   __r)   )r=   r)   r)   r*   rA   T  s    z%generate_error_page.<locals>.<lambda>z*<tr><td>{attrib}</td><td>{value}</td></tr>attribvalueu:  <!doctype html><html lang="zh-CN"><head><meta charset="UTF-8">
<title>zmirror internal error</title>
<style>code{{background-color: #cccaca;}}</style>
</head>
<body>
<h1>zmirror internal error</h1>
An fatal error occurs. 服务器中运行的zmirror出现一个内部错误.<br>

<hr>
<h2>If you are visitor 如果你是访客</h2>
This site is temporary unavailable because some internal error<br>
Please contact your site admin. <br>
该镜像站暂时出现了临时的内部故障, 请联系网站管理员<br>

<hr>
<h2>If you are admin</h2>
You can find full detail log in your server's log.<br>
For apache, typically at <code>/var/log/apache2/YOUR_SITE_NAME_error.log</code><br>
tips: you can use <code>tail -n 100 -f YOUR_SITE_NAME_error.log</code> to view real-time log<br>
<br>
If you can't solve it by your self, here are some ways may help:<br>
<ul>
    <li>contact the developer by email: <a href="mailto:i@z.codes" target="_blank">aploium &lt;i@z.codes&gt;</a></li>
    <li>seeking for help in zmirror's <a href="https://gitter.im/zmirror/zmirror" target="_blank">online chat room</a></li>
    <li>open an <a href="https://github.com/aploium/zmirror/issues" target="_blank">issue</a> (as an bug report) in github</li>
</ul>
<h3>Snapshot Dump</h3>
An snapshot has been dumped to <code>{dump_file_path}</code> <br>
You can load it using (Python3 code) <code>pickle.load(open(r"{dump_file_path}","rb"))</code><br>
The snapshot contains information which may be helpful for debug
<h3>Detail</h3>
<table border="1"><tr><th>Attrib</th><th>Value</th></tr>
{request_detail}
</table>
<h3>Additional Information</h3>
<pre>{errormsg}</pre>
<h3>Traceback</h3>
<pre>{traceback_str}</pre>
<hr>
<div style="font-size: smaller">Powered by <em>zmirror {version}</em><br>
<a href="{official_site}" target="_blank">{official_site}</a></div>
</body></html>errormsgrequest_detailZtraceback_strzNone or not displayedr   r   Zofficial_siteN)r{   r|   rz   
isinstancebytesrd   r   filterdirr   rK   html_escaper   __getattribute__r   r   __VERSION____GITHUB_URL__r   encode)r   
error_codeis_tracebackZcontent_onlyr   r   r   Z
error_pager)   r)   r*   generate_error_pageA  s$    	

")+	r   c             C   s,   t  d |  d d  } | j j d d  | S)z:rtype Responsecontent_typestatusi0  zX-CachezFileHit-304)r   headersr   )Z_content_typerr)   r)   r*   generate_304_response  s    r   c             C   s   t  } x6 |  D]. } | | |  | t t j d d   7} q Wt t j | j d d    d d  } x  t |  d k  r | d 7} qm Wt t j | t  j d d    d d  } x  t |  d k  r | d 7} q W| | S)	u   
    生成一个标示用户身份的hash
    在 human_ip_verification 功能中使用
    hash一共14位
    hash(前7位+salt) = 后7位 以此来进行验证
    :rtype str
    r   i@T rc   zutf-8rS   N   0)	&human_ip_verification_answers_hash_strr   randomZrandinthexzlibadler32r   len)Z
input_dictZstrbuffr?   input_key_hashoutput_hashr)   r)   r*   generate_ip_verify_hash  s    ,+/r   maxsizei   c             C   sz   yg |  d d  } |  d d  } t  t j | t j d d    d d  } | | k rb d Sd SWn d SYn Xd S)u   
    根据cookie中的hash判断是否允许用户访问
    在 human_ip_verification 功能中使用
    hash一共14位
    hash(前7位+salt) = 后7位 以此来进行验证
    :type hash_cookie_value: str
    :rtype: bool
    N   rc   zutf-8rS   TF)r   r   r   r   r   )Zhash_cookie_valuer   r   Zcalculated_hashr)   r)   r*   rw     s    
	rw   GETc             C   s   t  r | d k r t j |   r t j |   } t j |   } | j |  d | d <t d k r t d |  | d d  t |   t j	 |  | d t |  d	 t
 t j  d
 | j d
  d | d S)uQ   更新 local_cache 中缓存的资源, 追加content
    在stream模式中使用r   Fwithout_content   ZLocalCache_UpdateCacheN   obj_sizer-   last_modified	info_dict)rj   rk   	is_cachedget_infoget_objset_dataverbose_levelr   r   put_objr#   r   mimer   )urlcontentmethodr   respr)   r)   r*   update_content_in_local_cache  s    !
 #r  c             C   s   t  j d k s | j d k r" d St d |  d |  | r\ t j |  } d | _ d } n | } t t  j j  } t  j j	 j
 d d  } t j |  | d t t  j  d	 | d
 | d d | d
 | i d S)a;  
    put our response object(headers included) to local cache
    :param without_content: for stream mode use
    :param url: client request url
    :param _our_resp: our response(flask response object) to client, would be storge
    :type url: str
    :type _our_resp: Response
    :type without_content: bool
    r      NzPuttingCache:zwithout_content:r   zLast-Modifiedr-   r   r   r   r   )r   r  status_coder   copyresponser   remote_responser  r   r   rk   r   r#   r   )r   Z	_our_respr   Zour_respr   r   r)   r)   r*   put_response_to_local_cache  s$    		r
  c             C   s   t  r t j d k r t j |   r | d k	 rn d | k rn t j |  | j d d   rn t d |   t   St j	 |   } | j d d  r d St j
 |   } t | t  s t  t j d d  | Sn d Sd S)	u   
    尝试从本地缓存中取出响应
    :param url: real url with query string
    :type client_header: dict
    :rtype: Union[Response, None]
    r   Nzif-modified-sincezFileCacheHit-304r   Tzx-zmirror-cacheZFileHit)rj   r   r  rk   r   Zis_unchangedr   r   r   r   r   r   r   AssertionErrorset_extra_resp_header)r   client_headerZcached_infor  r)   r)   r*   try_get_cached_response  s    $r  c             C   s#  t  d |   } t  d |   } t  d |   } t  d |   } t  d |   } t  d |   } |  j   } d | k s~ d | k r d } | j d d	  } n d
 } | sFd | k r d | k r | sF| d k sFd | k r d	 | k sF| r | | k sF| r3d | k r3d t j k r3| d d  d	 k sF| rJd | k rJ| St rZt |  | pft j }	 |	 t k r|  j   St	 t j
 |  } t j d k  rd | k r| j d d	  } | j d	  sd	 | } d t j k rt	 |	 d	 | j d	   }
 n |	 d	 | j d	  }
 |	 t k r2d |
 } t rU|
 t k rUt |
 d } n d
 } | rt t t j | j    t } n. | sd } n d | k rd t } n t } t	 | |  } | rt rt | d t |
 d d | } | r t |  } | | | | t  d |   } | S) z
    Reassemble url parts split by the regex.
    :param match_obj: match object of stdlib re
    :return: re assembled url string (included prefix(url= etc..) and suffix.)
    :rtype: str
    prefix
quote_leftquote_rightr   rN   r   z\/Tr,   Fr   importr4   r$   zsrc=Z
javascriptNr   "      z/../z/extdomains/r   r   httpz//url_mimeZescape_slashZright_suffix)r  r  )rT   rW   r&   r   r   rU   rV   r]   rE   r	   remote_pathsysversion_info
startswithr   r   enable_static_resource_CDNrl   r[   CDN_domainsr   r   r   cdn_domains_numberrY   r   &cdn_redirect_encode_query_str_into_urlrq   r   )	match_objr  r  r  r   Zmatch_domainr   Zwhole_match_stringZrequire_slash_escaperN   url_no_schemeZ_this_url_mime_cdnZreplace_to_scheme_domainZreassembled_urlZreassembledr)   r)   r*   regex_url_reassemble  sj    +8



	$		r"     c             C   sA   |  j    }  t |  k r d Sx t D] } | |  k r# d Sq# Wd S)u   
    当机器人或蜘蛛的请求被ban时, 检查它是否处在允许的白名单内
    被 is_denied_because_of_spider() 调用
    :type ua_str: str
    TF)lowerZglobal_ua_white_nameZspider_ua_white_list)ua_strZ
allowed_uar)   r)   r*   rv     s    rv   c             C   sZ   |  j    }  d |  k s$ d |  k rR t |   rA t d |   d St d |   d Sd Sd S)u@   检查user-agent是否因为是蜘蛛或机器人而需要ban掉ZspiderZbotz!A Spider/Bot's access was grantedFzA Spider/Bot was denied, UA is:TN)r$  rv   r   )r%  r)   r)   r*   rx     s    rx   c           	   C   sc   t    }  t j j t t   r_ t t t  d d d   } |  j | j   j	    Wd QRX|  S)u   从文件加载ip白名单r   rc   zutf-8N)
setr   r   r   r   )human_ip_verification_whitelist_file_pathr   r   readliner%   )Zset_buffr   r)   r)   r*   load_ip_whitelist_file  s
    	 r)  c             C   s]   y: t  t t  d d d  } | j |  d  Wd QRXWn t d  t j   Yn Xd S)u   写入ip白名单到文件r   rc   zutf-8r   NzUnable to write whitelist file)r   r   r'  r   rz   r{   r|   )ip_to_allowr   r)   r)   r*   append_ip_whitelist_file  s    
r+  c             C   s   |  t  k r d St d |  d |  t  j |   t j   t |   yp t t t  d d d N } | j	 t
 j   j d  d |  d t t j  d t |  d	  Wd QRXWn+ t d
 t j j t   t j   Yn Xd S)u%   添加ip到白名单, 并写入文件Nzip white addedzinfo:r   rc   zutf-8z%Y-%m-%d %H:%M:%S r   zUnable to write log file)single_ip_allowed_setr   r   ry   rn   r+  r   r   Z#human_ip_verification_whitelist_logr   r   r   r   r   r   
user_agentreprrz   r   r   r   r{   r|   )r*  info_record_dictr   r)   r)   r*   ip_whitelist_add  s    

Lr1  c             C   sD   |  t  k r d St j |   } x t D] } | | k r& d Sq& Wd S)u   判断ip是否在白名单中FT)r-  	ipaddress
ip_address0human_ip_verification_default_whitelist_networks)r3  Zip_address_objZallowed_networkr)   r)   r*   ry     s    ry   c             C   s   xv |  j  t  D]e } y | j | d d Wn& t j k
 rU t j   t   Yn Xt d k r t	 d | j
    q W| j d d d t   d S)u}   
    stream模式下, 预读远程响应的content
    :param requests_response_obj:
    :type buffer_queue: queue.Queue
    timeout
   r  Z
BufferSizeN)Ziter_contentZstream_transfer_buffer_sizeZputqueueZFullr{   r|   r   r   r   Zqsize)Zrequests_response_objbuffer_queueparticle_contentr)   r)   r*   'preload_streamed_response_content_async  s    
 r:  c              c   s  d }  t    } d } d } t j d t  } t j d t d t j | f d d  } | j	   x8y | j
 d	 d
  } Wn- t j k
 r t d  t j   d SYn X| j   | d k	 r t r | r t |  d k r d } d } n
 | | 7} | VnV t j t k r#t |  t t j d <t rO| rOt t j | d t j j j d St d k ra |  t |  7}  t d |  d |  d t    | d  qa Wd S)u7   异步, 一边读取远程响应, 一边发送给用户r       Fr   r   r   daemonTr5     ZWeGotAnSteamTimeoutNr   i   rS   r  r   ztotal_size:ztotal_speed(KB/s):gư>i    i   )r   r7  ZQueueZ/stream_transfer_async_preload_max_packages_sizer   Threadr:  r   r	  startr   ZEmpty	warnprintr{   r|   Z	task_donerj   r   r!  rl   r  
remote_urlr   r  r   r   )Z
total_size_start_timeZ_content_bufferZ_disable_cache_temporaryr8  tr9  r)   r)   r*   iter_streamed_response_async	  sF    			


	
	
rD  c             C   s\  |  r d t  j d <t   } n t   \ } t  j d <t d t  j j  t | d t  j j } xt  j j D]} | j	   } | t
 k r| d k r t  j j | } t d |  t r t | d t  j  } t | d d	  | j | <t d
 | j |  n| d k rZt t  j  r@d t  j k r@t  j d | j | <qt  j j | | j | <n | d k rt d k rt | j | <qt d k rt j j d  pt j j d  pt } | | j | <q| d k rt | j d <qt | j | <n t  j j | | j | <| d k rm x* t   D] } | j j d t |   q!Wqm Wt d | j  | S)z
    Copy and parse remote server's response headers, generate our flask response object

    :type is_streamed: bool
    :return: flask response object
    :rtype: Response
    r   req_time_bodyZRemoteRespHeadersr   r.   z302 locationzmwm/headers-locationFTz302 rewrite locationzcontent-typezutf-8z; charset=utf-8access-control-allow-origintiming-allow-originNz_*_originZOriginzAccess-Control-Allow-Originz
set-cookiez
Set-CookiezOurRespHeaders:
)rF  rG  )r   r   rD  response_content_rewriter   r	  r   r   r  r$  allowed_remote_response_headerscustom_text_rewriter_enablecustom_response_text_rewriterrA  r   rs   r   r   Zcustom_allowed_originr   r   r   response_cookies_deep_copyr   response_cookie_rewrite)is_streamedr  r  Z
header_keyZheader_key_lowerZ	_locationZ_origincookie_stringr)   r)   r*   copy_response=  sF    	*!rQ  c              C   s   t  j j j j j }  g  } x |  D] \ } } | j   d k r" t d k r | j d d  } | j d d  } | j d d  } d | j   k r t	 r t
 j d	 |  } n8 t	 d
 k	 r t  j t k r t
 j d t  j d |  } | j |  q" W| S)a  
    It's a BAD hack to get RAW cookies headers, but so far, we don't have better way.
    We'd go DEEP inside the urllib's private method to get raw headers

    raw_headers example:
    [('Cache-Control', 'private'),
    ('Content-Length', '48234'),
    ('Content-Type', 'text/html; Charset=utf-8'),
    ('Server', 'Microsoft-IIS/8.5'),
    ('Set-Cookie','BoardList=BoardID=Show; expires=Mon, 02-May-2016 16:00:00 GMT; path=/'),
    ('Set-Cookie','aspsky=abcefgh; expires=Sun, 24-Apr-2016 16:00:00 GMT; path=/; HttpOnly'),
    ('Set-Cookie', 'ASPSESSIONIDSCSSDSSQ=OGKMLAHDHBFDJCDMGBOAGOMJ; path=/'),
    ('X-Powered-By', 'ASP.NET'),
    ('Date', 'Tue, 26 Apr 2016 12:32:40 GMT')]

    z
set-cookiezhttp://zSecure;r   z;Secure;z; SecureZhttponlyzpath=/;Nz\g<prefix>=/extdomains/z\g<path>)r   r	  rawZ_original_responser   Z_headersr$  r[   r&   Z&enable_aggressive_cookies_path_rewriteregex_cookie_path_rewriterr`   r]   rZ   r   )Zraw_headersZheader_cookies_string_listr~   r   r)   r)   r*   rM    s"    rM  c              C   sm  t    }  t j j } t    |  } t t j  sK t d t j  | | f St d k r t d t j t j j	 d d  | d d   t
 t j j  } | d k	 r | t j _ t j j	 } t d k	 r t | k r t d t    t rt | t j t j  } t | t  r| } nV t | t  s9t | t  rq| \ } } | rqt d t j  | j d d	  | f St d k	 rt | k rt d
 t    t |  } t d k	 rt | k rt d t    t rWt j d k rWxr t j   D]d \ } } xU | D]M }	 |	 j d  }
 |
 d k	 r9|
 j t j  r9qt | | |	 d  } qWqW| j d d	  | f S)zw
    Rewrite requests response's content's url. Auto skip binary (based on MIME).
    :return: Tuple[bytes, float]
    ZBinaryr  z	Text-likeNr=  zDStringTrace: appears in the RAW remote response text, code line no. ZSkip_builtin_rewriterc   zutf-8z>StringTrace: appears after custom text rewrite, code line no. z:StringTrace: appears after builtin rewrite, code line no. z	text/htmlrM   r  ) r   r   r	  r  rs   r   r   r   r   ra   rg   rc   developer_string_tracer   current_line_numberrK  rL  rA  r   r   r   rG   r   r   r   response_text_rewritecustom_inject_contentitemsr   matchr!  Zinject_content)rB  Z_contentrE  rc   	resp_textZ
resp_text2Zis_skip_builtin_rewriteZpositionrY  itemr   r)   r)   r*   rI    sJ    	
 $	rI  c              O   s*   d d l  m } | d t  t |  |   S)u   本函数在v0.28.3被移除, 对本函数的调用会被映射出去
    如果需要查看本函数代码, 请查看git历史到 v0.28.3 以前
    r   )warnz[This function is deprecated since v0.28.3, use response_text_basic_mirrorlization() instead)warningsr]  DeprecationWarningrb   )r   r   r]  r)   r)   r*   response_text_basic_rewrite  s    r`  c             C   s  t  r6 x- t t j D] \ } } |  j | |  }  q W|  j d  d k rX t d |   t j t	 |   }  |  j d  d k r t d |   t
 d k	 r t
 |  k r t d t    t |   }  t
 d k	 r t
 |  k r t d t    |  j d t d	 d	 t d	  }  |  j d
 t d d t d  }  |  j d t d t  }  |  j d	 t d	 d	 t d	  }  |  j d t d d t d  }  t
 d k	 rt
 |  k rt d t    |  S)zb
    rewrite urls in text-like content (html,css,js)
    :type resp_text: str
    :rtype: str
    zURL("/http://"r   z0 resp_text:z1 resp_text:Nz;StringTrace: appears after advanced rewrite, code line no. z?StringTrace: appears after basic mirrorlization, code line no. z".r  z'.'zdomain=.zdomain=zDStringTrace: appears after js cookies string rewrite, code line no. )url_custom_redirect_enableplain_replace_domain_aliasr   r   r&   findr   regex_adv_url_rewriterr`   r"  rU  rV  rb   target_domain_rootmy_host_name_no_port)r[  Zbefore_replaceZafter_replacer)   r)   r*   rW    s*    """"rW  c             C   s   t  j d t |   }  |  S)z`
    rewrite response cookie string's domain to `my_host_name`
    :type cookie_string: str
    zdomain=)regex_cookie_rewriterr`   rg  )rP  r)   r)   r*   rN  +  s    rN  c              C   sM   t  j r5 t  j r d n d }  t |  t  j t  j  St t t t  j  Sd S)uU   
    组装目标服务器URL, 即生成 parse.remote_url 的值
    :rtype: str
    zhttps://zhttp://N)r   r   r   r	   r]   remote_path_queryr   r   )r   r)   r)   r*   assemble_remote_url8  s    	rj  c               C   s=   t  j t k r9 t t  j  s9 t r5 t t  j  d Sd Sd S)u   
    SSRF防护, 第一层, 在请求刚开始时被调用, 检查域名是否允许
    :return: 如果请求触发了SSRF防护, 则返回True
    :rtype: bool
    FT)r   r]   rE   rV   +developer_temporary_disable_ssrf_preventionr   r)   r)   r)   r*   ssrf_check_layer_1F  s    rl  c              C   s  i  }  t  d t j  x t j D] \ } } | j   } | d k rJ q  q  | d k rh | d k rh q  q  | d k r d | k s d | k r d } d	 | k r | d
 7} d | k r | d 7} | r  | |  | <q  q  t |  |  | <| d k r  t j d |  |  |  | <q  Wt  d |   |  S)u  
    Extract necessary client header, filter out some.

    对于浏览器请求头的策略是黑名单制, 在黑名单中的头会被剔除, 其余所有请求头都会被保留

    对于浏览器请求头, zmirror会移除掉其中的 host和content-length
    并重写其中的cookie头, 把里面可能存在的本站域名修改为远程服务器的域名

    :return: 重写后的请求头
    :rtype: dict
    zBrowserRequestHeaders:hostcontent-lengthzcontent-typer   zaccept-encodingZbrZsdchZgzipzgzip, ZdeflateZcookiezFilteredBrowserRequestHeaders:)zhostrn  )r   r   r   r$  r   $regex_remove__zmirror_verify__headerr`   )Zrewrited_headersZ	head_nameZ
head_valueZhead_name_lZ	_str_buffr)   r)   r*   extract_client_headerW  s0    $


rp  c             C   s   d d   } t  j | |   } t d k	 rF t | k rF t d t    t j t |  } | j t t  } t	 d |  d |  | S)a  
    Rewrite proxy domain to origin domain, extdomains supported.
    Also Support urlencoded url.
    This usually used in rewriting request params

    eg. http://foo.bar/extdomains/accounts.google.com to http://accounts.google.com
    eg2. foo.bar/foobar to www.google.com/foobar
    eg3. http%3a%2f%2fg.zju.tools%2fextdomains%2Faccounts.google.com%2f233
            to http%3a%2f%2faccounts.google.com%2f233

    :type raw_text: str
    :rtype: str
    c             S   s   t  d |   } |  j d  } t  d |   } t t  d |    } |  j d  } d } | r d | k r | s{ t |  r | d | 7} n | d | 7} | | d	 7} | | 7} | S)
Nr   rQ   rP   r   r   r   r  ZhttpsrS   )rT   rW   boolr   )r   r   rQ   rP   r   r   r   r)   r)   r*   replace_to_real_domain  s    
z<client_requests_text_rewrite.<locals>.replace_to_real_domainNzAStringTrace: appears client_requests_text_rewrite, code line no. zClientRequestedUrl: z<- Has Been Rewrited To ->)
!regex_request_rewriter_extdomainsr`   rU  r   rV  "regex_request_rewriter_main_domainr   r&   rY   r   )Zraw_textrr  Zreplacedr)   r)   r*   r     s    r   c             C   sU   |  d k r t  j }  t |   } | j p- d } | rQ | j rQ | d | j 7} | S)z
    Convert http://foo.bar.com/aaa/p.html?x=y to /aaa/p.html?x=y

    :param no_query:
    :type full_url: str
    :param full_url: full url
    :return: str
    Nr,   r   )r   r   r
   r   r   )Zfull_urlZno_queryr<   r   r)   r)   r*   r     s    		r   c       	   
   C   sy  t  |   j } t d |  d |  | t k rD t rD t d |   | sP d } t j | |  d | d | d | j   } t	 r t
 j |  } n t j   } t   t j d <| j | d	 t d
 d d t d t } t   t j d t j d <t d t j d d d t d k rut | j j d | j d | j  t d | j j  | ret d | j j  t d | j  | S)uy   实际发送请求到目标服务器, 对于重定向, 原样返回给用户
    被request_remote_site_and_parse()调用ZFinalRequestUrlZFinalHostnamez5Trying to access an OUT-OF-ZONE domain(SSRF Layer 2):Nr   ZparamsdataZreq_start_timeZproxiesZallow_redirectsFstreamZverifyreq_time_headerzRequestTime:vr   r  zFinalSentToRemoteRequestUrl:z
Rem Resp Stat: zRemoteRequestHeaders: zRemoteRequestRawData: zRemoteResponseHeaders: )r
   r   r   rE   rk  ConnectionAbortedErrorrequestsZRequestZpreparerh   r!   Zget_sessionZSessionr   r   sendrequests_proxiesenable_stream_content_transferZdeveloper_do_not_verify_sslr   r   r  r   r  r   Zbody)	r   r  r   Z	param_getru  Zfinal_hostnameZprepped_reqZ_sessionr   r)   r)   r*   send_request  s>    
"r~  c           	   C   s   t  j   }  t |   } | d k	 rW y |  j d |  }  Wn d } Yn Xt |   }  t r t |  t  r~ |  j d |  }  t j d |  |  k r t	 d t
    |  | f S)u   
    解析出浏览者发送过来的data, 如果是文本, 则进行重写
    如果是文本, 则对文本内容进行重写后返回str
    如果是二进制则, 则原样返回, 不进行任何处理 (bytes)
    :rtype: Union[str, bytes, None]
    Nrc   zFStringTrace: appears after client_requests_bin_rewrite, code line no. )r   get_datarg   rd   r   rU  r   r   r   r   rV  )ru  rc   r)   r)   r*   prepare_client_request_data  s    r  c              C   s   t  d t j  }  t j d d k r@ t j d d t j d  t j j d  d k	 r t j r t j d d t j d	  t j d
 d t   t j d  t j d d t j  t	 r t j r t
 d  |  S)u4   
    生成我们的响应
    :rtype: Response
    rO  rw  gh㈵>zX-Header-Req-Timez%.4f
start_timeNzX-Body-Req-TimerE  zX-Compute-TimezX-Powered-Byz
zmirror/%sZtraffic)rQ  r   streamed_our_responser   r  r   r   r   r   Zdeveloper_dump_all_trafficsr   )r  r)   r)   r*   generate_our_response6  s    "	
r  c              C   s  t  j j j d d  t  _ t t  j  t  _ t rS t t  j  rS t	 d d d St
 oe t t  j  t  _ t  j j j d d  t  _ d t  j k o d t  j k o d	 t  j k o d
 t  j k o t  j j j d k o t  j j d k t  _ t d k r)t d t  j d t  j d t  j d t   d d t rt  j rt  j t k rd t  j j k rqt  j j j d  }  n$ t  j rd }  n t t  j j  }  d t  j |  g t t  j <t t  j  rd t t  j d <t d t  j  n t d t  j  d S)u   处理远程服务器的响应zContent-Typer   s'   This site is just for static resources.r   i  zCache-Controlzno-storezmust-revalidatez	max-age=0Zprivater   r  r   zResponse Content-Type:zIsStreamed:z
cacheable:ZLinerx  zContent-Lengthr   FTr   zCDN enabled for:zCDN disabled for:Nr:   )r   r	  r   r   r   rt   r   Zonly_serve_static_resourcesru   generate_simple_resp_pager}  ro   r  Zcache_controlr   r  r  	cacheabler   r   rV  r  r!  rl   r   r  )Zlengthr)   r)   r*   parse_remote_responseN  s4    *				r  r   c       	      C   se  t  j } t t t  j   } d } x:t t j   d |    D]\ } } | | k r\ qA | | d <y1 t t	 |  d t
 j d t  j d t  j } Wn wA Yn Xd | j k o d k n rt d | d	 d
 | |  d k sA | d k r qA qs| \ } } nk d | j k o"d k n rs| |  d k rU| d k rA | | f } qA n | d k	 rA | \ } } n qA t d | d |  t  j d |  t t  j d | d d } t d t
 j d |  | t
 _ | t | t
 j f <yQ t t d  d d d / } | j d j t j   | t
 j |   Wd QRXWn Yn Xt |  j t
 _ t   | SWd Sd S)u  
    猜测url所对应的正确域名
    当响应码为 404 或 500 时, 很有可能是把请求发送到了错误的域名
    而应该被发送到的正确域名, 很有可能在最近几次请求的域名中
    本函数会尝试最近使用的域名, 如果其中有出现响应码为 200 的, 那么就认为这条url对应这个域名
    相当于发生了一次隐式url重写

    * 本函数很可能会改写 parse 与 request

    :rtype: Union[Tuple[Response, float], None]
    Nr   r  r   ru  i  iW  zDomain guess failed:rx  r   i,  i  zdomain guess successful, fromr   zX-Domain-Guessr]   r   TzShadow rewriting, fromzdomain_guess.logr   rc   zutf-8z{}	{}	{}	-->	{}
)r   r]   rG   r
   rA  	enumeraterecent_domainsrH   r~  r   r   r  r  request_data_encodedr  r   r  r   ri  r   domain_guess_cacher   r   r   r   rK   r   r   assemble_parse)	ZdepthZcurrent_domainr   Z
redirectedirN   r  rewrited_urlZfwr)   r)   r*   guess_correct_domain  s\    	)
					3r  c               C   sb   t  t j d t j d t j d t j t _ t j j t j k r^ t	 d t j j d t j  d S)u]   
    请求远程服务器(high-level), 并在返回404/500时进行 domain_guess 尝试
    r  r   ru  zrequests's remote urlzdoes no equals our rewrited urlN)
r~  r   rA  r   r  r  r  r	  r   r@  r)   r)   r)   r*   request_remote_site  s    			r  c               C   s  t  d t j  t j j t j  d k rB t  d t j  t   St t t j	   r[ d St
 r t t t j	   r t d d  St r{t s t r t s t t j  r{t  d t j d  d	 t j k r=t r t t j j d	   st r=t t j j d	  t  r=t t j d
 t j j d	  t  d t j  n> t d t j t t j  j d d   j d d  d d Sd S)uY   过滤用户请求, 视情况拒绝用户的访问
    :rtype: Union[Response, None]
    zClient Request Url: zcrossdomain.xmlzcrossdomain.xml hit fromNs$   Spiders Are Not Allowed To This Sitei  Zipzis verifying cookieszmirror_verifyr0  z$add to ip_whitelist because cookies:z/ip_ban_verify_page?origin=rc   zutf-8codei.  )r   r   r   r   r   basenamecrossdomain_xmlrr   r   r.  Zis_deny_spiders_by_403rx   r  human_ip_verification_enabled,human_ip_verification_whitelist_from_cookies/enable_custom_access_cookie_generate_and_verifymust_verify_cookiesry   remote_addrZcookiesrw   r   custom_verify_access_cookier1  r   base64Zurlsafe_b64encoder   rd   r)   r)   r)   r*   filter_client_request  s0    	*
r  c              C   s  t  d t j  t j rP d t j d d  k rP t  d  t t j d d St r d t j d d  k r t j	 j
 d  r t t j	 j
 d   d	 }  |  t k r t t t j |   d d St  d
  t rt j t k r,t j j t j t t j d  } t  d t j d |  t | d d St  d  x t D] \ } } t j | t j d t j d k	 rt j | | t j d t j } t  d t j d |  t | d d St  d  q=Wt rt t t  } | d k	 r| Sd S)u  对用户的请求进行按需重定向处理
    与 rewrite_client_request() 不同, 使用301/307等进行外部重定向, 不改变服务器内部数据
    遇到任意一个需要重定向的, 就跳出本函数

    这是第一阶段重定向

    第一阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之前* 的重定向
    第二阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之后* 的重定向

    如果 `custom_prior_request_redirect_enable` 启用, 则会调用 custom_func.custom_prior_redirect_func() 进行自定义重定向

    :return: 如果不需要重定向, 则返回None, 否则返回重定向的 Response
    :rtype: Union[Response, None]
    zprior_request_redirect url:z/extdomains/Nr   z4Requesting main domain in extdomains, redirect back.r  i3  ZrefererrN   z9prior_request_redirect before url_custom_redirect_enable:r   zRedirect fromr   z8prior_request_redirect before url_custom_redirect_regex:r5   z6url_custom_redirect_regex not match! remote_path_query)r   r   r   r   r   r   r   ri  !enable_individual_sites_isolationr   r   r   isolated_domainsr   rb  url_custom_redirect_listr&   url_custom_redirect_regexr;   rZ  
IGNORECASEr`   Z$custom_prior_request_redirect_enableZcustom_prior_redirect_func)Zreference_domainZredirect_toZregex_matchZregex_replaceZredirectionr)   r)   r*   prior_request_redirect(  s2    #
1
"
$!r  c              C   s  t  r t r t j t k r t t j d r t j d k r t t t j d  t k r t t	 t
 j   r t t t t j t j j    t t    }  t r t |  d t t j d }  t |  d t St r@t t j t j  } | d k	 r@t d  t j j d	  d k	 r<t j d
 d t   t j d	  | St j  t
 j! f t" k rt" t j  t
 j! f } t# t j$ d | d d } t d t
 j% d |  t | d d Sd S)u  
    这是第二阶段重定向, 内部隐式重写 *之后* 的重定向
    第一阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之前* 的重定向
    第二阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之后* 的重定向

    遇到任意一个需要重定向的, 就跳出本函数

    :return: 如果不需要重定向, 则返回None, 否则返回重定向的 Response
    :rtype: Union[Response, None]
    r   r   rS   r  r   r  NzCacheHit,Returnr  zX-Compute-Timez%.4fr]   r   Tz%Redirect via domain_guess_cache, fromr   i3  )&r  Z(cdn_redirect_code_if_cannot_hard_rewriter   r!  rl   r  intZcdn_soft_redirect_minimum_sizerv   r   r   r.  r	   r[   r  r   r   r   r  r   r  rq   r   rj   r  rA  r  r   r   r   r  r   r]   r   r  r   ri  r   )Zredirect_to_urlr  rN   r  r)   r)   r*   posterior_request_redirect]  s<    
"		r  c              C   s   t    }  |  d t _ |  d t _ |  d t _ |  d t _ t t j  t _ t   t _ t j t j j	 d  d d  t _
 d t t j <t d	 t j d
 t j  d S)u9   将用户请求的URL解析为对应的目标服务器URLrN   r   r   r   z//rS   NTzafter assemble_parse, url:z   path_query:)r   r   r]   r   r  ri  r   rj  rA  rd  r!  r  r   )Z_tempr)   r)   r*   r    s    	&r  c              C   s   d }  t  r^ t t j  } | d k	 r^ t d t j d |  | t _ t |  j t _ d }  t r t r t	   } | } xn t D]f \ } } t
 j | | |  } | | k r t d | d |  t | t _ t |  j t _ d }  Pq W|  r t   |  S)u  
    在这里的所有重写都只作用程序内部, 对请求者不可见
    与 prior_request_redirect() 的外部301/307重定向不同,
    本函数通过改变程序内部变量来起到重定向作用
    返回True表示进行了重定向, 需要重载某些设置, 返回False表示未重定向
    遇到重写后, 不会跳出本函数, 而是会继续下一项. 所以重写顺序很重要
    FNzBeforeEmbeddedExtract:z After:TzShadowUrlRedirect:r   )r  rp   r   r   r   r
   r   rb  shadow_url_redirect_regexr   r;   r`   r   r  )has_been_rewritedZreal_urlZ_path_queryZ_path_query_rawZbeforeafterr)   r)   r*   rewrite_client_request  s,    		r  c             C   s   t  r t j   |  S)N)rh   r!   release_lock)r  r)   r)   r*   zmirror_after_request  s    
r  z/zmirror_statc              C   s  t  j r% t  j d k r% t d d  Sd }  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d	 t	 j    7}  |  t d
 t
 j    7}  |  t d t
 j    7}  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d t j    7}  |  t d  7}  |  t d t  7}  d |  d S)u$   返回服务器的一些状态信息z	127.0.0.1s   Only 127.0.0.1 are allowedi  r   rp   z
is_content_type_streamedz
embed_real_url_to_embedded_urlz
check_global_ua_passz
extract_mime_from_content_typez
is_content_type_using_cdnz
is_ua_in_whitelistz
is_mime_represents_textz
is_domain_match_glob_whitelistz
verify_ip_hash_cookiez
is_denied_because_of_spiderz
is_ip_not_in_allow_rangez

current_threads_numberz
----------------
z
domain_alias_to_target_setz<pre>z</pre>
)r   r  r  Zstrxrp   
cache_inforo   rq   rr   rt   ru   rs   rm   rw   rx   ry   r   Zactive_countrZ   )outputr)   r)   r*   zmirror_status  s&    r  z/ip_ban_verify_pagemethodsPOSTc              C   s  t  j d k r)t d t  j  d }  xW t t  D]I \ } } |  d | d | t |  d k rp t | d  n d f 7}  q2 WxI t D]A \ } } } |  d | t	 r d	 n d t |  t |  f 7}  q Wd
 t  j
 k r |  d t t  j
 j d
   7}  d t t  t t  t	 rd n d t |  f St  j d k rt d t  j  x t t  D] \ } } t  j j t |  d  } | d k rt	 rqUn t d | d j   d  S| | d k rt d | d j   d  St	 rUPqUWt	 rt d d  Si  } x\ t D]T \ } } } | t  j k s1t  j | rHt d | j   d  St  j | | | <qWd }	 d
 t  j k ry+ t j t  j j d
   j d d  }	 Wn. t d t t  j j d
   d d SYn( Xt |	  j }
 |
 r|
 t k rd }	 t rt |  st d d  St |	 d t } t r|t |  } | j d | d  t  j!   t" d! t#  d" t# d# d$ | | d% <n| t$ rt% | t   } t d& |  | d' k rt d d  S| j d | d  t  j!   t" d! t#  d" t# d# d$ | | d% <t& t  j d( | | Sd' S))u   生成一个身份验证页面r   zVerifying IP:r   zN%s <input type="text" name="%d" placeholder="%s" style="width: 190px;" /><br/>r   r  rS   z'%s %s<input type="%s" name="%s" /><br/>u)   <span style="color: red;">(必填)<span> rH  zF<input type="hidden" name="origin" value="%s" style="width: 190px;" />u]  <!doctype html>
        <html lang="zh-CN">
        <head>
        <meta charset="UTF-8">
        <title>%s</title>
        </head>
        <body>
          <h1>%s</h1>
          <p>这样的验证只会出现一次，通过后您会被加入白名单，之后相同IP的访问不会再需要验证。<br/>
          提示: 由于手机和宽带IP经常会发生改变，您可能会多次看到这一页面。</p>
          %s <br>
          <pre style="border: 1px dashed;">%s</pre>
          <form method='post'>%s<button type='submit'>递交</button>
          </form>
        </body>
        </html>u7   只需要回答出以下<b>任意一个</b>问题即可u+   你需要回答出以下<b>所有问题</b>r  zVerifying Request Forms   Please answer question: r  r   s   Wrong answer in: s#   Please answer at least ONE questions   Param Missing or Blank: r,   rc   zutf-8z#Unable to decode origin from value:r   Ts!   Verification Failed, please checkr   r  r-   ZdaysZmax_age   i  Z__zmirror_verifyzSelfGeneratedCookie:Nr0  )'r   r  r   r  r  human_ip_verification_questionsr   r   Z%human_ip_verification_identity_recordZ4human_ip_verification_answer_any_one_questions_is_okr   r   Zhuman_ip_verification_titleZ!human_ip_verification_descriptionZformr   r  r   r  Zurlsafe_b64decoderd   r   r
   r   rY   identity_verify_requiredcustom_identity_verifyZgenerate_html_redirect_pageZ!human_ip_verification_success_msgr  r   Z
set_cookier   r   r   Z4human_ip_verification_whitelist_cookies_expires_daysr  custom_generate_access_cookier1  )Z	form_bodyZq_idZ	_questionZrec_explain_stringZrec_nameZ
input_typeZsubmitted_answerZrecord_dictZ	form_typerH  r   r  _hashr)   r)   r*   ip_ban_verify_page	  s    ;	+(
r  ZOPTIONSZPUTZDELETEZHEADZPATCHz/<path:input_path>c          
   C   s   yy t  d |   } x0 t j j   D] \ } } | j j | |  q" Wx0 t j j   D] \ } } | j j d |  qU WWn t d d  SYn X| Sd S)uN   入口函数的壳, 只是包了一层异常处理, 实际是 main_function() 
input_pathz
Set-Cookier   TN)	main_functionr   Zextra_resp_headersrY  r   r&  Zextra_cookiesr   r   )r  r  r~   r   rP  r)   r)   r*   zmirror_enter{	  s    r  c             C   s  t  d  t j   t j t _ t   t j d <t   t   } | d k	 rZ t  d  | St	   } | d k	 rs | St
   } t   r t d d  St   t _ t   } | d k	 r | St   \ t _ t _ t   t   t   } t rt j rt t j | d t j t  d  | S)	u8   本程序的实际入口函数
    :rtype: Response
    z-----BeginRequest-----r  Nz"-----EndRequest(filtered out)-----s,   SSRF Prevention! Your domain is NOT ALLOWED.i  r   z-----EndRequest-----)r   r   Zinitr   r  r   r   r  r  r  r  rl  r  rp  r  r  r  request_dataZrequest_data_encodingr  r  r  rj   r  r
  rA  r  )r  r   r  r  r)   r)   r*   r  	  s6    

	
					
r  z/crossdomain.xmlc               C   s   t  d d d S)NaD  <?xml version="1.0"?>
<!DOCTYPE cross-domain-policy SYSTEM "http://www.macromedia.com/xml/dtds/cross-domain-policy.dtd">
<cross-domain-policy>
<allow-access-from domain="*"/>
<site-control permitted-cross-domain-policies="all"/>
<allow-http-request-headers-from domain="*" headers="*" secure="false"/>
</cross-domain-policy>r   ztext/x-cross-domain-policy)r   r)   r)   r)   r*   r  	  s    r  z/about_zmirrorc               C   s=   t  d j d t j d t j d t j d t d t  d d S)	Nzzmirror
version: {version}
Author: {author}
Github: {github_url}
Note: Love Luciaz Forever!

Mirroring: {source_site}
This site: {my_domain}
r   r   Z
github_urlZsource_siteZ	my_domainr   z
text/plain)r   rK   r   r   
__AUTHOR__r   r   rY   r)   r)   r)   r*   about_zmirror	  s
    	r  custom_func)rL  zuCannot import custom_response_text_rewriter custom_func.py, `custom_text_rewriter` is now disabled(if it was enabled))r  zoCannot import custom_identity_verify from custom_func.py, `identity_verify` is now disabled (if it was enabled))r  r  zCannot import custom_generate_access_cookie and custom_generate_access_cookie from custom_func.py, `enable_custom_access_cookie_generate_and_verify` is now disabled (if it was enabled)r   r   ZUnableToInitCronTaskr<  __main__z#Please use `python3 wsgi.py` to run)r1   r2   (  r   r  r;   r  r   Zschedr7  r  r   r{   r2  r   r   r   r   r   Zhtmlr   r   r   r   Zurllib.parser	   r
   r   r   Zurllibrz  Zflaskr   r   r   r   r   r   r   Z$requests.packages.urllib3.exceptionsr   ZpackagesZurllib3Zdisable_warningsZtypingr   r   r   r   Zcchardetr   rf   re   r   r   getcwdZZMIRROR_ROOTchdirZexternal_pkgs.ColorfulPyPrintenvironZunittest_modeZ	fastcacher   	functoolsr@  r   Zthreadlocalr   rK   r   r  r   Zconfig_defaultrz   r   r   r%   r&   Z	importlibZreloadZimport_moduleZutilsZlru_dictr    r!   rj   Zcache_systemr"   r#   rk   r|   Z!ColorfulPyPrint_set_verbose_levelr   Z%developer_enable_experimental_featurerY   rg  rX   r   Zmy_host_name_urlencodedr   rG   r&  r   rE   r   Z_domainZhostnamerZ   r   r   r[   Zmy_host_scheme_escapedr   Zmyurl_prefix_escapedr   r  r  rJ  updateZcustom_allowed_remote_headersZextract_root_domainrf  Zmy_host_name_rootr}  Zsteamed_mime_keywordsrb  r  r  r  rc  r   r   Z$enable_stream_transfer_async_preloadrU   Z$domains_whitelist_auto_add_glob_listr  r  Zisolated_domainr  r  Zis_use_proxyr|  r  Zbuffr4  ZnetworkZ
ip_networkr  Zquestionr   r  r  r   Z	schedulerr   rl   r  r  r6   r7   rC   ZREGEX_MY_HOST_NAMErJ   r  re  rh  rT  rs  rt  rL   r_   ro  rX  rY  krx  r   r   I__name__apprb   rg   r}   r   r   r   r   rV   r   Z$extract_from_url_may_have_extdomainsr   Zconvert_to_mirror_urlr   r   r   r   r   r   rw   r  r
  r  r"  rv   rx   r)  r+  r1  ry   r:  rD  rQ  rM  rI  r`  rW  rN  rj  rl  rp  r   r   r~  r  r  r  r  r  r  r  r  r  r  Zafter_requestr  Zrouter  r  r  r  r  r  r   r   r-  r  rK  rL  r  r  r  r   Zcron_tasks_listZ
_task_dictglobals	Exceptioner>  Zthr?  r   r)   r)   r)   r*   <module>   sZ  "(&!
	
"
'




		
					 	

BK'		("	9$/,@1)M'	
4M)E	*=64"=\'5@.'y''K	




