U
    I9b                    @   sn  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZmZmZmZ d dlZd dlZd dlmZmZmZm Z m!Z! dd	l"m#Z# d d
l$m%Z% ej&j'(e% zd dl)m*Z*m+Z+m,Z,m-Z- W n   Y nX zd dl.m/Z0 W n   dZ1Y nX dZ1e j23e 4 e#j5krze 6e#j5 ddl7T de j8krdZ9ndZ9zd dl:m;Z; W n    d dl<m;Z; e=d Y nX e9se>d ddl?m@Z@ e9se>djAe#jBe#jCd e>djAe#jDd zd dlET W n   eFd  Y nX zd dlGT W n   eFd  Y n&X eHIdJddJddZHe>deH e9rd dlKZKeKLeKMd  eKLeKMd! ddlNT dd"lOmPZP dd#l"mQZQ eRrzdd$lSmTZTmUZU eT ZVW n    e	W  eFd% dZRY nX e9se>d& eXeY eZr*e[Z\e]dk	rRe[d'e^e] 7 Z[ee[Z_ne[Z_t`dkrdg a`ead(d) t`D a`ebt`pg actc adtdeeH t`D ]Zftdeedef jg qeb ZheheeH eaeiZieir eiD ]Zftdeef eheef qeijeH neHgZiekJd*d+Zleke[ ZmemJd*d+ZneoepZqd,d-d.d/d0d1d2d3d4d5d6d7d8d9d:hZrerset eueHd  ZveueHd  Zwexs|d;Zyezsi Z{d;Z|d;Z}g Z~ee~ereae~Z~exsdZese Zeseb Zn"eD ]Zetckre=d<ed= qerdZes dZerLg ZeD ]Zeje
jedd> qeeZeD ]Zeed 7 Zq6ndZdZdZesheshdZe@ ZeeeaePd?Zdd@dAgedB< ePdCZdeeH< ePdDZdEedF< dGZdHZdIZe]dk	rdJee\ e ee^e] dK ee\ dL Zn
ee[ZejdMejdNZejdOejdNZedPZejdQdRdSjAedTdUe   dV e dWjAedX  dV dYjAedX dZ ejdNZeeZd[d\ Ze aed]Zepi Ze D ]L\ZZesĐqeD ]2Zed^dkrqejed^ ejdNed^< qȐqee9send_e^e Jd`d dddaZdbdc Zddde ZddfdgZddhdiZdjdk Zdldm Zdndo ZddpdqZddrdsZeZddtduZeZdvdw Zdxdy Zdd{d|ZdddZdddZdd Ze;dddd ZdddZŐd ddZƐdddZdd Ze;dddd Ze;dddd Zdd Zdd Z̐dddZe;dddd Zdd Zdd ZАdddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZڐdddZېdddZdd Zdd Zdd ZߐdddÄZddń ZddǄ ZddɄ Zdd˄ Zdd̈́ Zddτ Zejddф ZedҡddԄ Zejdddgd׍ddل Zejd*dddddddgd׍ejddddddddgd׍dddZdddZeddd Zeddd Zi atdD ]Zfeeftef< 	qe	re˃ Zneb Zz*e9
r d dlKZKeKLeKMd d dlT W n   Y nX e
rfzd dlmZ W n   e=d  Y nX e
rzd dlmZ W n   dZe=d  Y nX e
rzd dlmZmZ W n   dZeFd  Y nX erPeD ]ZZz"e ed  ed< eedd W n0 ek
r. Z zeFde  W 5 dZ[X Y nX 
qejeddZ e   edkrjeFd e  dS (	      N)fnmatch)timesleepprocess_time)escape)datetime	timedelta)urljoinurlsplit
urlunsplit
quote_plus)Flaskrequestmake_responseResponseredirect   )CONSTS)InsecureRequestWarning)UnionListAnyTuple)detectFT)*ZZMIRROR_UNITTEST)	lru_cachezpackage fastcache not found, fallback to stdlib lru_cache, no FUNCTION is effected, only maybe a bit slower. Considering install it using "pip3 install fastcache"z,lru_cache loaded successfully from fastcache)ZmirrorThreadLocalz+zmirror version: {version} author: {author})versionauthorzGithub: {site_url})Zsite_urlu   the config_default.py is missing, this program may not works normally
config_default.py 文件丢失, 这会导致配置文件不向后兼容, 请重新下载一份 config_default.pyu  the config_default.py is missing, fallback to default configs(if we can), please COPY the config_default.py to config.py, and change it's content, or use the configs in the more_configs folder
自定义配置文件 config.py 丢失或存在错误, 将使用默认设置, 请将 config_default.py 复制一份为 config.py, 并根据自己的需求修改里面的设置(或者使用 more_configs 中的配置文件)./ 	https:// http://zconfig file found, mirroring: zzmirror.utilszzmirror.connection_pool)LRUDict)connection_pool)	FileCacheget_expire_from_mimezLCan Not Create Local File Cache, local file cache is disabled automatically.zLocal file cache enabled:c                 C   s&   g | ]}| d ddddqS )r   r    r!   r"   )stripreplace).0d r,   #/var/www/zmirror/zmirror/zmirror.py
<listcomp>   s     r.   /\/content-typedateexpireszcache-controlzlast-modifiedZserverlocationzaccept-rangesaccess-control-allow-originzaccess-control-allow-headerszaccess-control-allow-methodszaccess-control-expose-headerszaccess-control-max-agez access-control-allow-credentialstiming-allow-originr,   zAn isolated domain:zOwould not have effect because it did not appears in the `external_domains` list)stricti   z	image/pngiB  z&www.fake-domain.com/folder/foo/bar.pngd   i  ztarget.domain.com)zexample.comz/path/no/query/stringz(?::|%(?:25)?3[Aa])z3(?:\\*(?:/|x2[Ff])|%(?:(?:25)?5[Cc]%)*(?:25)?2[Ff])z2(?:\\*["']|%(?:(?:25)?5[Cc]%)*2(?:52)?[27]|&quot;)(?:|)a9  (?P<prefix>\b(?:(?:src|href|action)\s*=|url\s*\(|url\s*:|@import\s*|"\s*:)\s*)(?P<quote_left>["'])?(?P<domain_and_scheme>(?P<scheme>(?:https?:)?\\?/\\?/)(?P<domain>(?:[-a-z0-9]+\.)+[a-z]+(?P<port>:\d{1,5})?))?(?P<path>[^\s;:\\+$?#'"\{}]*?(?P<query_string>\?[^\s?#'"]*?)?)(?P<quote_right>["')])(?P<right_suffix>\W)flagsz\bdomain=(\.?([\w-]+\.)+\w+)\bz*(?P<prefix>[pP]ath)=(?P<path>[\w\._/-]+?;)z(?P<domain_prefix>(?P<scheme>#(?:https?(?P<colon>{REGEX_COLON}))?REGEX_COLON%(?P<scheme_slash>%s)(?P=scheme_slash)z)?z<(?P<slash2>(?(scheme_slash)(?P=scheme_slash)|{REGEX_SLASH})))REGEX_SLASHzCextdomains(?(slash2)(?P=slash2)|{REGEX_SLASH})(?P<is_https>https-)?z#(?P<real_domain>(?:[\w-]+\.)+\w+)\bc                     s   ddl m}  | dd tD  tt   fdddd}d	d
| d }td	ddj	t
ddt   d d
 dj	td  d d|  dj	td d S )u   产生 regex_basic_mirrorlization
    用一个函数包裹起来是因为在 try_match_and_add_domain_to_rewrite_white_list()
    中需要动态修改 external_domains, 修改以后可能需要随之生成新的正则, 包裹一下比较容易调用
    r   )Counterc                 s   s"   | ]}t |d d V  qdS ).N)rer   split)r*   xr,   r,   r-   	<genexpr>U  s     z8_regex_generate__basic_mirrorlization.<locals>.<genexpr>c                    s    |  S Nr,   rI   cr,   r-   <lambda>V      z7_regex_generate__basic_mirrorlization.<locals>.<lambda>T)keyreverser9   r:   r;   r>   r?   r@   rB   z(?P<quote>{REGEX_QUOTE}))REGEX_QUOTEz&(?P<domain>([a-zA-Z0-9-]+\.){1,5}%s)\bz=(?P<suffix_slash>(?(scheme_slash)(?P=scheme_slash)|{SLASH}))?)ZSLASHz(?(quote)(?P=quote)))collectionsrD   allowed_domains_setsortedlistkeysjoinrG   compileformatrA   rC   rS   )rD   Zregex_all_remote_tldr,   rM   r-   %_regex_generate__basic_mirrorlizationM  s6    



r\   z!zmirror_verify=[a-zA-Z0-9]+\b;? ?	url_regexZunittestrE   )Zstatic_folderZtemplate_folderc                 C   s   dd }t || S )u  
    response_text_basic_rewrite() 的实验性升级版本, 默认启用

    *v0.28.1.dev*
        之前版本是在正则中匹配所有允许的域名, 现在改为匹配所有可能允许的TLD,
        可以带来一些性能的提升, 并且容易进行动态域名添加和通配符支持

    *v0.28.2*
        进一步优化正则, 性能提升 47% 左右 (速度约为传统暴力替换的4.4倍)

    *v0.28.3*
        目前来看该功能工作得相当好, 由实验性特性改为正式使用
        移除旧版 response_text_basic_rewrite(), 只保留一个为了向下兼容的 alias

    :param text: 远程响应文本
    :type text: str
    :return: 重写后的响应文本
    :rtype: str
    c                 S   s   t d| }|tkr&trt|s&|  S t d| }t d| p@|p@d}t d| pRt|}trdtd|nt}|r|t	kr|| d | | | }n|| }t d| }|r|| | S t d| rt
d|d|| S |d	 | S d S )
Ndomainsuffix_slashscheme_slashr/   colonr'   Z
extdomainsquote   )	get_grouprU   "enable_automatic_domains_whitelist.try_match_and_add_domain_to_rewrite_white_listgroupZguess_colon_from_slashmy_host_portmy_host_namer)   domain_alias_to_target_setmy_host_scheme)mremote_domainr_   Zslashra   Z_my_host_nameZcorerb   r,   r,   r-   regex_reassemble  s&    



z<response_text_basic_mirrorlization.<locals>.regex_reassemble)regex_basic_mirrorlizationsub)textrn   r,   r,   r-   "response_text_basic_mirrorlization  s    !rr   c                 C   sT   t dk	rt S tr@tD ]*}z| j|d W n   Y qX |  S qtrPt| d S dS )u   
    试图解析并返回二进制串的编码, 如果失败, 则返回 None
    :param byte_content: 待解码的二进制串
    :type byte_content: bytes
    :return: 编码类型或None
    :rtype: Union[str, None]
    Nencodingrt   )Z force_decode_remote_using_encodeZpossible_charsetsdecodecchardet_available	c_chardet)Zbyte_contentZcharsetr,   r,   r-   encoding_detect  s    	
rx   c                 C   s   t rtj| d tr tj| d | rzlt  t  t	  t
  t  t  t  t  t  t  t  t  t  W n   td t  Y nX dS )u  
    清理程序运行中产生的垃圾, 在程序运行期间会被自动定期调用
    包括各种重写缓存, 文件缓存等
    默认仅清理过期的
    :param is_force_flush: 是否无视有效期, 清理所有缓存
    :type is_force_flush: bool
    )Zforce_flush)Zforce_flush_allZ!ErrorWhenCleaningFunctionLruCacheN)enable_connection_keep_aliver$   clearlocal_cache_enablecacheZcheck_all_expireurl_to_use_cdnis_domain_match_glob_whitelistcache_clearis_mime_streamed"extract_real_url_from_embedded_urlembed_real_url_to_embedded_urlcheck_global_ua_passis_mime_represents_textextract_mime_from_content_typeis_content_type_using_cdnis_ua_in_whitelistverify_ip_hash_cookieis_denied_because_of_spideris_ip_not_in_allow_rangeerrprint	traceback	print_exc)Zis_force_flushr,   r,   r-   cache_clean  s,    r   c                 C   s   |szft d| dt| d dt| d  | d}|dkrNtdt|  || dd| d	i  W n   td
|  t  Y nX tst	 t
 krt  ndS t| dd| ddt| f dS )u  
    定时任务容器. 调用目标函数, 并在运行结束后创建下一次定时

    :param task_dict: 定时任务的相关参数, dict
      { "target":目标函数(可调用的函数对象,不是函数名字符串) 必须,
        "iterval":任务延时(秒) 可选,
        "priority":优先级 可选,
        "name":定时任务别名 可选
        "args":位置型参数 (arg1,arg2) 可选,
        "kwargs":键值型参数 {key:value,} 可选,
      }
    :param add_task_only: 是否只添加定时任务而不执行
    z	CronTask:nametargetzTarget:Nztarget is not given in argsr,   kwargsZErrorWhenProcessingCronTasksZinterval,  Zpriorityi  )	infoprintgetstr
ValueErrorr   r   r   enable_cron_tasks	threadingcurrent_threadmain_threadexittask_schedulerZentercron_task_container)Z	task_dictadd_task_onlyZtarget_funcr,   r,   r-   r     s.    &





r   c                   C   sX   t s t t krt  ndS td zt  W q    td t	
  Y q X q dS )uR   定时任务宿主, 每分钟检查一次列表, 运行时间到了的定时任务N<   ZErrorDuringExecutingCronTasks)r   r   r   r   r   r   r   runr   r   r   r,   r,   r,   r-   cron_task_host5  s    r   c                 C   sF   t jdkrg t _ntt jt _t j| |f td| d|dt j dS )u  
    添加临时域名替换列表
    用于纯文本域名替换, 见 `plain_replace_domain_alias` 选项
    :param source_domain: 被替换的域名
    :param replaced_to_domain: 替换成这个域名
    :type source_domain: str
    :type replaced_to_domain: str
    NzA domaintozadded to temporary_domain_alias)parsetemporary_domain_aliasrW   appenddbgprint)Zsource_domainZreplaced_to_domainr,   r,   r-   add_temporary_domain_aliasG  s    	
r   c                 C   s   | t kS )u   是否是外部域名)domains_alias_to_target_domainr^   r,   r,   r-   is_external_domainZ  s    r   c              	   C   s   | dks| sdS | t krdS |s,t| s,dS td| d tt}||  t|at|  t |  t	| t
| < t az0ttdddd	}|| d
  W 5 Q R X W n   t  Y nX dS )u  
    若域名与`domains_whitelist_auto_add_glob_list`中的通配符匹配, 则加入 external_domains 列表
    被加入 external_domains 列表的域名, 会被应用重写机制
    用于在程序运行过程中动态添加域名到external_domains中
    也可在外部函数(custom_func.py)中使用
    关于 external_domains 更详细的说明, 请看 default_config.py 中对应的文档
    :type domain: str
    :type force_add: bool
    :rtype: bool
    NFTz	A domain:z"was added to external_domains listzautomatic_domains_whitelist.logautf-8rs   
)rU   r~   r   rW   external_domainsr   tupleexternal_domains_setaddcalc_domain_replace_prefixprefix_buffr\   ro   openzmirror_rootwriter   r   )r^   Z	force_addZ_bufffpr,   r,   r-   rf   `  s(    


rf   c           	      C   sz  d}d}i }| dkrt  }n8d| kr4d}| dd} d| krLd}| dd} t | }|dd d	kr td
|dd d }|j}|jpd|jrd|j nd }|dd dkr|dd }d}nt|}t|}|r|dd}|rt	|}||d< ||d< ||d< t|d j|d< |S t|}|r:|dd}|rHt	|}t
|d< tdk|d< ||d< t|d j|d< |S )u  
    解析镜像url(可能含有extdomains), 并提取出原始url信息
    可以不是完整的url, 只需要有 path 部分即可(query_string也可以有)
    若参数留空, 则使用当前用户正在请求的url
    支持json (处理 \/ 和 \. 的转义)

    :rtype: dict[str, Union[str, bool]]
    :return: {'domain':str, 'is_https':bool, 'path':str, 'path_query':str}
    FNr0   Tr/   z\.rE      /extdomains///?r!      zhttps-r^   is_https
path_querypathr    )extract_url_path_and_queryr)   r
   lstripnetlocr   queryis_target_domain_use_httpsclient_requests_text_rewrites_esctarget_domaintarget_scheme)	Z
mirror_urlZ_is_escaped_dotZ_is_escaped_slashresultZinput_path_queryrH   real_domainZreal_path_query	_is_httpsr,   r,   r-   decode_mirror_url  sP    
    r   c           
      C   s  |r|  dd}n| }t|}d|jdd kr4| S |pF|jpFtjpFt}|tkr`td|d | S |dk	r|dd	 d
krd
t	 }q|s|j
rt}qd}nd}t|rd| }nd}td| d | d t d |  t|| d t|d}	|jr|	d|j 7 }	|rt|	}	|	S )zconvert url from remote to mirror url
    :type raw_url_or_path: str
    :type remote_domain: str
    :type is_scheme: bool
    :type is_escape: bool
    :rtype: str
    zr\/r/   r   Nr   zdomain:z is not in allowed_domains_setFrc   r   r!   zraw_url_or_path=z
; domain: z;myurl_prefix:z; middle_part: #)r)   r
   r   r   r   rm   r   rU   r   ri   schememyurl_prefixr   r	   r   r   Zfragmentr   )
Zraw_url_or_pathrm   	is_schemeZ	is_escapeZ_raw_url_or_pathspr^   Z
our_prefixZmiddle_partr   r,   r,   r-   encode_mirror_url  s:    	


$r   c                 C   s,   t dkrdS t dkrdS | t kr$dS dS dS )u&   请求目标域名时是否使用httpsZNONEFZALLTN)Zforce_https_domainsr   r,   r,   r-   r     s    r   c                 C   s   t |  dS )uK   添加域名到ssrf白名单, 不支持通配符
    :type domain: str
    N)rU   r   r   r,   r,   r-   add_ssrf_allowed_domain  s    r   
error_dumpc           	   	   C   s   ddl }ztjt| s(tt|  t d}ddl	}t t
 |t t|ddttddd}|dk	r||  ||d< tjtjt| |d }t|d	}||||j W 5 Q R X |W S    Y dS X dS )
u*  
    dump当前状态到文件
    :param folder: 文件夹名
    :type folder: str
    :param our_response: Flask返回对象, 可选
    :type our_response: Response
    :param msg: 额外的信息
    :type msg: str
    :return: dump下来的文件绝对路径
    :rtype: Union[str, None]
    r   Nzsnapshot_%Y-%m-%d_%H-%M-%ST)Zto_dict)r   r   msgr   configZFlaskRequestZOurResponsez.dumpwb)pickleosr   existsr   mkdirr   nowstrftimer   r   dumpr   
format_excZ
attributesr   ZfreezeabspathrY   r   ZHIGHEST_PROTOCOL)	Zfolderr   Zour_responser   Z	_time_strr   Zsnapshotdump_file_pathr   r,   r,   r-   dump_zmirror_snapshot  s,    

r   Unknown Error  c              	   C   s   |rt   t|  t| tr&|  } t| d}d}tdd tt	D ]$}|dj
|ttt	|d7 }qFdj
| ||rtt  nd|tjtjd	}|st| |S |S d
S )z

    :type content_only: bool
    :type errormsg: Union(str, bytes)
    :type error_code: int
    :type is_traceback: bool
    :rtype: Union[Response, str]
    r   r!   c                 S   s   | d dko| dd  dkS )Nr   ___r,   rL   r,   r,   r-   rO   T  rP   z%generate_error_page.<locals>.<lambda>z*<tr><td>{attrib}</td><td>{value}</td></tr>)attribvalueu:  <!doctype html><html lang="zh-CN"><head><meta charset="UTF-8">
<title>zmirror internal error</title>
<style>code{{background-color: #cccaca;}}</style>
</head>
<body>
<h1>zmirror internal error</h1>
An fatal error occurs. 服务器中运行的zmirror出现一个内部错误.<br>

<hr>
<h2>If you are visitor 如果你是访客</h2>
This site is temporary unavailable because some internal error<br>
Please contact your site admin. <br>
该镜像站暂时出现了临时的内部故障, 请联系网站管理员<br>

<hr>
<h2>If you are admin</h2>
You can find full detail log in your server's log.<br>
For apache, typically at <code>/var/log/apache2/YOUR_SITE_NAME_error.log</code><br>
tips: you can use <code>tail -n 100 -f YOUR_SITE_NAME_error.log</code> to view real-time log<br>
<br>
If you can't solve it by your self, here are some ways may help:<br>
<ul>
    <li>contact the developer by email: <a href="mailto:i@z.codes" target="_blank">aploium &lt;i@z.codes&gt;</a></li>
    <li>seeking for help in zmirror's <a href="https://gitter.im/zmirror/zmirror" target="_blank">online chat room</a></li>
    <li>open an <a href="https://github.com/aploium/zmirror/issues" target="_blank">issue</a> (as an bug report) in github</li>
</ul>
<h3>Snapshot Dump</h3>
An snapshot has been dumped to <code>{dump_file_path}</code> <br>
You can load it using (Python3 code) <code>pickle.load(open(r"{dump_file_path}","rb"))</code><br>
The snapshot contains information which may be helpful for debug
<h3>Detail</h3>
<table border="1"><tr><th>Attrib</th><th>Value</th></tr>
{request_detail}
</table>
<h3>Additional Information</h3>
<pre>{errormsg}</pre>
<h3>Traceback</h3>
<pre>{traceback_str}</pre>
<hr>
<div style="font-size: smaller">Powered by <em>zmirror {version}</em><br>
<a href="{official_site}" target="_blank">{official_site}</a></div>
</body></html>zNone or not displayed)errormsgrequest_detailZtraceback_strr   r   Zofficial_siteN)r   r   r   
isinstancebytesru   r   filterdirr   r[   html_escaper   __getattribute__r   r   __VERSION____GITHUB_URL__r   encode)r   
error_codeis_tracebackZcontent_onlyr   r   r   Z
error_pager,   r,   r-   generate_error_pageA  s.    	

 
*  0r   c                 C   s   t | dd}|jdd |S )z:rtype Responsei0  )content_typestatuszX-CachezFileHit-304)r   headersr   )Z_content_typerr,   r,   r-   generate_304_response  s    r   c                 C   s   t }| D ]$}||| |  ttdd 7 }qtt|jdddd }t|dk rb|d7 }qLtt|t  jdddd }t|dk r|d7 }q|| S )	u   
    生成一个标示用户身份的hash
    在 human_ip_verification 功能中使用
    hash一共14位
    hash(前7位+salt) = 后7位 以此来进行验证
    :rtype str
    r   i@T r   rs   rc   N   0)	&human_ip_verification_answers_hash_strr   randomZrandinthexzlibadler32r   len)Z
input_dictZstrbuffrQ   input_key_hashoutput_hashr,   r,   r-   generate_ip_verify_hash  s    "
"
r	     maxsizec                 C   sf   zR| dd }| dd }t t|t jdddd }||krJW dS W dS W n   Y dS X dS )u   
    根据cookie中的hash判断是否允许用户访问
    在 human_ip_verification 功能中使用
    hash一共14位
    hash(前7位+salt) = 后7位 以此来进行验证
    :type hash_cookie_value: str
    :rtype: bool
    N   r   rs   rc   TF)r  r  r  r  r   )Zhash_cookie_valuer  r  Zcalculated_hashr,   r,   r-   r     s    

r   GETc                 C   s   t r|dkrt| rt| }t| }|| d|d< tdkr^td| |dd t| tj	| |t|t
tj|d|d	 dS )
uQ   更新 local_cache 中缓存的资源, 追加content
    在stream模式中使用r  Fwithout_content   ZLocalCache_UpdateCacheN   last_modified)obj_sizer3   r  	info_dict)r{   r|   	is_cachedget_infoget_objset_dataverbose_levelr   r  put_objr&   r   mimer   )urlcontentmethodr  respr,   r,   r-   update_content_in_local_cache  s    


 r   c              	   C   s   t jdks|jdkrdS td| d| |r@t|}d|_d}n|}tt jj}t jj	
dd}tj| |tt j||||dd	 dS )
a;  
    put our response object(headers included) to local cache
    :param without_content: for stream mode use
    :param url: client request url
    :param _our_resp: our response(flask response object) to client, would be storge
    :type url: str
    :type _our_resp: Response
    :type without_content: bool
    r     NzPuttingCache:zwithout_content:r   zLast-Modified)r  r  )r3   r  r  r  )r   r  status_coder   copyresponser  remote_responser  r   r   r|   r  r&   r  )r  Z	_our_respr  Zour_respr  r  r,   r,   r-   put_response_to_local_cache  s(    
r&  c                 C   s   t rtjdkrt| r|dk	rLd|krLt| |ddrLtd|  t S t	| }|ddrfdS t
| }t|ts~ttdd |S ndS dS )	u   
    尝试从本地缓存中取出响应
    :param url: real url with query string
    :type client_header: dict
    :rtype: Union[Response, None]
    r  Nzif-modified-sincezFileCacheHit-304r  Tzx-zmirror-cacheZFileHit)r{   r   r  r|   r  Zis_unchangedr   r   r   r  r  r   r   AssertionErrorset_extra_resp_header)r  client_headerZcached_infor  r,   r,   r-   try_get_cached_response  s    


r*  c                 C   sL  t d| }t d| }t d| }t d| }t d| }t d| }|  }d|ksTd|krfd}|dd	}nd
}|rd|krd|kr|r|dksd|krd	|ks|r||ks|sd|krdtjkr|dd d	ks|sd|kr|S trt| |ptj}	|	tkr|  S t	tj
|}tjdk r2d|kr2|dd	}|d	sFd	| }dtjkrht	|	d	 |d	}
n|	d	 |d	 }
|	tkrd|
 }tr|
tkrt|
 d }nd
}|rttt| t   }n$|sd}nd|krdt }nt}t	||}|r tr t|t|
 d |d}|r.t|}|| | | t d|  }|S )z
    Reassemble url parts split by the regex.
    :param match_obj: match object of stdlib re
    :return: re assembled url string (included prefix(url= etc..) and suffix.)
    :rtype: str
    prefix
quote_leftquote_rightr   r^   r   r0   Tr/   Fr  importr;   r'   zsrc=Z
javascriptNr   ")      z/../r   r   r!   httpr   )url_mimeZescape_slashZright_suffix)rd   rg   r)   r   r  re   rf   rm   rU   r	   remote_pathsysversion_info
startswithr   r   enable_static_resource_CDNr}   rk   CDN_domainsr  r  r   cdn_domains_numberri   r   &cdn_redirect_encode_query_str_into_urlr   r   )	match_objr+  r,  r-  r   Zmatch_domainr   Zwhole_match_stringZrequire_slash_escaper^   url_no_schemeZ_this_url_mime_cdnZreplace_to_scheme_domainZreassembled_urlZreassembledr,   r,   r-   regex_url_reassemble  s    





		


	



r>     c                 C   s0   |   } t| krdS tD ]}|| kr dS qdS )u   
    当机器人或蜘蛛的请求被ban时, 检查它是否处在允许的白名单内
    被 is_denied_because_of_spider() 调用
    :type ua_str: str
    TF)lowerZglobal_ua_white_nameZspider_ua_white_list)ua_strZ
allowed_uar,   r,   r-   r     s    r   c                 C   sD   |   } d| ksd| kr<t| r.td|  dS td|  dS dS dS )u@   检查user-agent是否因为是蜘蛛或机器人而需要ban掉ZspiderZbotz!A Spider/Bot's access was grantedFzA Spider/Bot was denied, UA is:TN)r@  r   r   )rA  r,   r,   r-   r     s    

r   c               	   C   sJ   t  } tjttrFtttddd}| | 	  W 5 Q R X | S )u   从文件加载ip白名单r   r   rs   )
setr   r   r   r   )human_ip_verification_whitelist_file_pathr   r   readliner(   )Zset_buffr   r,   r,   r-   load_ip_whitelist_file  s
    rE  c              	   C   sR   z0t ttddd}|| d  W 5 Q R X W n   td t  Y nX dS )u   写入ip白名单到文件r   r   rs   r   zUnable to write whitelist fileN)r   r   rC  r   r   r   r   )ip_to_allowr   r,   r,   r-   append_ip_whitelist_file  s    rG  c              	   C   s   | t krdS td| d| t |  t  t|  z\tttddd@}|	t
 dd |  d ttj d t| d	  W 5 Q R X W n&   td
tjt t  Y nX dS )u%   添加ip到白名单, 并写入文件Nzip white addedzinfo:r   r   rs   z%Y-%m-%d %H:%M:%S r   zUnable to write log file)single_ip_allowed_setr   r   r   r   rG  r   r   Z#human_ip_verification_whitelist_logr   r   r   r   r   r   
user_agentreprr   r   r   r   r   r   )rF  info_record_dictr   r,   r,   r-   ip_whitelist_add  s,    
rM  c                 C   s2   | t krdS t| }tD ]}||kr dS qdS )u   判断ip是否在白名单中FT)rI  	ipaddress
ip_address0human_ip_verification_default_whitelist_networks)rO  Zip_address_objZallowed_networkr,   r,   r-   r     s    
r   c              	   C   sv   |  tD ]R}z|j|dd W n$ tjk
rD   t  t  Y nX tdkr
t	d|
  q
|jddd t  dS )u}   
    stream模式下, 预读远程响应的content
    :param requests_response_obj:
    :type buffer_queue: queue.Queue
    
   Ztimeoutr0  Z
BufferSizeN)Ziter_contentZstream_transfer_buffer_sizeZputqueueZFullr   r   r   r  r   Zqsize)Zrequests_response_objbuffer_queueparticle_contentr,   r,   r-   'preload_streamed_response_content_async  s     rV  c                  c   s,  d} t  }d}d}tjtd}tjttj|fdd}|	  z|j
dd}W n( tjk
rt   td	 t  Y d
S X |  |d
k	rtr|st|dkrd}d
}n||7 }|V  n>tjtkrt|ttj d< tr|sttj|tjjjd d
S tdkr<| t|7 } td| d| d t  | d   q<d
S )u7   异步, 一边读取远程响应, 一边发送给用户r   rP   Fr  T)r   r   daemon   rR  ZWeGotAnSteamTimeoutNi   rc   )r  r  ztotal_size:ztotal_speed(KB/s):r
  gư>)r   rS  ZQueueZ/stream_transfer_async_preload_max_packages_sizer   ThreadrV  r   r%  startr   ZEmpty	warnprintr   r   Z	task_doner{   r  r=  r}   r   
remote_urlr   r  r  r   )Z
total_size_start_timeZ_content_bufferZ_disable_cache_temporaryrT  trU  r,   r,   r-   iter_streamed_response_async	  sJ    
r_  c                 C   s  | rdt jd< t }nt \}t jd< tdt jj t|t jjd}t jjD ]^}|	 }|t
kr|dkrt jj| }td| trt|dt j}t|dd	|j|< td
|j|  n|dkrtt jrdt jkrt jd |j|< nt jj| |j|< n|dkrrtdkrt|j|< nVtdkrPtjdpBtjdpBt}||j|< n |dkrft|jd< n
t|j|< nt jj| |j|< |dkrLt D ]}|jdt| qqLtd|j |S )z
    Copy and parse remote server's response headers, generate our flask response object

    :type is_streamed: bool
    :return: flask response object
    :rtype: Response
    r   req_time_bodyZRemoteRespHeaders)r   r4   z302 locationzmwm/headers-locationFTz302 rewrite locationr1   r   z; charset=utf-8)r5   r6   Nz_*_originZOriginr5   zAccess-Control-Allow-Origin
set-cookie
Set-CookiezOurRespHeaders:
)r   r   r_  response_content_rewriter   r%  r   r   r"  r@  allowed_remote_response_headerscustom_text_rewriter_enablecustom_response_text_rewriterr\  r   r   r  r   Zcustom_allowed_originr   r   r   response_cookies_deep_copyr   response_cookie_rewrite)is_streamedr  r  Z
header_keyZheader_key_lowerZ	_locationZ_origincookie_stringr,   r,   r-   copy_response=  sF    	





 

rl  c                  C   s   t jjjjj} g }| D ]\}}| dkrtdkrV|dd}|dd}|dd}d| krt	rtt
d	|}n(t	d
k	rt jtkrt
dt j d |}|| q|S )a  
    It's a BAD hack to get RAW cookies headers, but so far, we don't have better way.
    We'd go DEEP inside the urllib's private method to get raw headers

    raw_headers example:
    [('Cache-Control', 'private'),
    ('Content-Length', '48234'),
    ('Content-Type', 'text/html; Charset=utf-8'),
    ('Server', 'Microsoft-IIS/8.5'),
    ('Set-Cookie','BoardList=BoardID=Show; expires=Mon, 02-May-2016 16:00:00 GMT; path=/'),
    ('Set-Cookie','aspsky=abcefgh; expires=Sun, 24-Apr-2016 16:00:00 GMT; path=/; HttpOnly'),
    ('Set-Cookie', 'ASPSESSIONIDSCSSDSSQ=OGKMLAHDHBFDJCDMGBOAGOMJ; path=/'),
    ('X-Powered-By', 'ASP.NET'),
    ('Date', 'Tue, 26 Apr 2016 12:32:40 GMT')]

    rb  r"   zSecure;r!   z;Secure;z; SecureZhttponlyzpath=/;Nz\g<prefix>=/extdomains/z\g<path>)r   r%  rawZ_original_responser   Z_headersr@  rk   r)   Z&enable_aggressive_cookies_path_rewriteregex_cookie_path_rewriterrp   rm   rj   r   )Zraw_headersZheader_cookies_string_listr   r   r,   r,   r-   rh    s&    
 rh  c                  C   s  t  } tjj}t  |  }ttjs6tdtj ||fS tdkrbtdtjtjj	dd |dd  t
tjj}|dk	r~|tj_tjj	}tdk	rt|krtdt  tr(t|tjtj}t|tr|}n@t|tst|tr|\}}|rtdtj |jdd	|fS tdk	r(t|kr(td
t  t|}tdk	rPt|krPtdt  trtjdkrt D ]L\}}|D ]<}	|	d}
|
dk	r|
tjsqvt|||	d }qvqj|jdd	|fS )zw
    Rewrite requests response's content's url. Auto skip binary (based on MIME).
    :return: Tuple[bytes, float]
    ZBinaryr0  z	Text-likeNrX  zDStringTrace: appears in the RAW remote response text, code line no. ZSkip_builtin_rewriter   rs   z>StringTrace: appears after custom text rewrite, code line no. z:StringTrace: appears after builtin rewrite, code line no. z	text/htmlr]   r  ) r   r   r%  r  r   r  r   r   r  rq   rx   rt   developer_string_tracer   current_line_numberrf  rg  r\  r   r   r   rW   r   r  r   response_text_rewritecustom_inject_contentitemsr   matchr=  Zinject_content)r]  Z_contentr`  rt   	resp_textZ
resp_text2Zis_skip_builtin_rewriteZpositionrt  itemr   r,   r,   r-   rd    sN    

  


rd  c                  O   s    ddl m} |dt t| |S )u   本函数在v0.28.3被移除, 对本函数的调用会被映射出去
    如果需要查看本函数代码, 请查看git历史到 v0.28.3 以前
    r   )warnz[This function is deprecated since v0.28.3, use response_text_basic_mirrorlization() instead)warningsrx  DeprecationWarningrr   )r   r   rx  r,   r,   r-   response_text_basic_rewrite  s    
r{  c                 C   sH  t r$ttj D ]\}}| ||} q| ddkr<td|  tt	| } | ddkr`td|  t
dk	r|t
| kr|tdt  t| } t
dk	rt
| krtdt  | dt d	 d	t d	 } | d
t d dt d } | dt dt } | d	t d	 d	t d	 } | dt d dt d } t
dk	rDt
| krDtdt  | S )zb
    rewrite urls in text-like content (html,css,js)
    :type resp_text: str
    :rtype: str
    zURL("/http://"r   z0 resp_text:z1 resp_text:Nz;StringTrace: appears after advanced rewrite, code line no. z?StringTrace: appears after basic mirrorlization, code line no. z".r/  z'.'zdomain=.domain=zDStringTrace: appears after js cookies string rewrite, code line no. )url_custom_redirect_enableplain_replace_domain_aliasr   r   r)   findr   regex_adv_url_rewriterrp   r>  rp  rq  rr   target_domain_rootmy_host_name_no_port)rv  Zbefore_replaceZafter_replacer,   r,   r-   rr    s*    

rr  c                 C   s   t dt | } | S )z`
    rewrite response cookie string's domain to `my_host_name`
    :type cookie_string: str
    r}  )regex_cookie_rewriterrp   r  )rk  r,   r,   r-   ri  +  s    ri  c                  C   s:   t jr&t jrdnd} t| t j t jS ttt t jS dS )uU   
    组装目标服务器URL, 即生成 parse.remote_url 的值
    :rtype: str
    r    r"   N)r   r   r   r	   rm   remote_path_queryr   r   )r   r,   r,   r-   assemble_remote_url8  s    r  c                   C   s.   t jtkr*tt js*tr&tt j dS dS dS )u   
    SSRF防护, 第一层, 在请求刚开始时被调用, 检查域名是否允许
    :return: 如果请求触发了SSRF防护, 则返回True
    :rtype: bool
    FT)r   rm   rU   rf   +developer_temporary_disable_ssrf_preventionr   r,   r,   r,   r-   ssrf_check_layer_1F  s    


r  c                  C   s   i } t dtj tjD ]\}}| }|dkr2qq|dkrF|dkrFqq|dkrd|ks^d|krd}d|krr|d	7 }d
|kr|d
7 }|r|| |< qqt|| |< |dkrtd| | | |< qt d|  | S )u  
    Extract necessary client header, filter out some.

    对于浏览器请求头的策略是黑名单制, 在黑名单中的头会被剔除, 其余所有请求头都会被保留

    对于浏览器请求头, zmirror会移除掉其中的 host和content-length
    并重写其中的cookie头, 把里面可能存在的本站域名修改为远程服务器的域名

    :return: 重写后的请求头
    :rtype: dict
    zBrowserRequestHeaders:)hostzcontent-lengthr1   r!   zaccept-encodingZbrZsdchZgzipzgzip, ZdeflateZcookiezFilteredBrowserRequestHeaders:)r   r   r   r@  r   $regex_remove__zmirror_verify__headerrp   )Zrewrited_headersZ	head_nameZ
head_valueZhead_name_lZ	_str_buffr,   r,   r-   extract_client_headerW  s2    

r  c                 C   sZ   dd }t || }tdk	r0t|kr0tdt  tt|}|tt}t	d| d| |S )a  
    Rewrite proxy domain to origin domain, extdomains supported.
    Also Support urlencoded url.
    This usually used in rewriting request params

    eg. http://foo.bar/extdomains/accounts.google.com to http://accounts.google.com
    eg2. foo.bar/foobar to www.google.com/foobar
    eg3. http%3a%2f%2fg.zju.tools%2fextdomains%2Faccounts.google.com%2f233
            to http%3a%2f%2faccounts.google.com%2f233

    :type raw_text: str
    :rtype: str
    c                 S   s   t d| }| d}t d| }tt d| }| d}d}|rxd|krl|sRt|r`|d| 7 }n|d| 7 }||d	 7 }||7 }|S )
Nr   ra   r`   r   r   r!   r2  Zhttpsrc   )rd   rg   boolr   )r<  r   ra   r`   r   r   r   r,   r,   r-   replace_to_real_domain  s    



z<client_requests_text_rewrite.<locals>.replace_to_real_domainNzAStringTrace: appears client_requests_text_rewrite, code line no. zClientRequestedUrl: z<- Has Been Rewrited To ->)
!regex_request_rewriter_extdomainsrp   rp  r   rq  "regex_request_rewriter_main_domainr   r)   ri   r   )Zraw_textr  Zreplacedr,   r,   r-   r     s    r   c                 C   s<   | dkrt j} t| }|jpd}|s8|jr8|d|j 7 }|S )z
    Convert http://foo.bar.com/aaa/p.html?x=y to /aaa/p.html?x=y

    :param no_query:
    :type full_url: str
    :param full_url: full url
    :return: str
    Nr/   r   )r   r  r
   r   r   )Zfull_urlZno_queryrH   r   r,   r,   r-   r     s    	

r   c           	      C   s  t | j}td| d| |tkr.ts.td||s6d}tj|| |||d }t	r^t
|}nt }t tjd< |j|tdtt d}t tjd  tjd	< td
tjd	 dd tdkrt|jjd|jd|j td|jj |rtd|jj td|j |S )uy   实际发送请求到目标服务器, 对于重定向, 原样返回给用户
    被request_remote_site_and_parse()调用ZFinalRequestUrlZFinalHostnamez5Trying to access an OUT-OF-ZONE domain(SSRF Layer 2):N)r   ZparamsdataZreq_start_timeF)ZproxiesZallow_redirectsstreamZverifyreq_time_headerzRequestTime:r  vr0  zFinalSentToRemoteRequestUrl:z
Rem Resp Stat: zRemoteRequestHeaders: zRemoteRequestRawData: zRemoteResponseHeaders: )r
   r   r   rU   r  ConnectionAbortedErrorrequestsZRequestZpreparery   r$   Zget_sessionZSessionr   r   sendrequests_proxiesenable_stream_content_transferZdeveloper_do_not_verify_sslr  r   r  r  r"  r   Zbody)	r  r  r   Z	param_getr  Zfinal_hostnameZprepped_reqZ_sessionr   r,   r,   r-   send_request  sB    


	r  c                  C   s   t  } t| }|dk	rBz| j|d} W n   d}Y n
X t| } trxt| tr\| j|d} tj|d| krxt	dt
  | |fS )u   
    解析出浏览者发送过来的data, 如果是文本, 则进行重写
    如果是文本, 则对文本内容进行重写后返回str
    如果是二进制则, 则原样返回, 不进行任何处理 (bytes)
    :rtype: Union[str, bytes, None]
    Nrs   zFStringTrace: appears after client_requests_bin_rewrite, code line no. )r   get_datarx   ru   r   rp  r   r   r   r   rq  )r  rt   r,   r,   r-   prepare_client_request_data  s    
r  c                  C   s   t tjd} tjd dkr0tddtjd   tjddk	rxtjsxtddtjd	   td
dt tjd    tddtj  t	rtjst
d | S )u4   
    生成我们的响应
    :rtype: Response
    )rj  r  gh㈵>zX-Header-Req-Time%.4f
start_timeNzX-Body-Req-Timer`  X-Compute-TimezX-Powered-Byz
zmirror/%sZtraffic)rl  r   streamed_our_responser   r(  r   r   r   r   Zdeveloper_dump_all_trafficsr   )r  r,   r,   r-   generate_our_response6  s    
r  c                  C   sb  t jjddt _tt jt _tr8tt js8t	dddS t
oDtt jt _t jjddt _dt jkodt jkod	t jkod
t jkot jjjdkot jjdkt _tdkrtdt jdt jdt jdt dd	 tr^t jr^t jtkr^dt jjkrt jjd} nt jrd} ntt jj} dt j| gtt j< tt jrRdtt j d< tdt j ntdt j dS )u   处理远程服务器的响应zContent-Typer!   s'   This site is just for static resources.  )r   zCache-Controlzno-storezmust-revalidatez	max-age=0Zprivater  r!  r  zResponse Content-Type:zIsStreamed:z
cacheable:ZLiner  zContent-LengthrF   FTr   zCDN enabled for:zCDN disabled for:N)r   r%  r   r   r   r   r  Zonly_serve_static_resourcesr   generate_simple_resp_pager  r   r  Zcache_controlr   r  r"  	cacheabler  r   rq  r8  r=  r}   r  r  )Zlengthr,   r,   r-   parse_remote_responseN  sJ    
    r  r   c           	   
   C   s  t j}ttt j}d}tt d|  D ]\}}||kr@q,||d< ztt	|t
jt jt jd}W n   Y q,Y nX d|j  krdkrn n0td|dd || d ks,|dkrq,n|\}}nPd	|j  krd
krn n4|| d kr|dkr,||f}q,n|dk	r,|\}}nq,td|d| t d| tt j|dd}tdt
jd| |t
_|t|t
jf< z>ttdddd"}|dt |t
j| W 5 Q R X W n   Y nX t|jt
_t  |  S dS )u  
    猜测url所对应的正确域名
    当响应码为 404 或 500 时, 很有可能是把请求发送到了错误的域名
    而应该被发送到的正确域名, 很有可能在最近几次请求的域名中
    本函数会尝试最近使用的域名, 如果其中有出现响应码为 200 的, 那么就认为这条url对应这个域名
    相当于发生了一次隐式url重写

    * 本函数很可能会改写 parse 与 request

    :rtype: Union[Tuple[Response, float], None]
    Nr   r  r   r  i  iW  zDomain guess failed:r  r  r   i  zdomain guess successful, fromr   zX-Domain-GuessTrm   r   zShadow rewriting, fromzdomain_guess.logr   r   rs   z{}	{}	{}	-->	{}
)r   rm   rW   r
   r\  	enumeraterecent_domainsrX   r  r   r   r  r)  request_data_encodedr"  r   r(  r   r  r  domain_guess_cacher   r   r   r   r[   r   r   assemble_parse)	ZdepthZcurrent_domainr   Z
redirectedir^   r  rewrited_urlZfwr,   r,   r-   guess_correct_domain  s`    



*r  c                   C   s@   t tjtjtjtjdt_tjjtjkr<t	dtjjdtj dS )u]   
    请求远程服务器(high-level), 并在返回404/500时进行 domain_guess 尝试
    r  zrequests's remote urlzdoes no equals our rewrited urlN)
r  r   r\  r   r  r)  r  r%  r  r[  r,   r,   r,   r-   request_remote_site  s    
 r  c                   C   s  t dtj tjtjdkr0t dtj t S tttj	rBdS t
r^tttj	r^tddS trtsltrpts|ttjrt dtjd d	tjkrtrttjd	strttjd	trttjtjd	d
 t dtj n,tdtttjjddjdd ddS dS )uY   过滤用户请求, 视情况拒绝用户的访问
    :rtype: Union[Response, None]
    zClient Request Url: zcrossdomain.xmlzcrossdomain.xml hit fromNs$   Spiders Are Not Allowed To This Siter  Zipzis verifying cookieszmirror_verifyrL  z$add to ip_whitelist because cookies:z/ip_ban_verify_page?origin=r   rs   i.  code)r   r   r  r   r   basenamecrossdomain_xmlr   r   rJ  Zis_deny_spiders_by_403r   r  human_ip_verification_enabled,human_ip_verification_whitelist_from_cookies/enable_custom_access_cookie_generate_and_verifymust_verify_cookiesr   remote_addrZcookiesr   r   custom_verify_access_cookierM  r   base64Zurlsafe_b64encoder   ru   r,   r,   r,   r-   filter_client_request  sN    


 r  c                  C   sh  t dtj tjs:dtjdd kr:t d ttjddS trdtjdd krtj	
drttj	
dd	 } | tkrtttj| ddS t d
 trFtjtkrtjtjttj d}t dtjd| t|ddS t d tD ]`\}}tj|tjtjddk	r<tj||tjtjd}t dtjd| t|dd  S t d qtrdttt}|dk	rd|S dS )u  对用户的请求进行按需重定向处理
    与 rewrite_client_request() 不同, 使用301/307等进行外部重定向, 不改变服务器内部数据
    遇到任意一个需要重定向的, 就跳出本函数

    这是第一阶段重定向

    第一阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之前* 的重定向
    第二阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之后* 的重定向

    如果 `custom_prior_request_redirect_enable` 启用, 则会调用 custom_func.custom_prior_redirect_func() 进行自定义重定向

    :return: 如果不需要重定向, 则返回None, 否则返回重定向的 Response
    :rtype: Union[Response, None]
    zprior_request_redirect url:r   Nr   z4Requesting main domain in extdomains, redirect back.3  r  Zrefererr^   z9prior_request_redirect before url_custom_redirect_enable:r   zRedirect fromr   z8prior_request_redirect before url_custom_redirect_regex:r<   z6url_custom_redirect_regex not match! remote_path_query)r   r   r  r   r   r   r   r  !enable_individual_sites_isolationr   r   r   isolated_domainsr   r~  url_custom_redirect_listr)   url_custom_redirect_regexrG   ru  
IGNORECASErp   Z$custom_prior_request_redirect_enableZcustom_prior_redirect_func)Zreference_domainZredirect_toZregex_matchZregex_replaceZredirectionr,   r,   r-   prior_request_redirect(  s2    "



r  c                  C   s<  t rtrtjtkrttj d rtjdkrtttj d tkrtt	t
jstttttj t   t } trt| ttj d d} t| tdS trttjtj}|dk	rtd tjd	dk	rtd
dt tjd	    |S tj t
j!ft"kr8t"tj t
j!f }t#tj$|dd}tdt
j%d| t|ddS dS )u  
    这是第二阶段重定向, 内部隐式重写 *之后* 的重定向
    第一阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之前* 的重定向
    第二阶段重定向, 是在 rewrite_client_request() 内部隐式重写 *之后* 的重定向

    遇到任意一个需要重定向的, 就跳出本函数

    :return: 如果不需要重定向, 则返回None, 否则返回重定向的 Response
    :rtype: Union[Response, None]
    r   r  rc   r   )r3  r  NzCacheHit,Returnr  r  r  Tr  z%Redirect via domain_guess_cache, fromr   r  )&r8  Z(cdn_redirect_code_if_cannot_hard_rewriter   r=  r}   r  intZcdn_soft_redirect_minimum_sizer   r   r   rJ  r	   rk   r9  r  r  r   r:  r   r;  r   r   r{   r*  r\  r)  r   r   r   r(  r   rm   r   r  r   r  r  )Zredirect_to_urlr  r^   r  r,   r,   r-   posterior_request_redirect]  sP    
 r  c                  C   s~   t  } | d t_| d t_| d t_| d t_ttjt_t t_tjtj	dd d t_
dttj< td	tjd
tj dS )u9   将用户请求的URL解析为对应的目标服务器URLr^   r   r   r   r   rc   NTzafter assemble_parse, url:z   path_query:)r   r   rm   r   r4  r  r   r  r\  r  r=  r  r   )Z_tempr,   r,   r-   r    s    




r  c                  C   s   d} t r@ttj}|dk	r@tdtjd| |t_t|jt_d} trtrt	 }|}tD ]J\}}t
|||}||krVtd|d| t| t_t|jt_d}  qqV| rt  | S )u  
    在这里的所有重写都只作用程序内部, 对请求者不可见
    与 prior_request_redirect() 的外部301/307重定向不同,
    本函数通过改变程序内部变量来起到重定向作用
    返回True表示进行了重定向, 需要重载某些设置, 返回False表示未重定向
    遇到重写后, 不会跳出本函数, 而是会继续下一项. 所以重写顺序很重要
    FNzBeforeEmbeddedExtract:z After:TzShadowUrlRedirect:r   )r;  r   r   r  r   r
   r   r~  shadow_url_redirect_regexr   rG   rp   r   r  )has_been_rewritedZreal_urlZ_path_queryZ_path_query_rawZbeforeZafterr,   r,   r-   rewrite_client_request  s,    

r  c                 C   s   t rt  | S rK   )ry   r$   release_lock)r$  r,   r,   r-   zmirror_after_request  s    r  z/zmirror_statc                  C   s.  t jrt jdkrtddS d} | tdt 7 } | tdt 7 } | tdt 7 } | tdt 7 } | td	t	 7 } | td
t
 7 } | tdt
 7 } | tdt 7 } | tdt 7 } | tdt 7 } | tdt 7 } | tdt 7 } | tdt 7 } | td7 } | tdt7 } d|  d S )u$   返回服务器的一些状态信息z	127.0.0.1s   Only 127.0.0.1 are allowedr  r!   r   z
is_content_type_streamedz
embed_real_url_to_embedded_urlz
check_global_ua_passz
extract_mime_from_content_typez
is_content_type_using_cdnz
is_ua_in_whitelistz
is_mime_represents_textz
is_domain_match_glob_whitelistz
verify_ip_hash_cookiez
is_denied_because_of_spiderz
is_ip_not_in_allow_rangez

current_threads_numberz
----------------
z
domain_alias_to_target_setz<pre>z</pre>
)r   r  r  Zstrxr   
cache_infor   r   r   r   r   r   r~   r   r   r   r   Zactive_countrj   )outputr,   r,   r-   zmirror_status  s&    
r  z/ip_ban_verify_pagePOST)methodsc               	   C   s   t jdkrtdt j d} ttD ]6\}}| d|d |t|dkrNt|d ndf 7 } q"tD ].\}}}| d|t	rvd	ndt|t|f 7 } q^d
t j
kr| dtt j
d
 7 } dttttt	rdndt| f S t jdkrtdt j ttD ]z\}}t jt|d}|dkr:t	r qntd|d   d  S ||d krbtd|d   d  S t	r q~qt	r~tddS i }tD ]H\}}}|t jkst j| std|  d  S t j| ||< qd}	d
t jkrFztt jd
jdd}	W n(   tdtt jd
 dd Y S X t|	j}
|
rF|
tkrFd}	tr`t|s`tddS t|	td}trt|}|jd|t ! t"t#d  t#d! d" d# ||d$< n^t$r
t%|t }td%| |d&krtddS |jd|t ! t"t#d  t#d! d" d# ||d$< t&t j|d' |S d&S )(u   生成一个身份验证页面r  zVerifying IP:r!   zN%s <input type="text" name="%d" placeholder="%s" style="width: 190px;" /><br/>r   r0  rc   z'%s %s<input type="%s" name="%s" /><br/>u)   <span style="color: red;">(必填)<span> ra  zF<input type="hidden" name="origin" value="%s" style="width: 190px;" />u]  <!doctype html>
        <html lang="zh-CN">
        <head>
        <meta charset="UTF-8">
        <title>%s</title>
        </head>
        <body>
          <h1>%s</h1>
          <p>这样的验证只会出现一次，通过后您会被加入白名单，之后相同IP的访问不会再需要验证。<br/>
          提示: 由于手机和宽带IP经常会发生改变，您可能会多次看到这一页面。</p>
          %s <br>
          <pre style="border: 1px dashed;">%s</pre>
          <form method='post'>%s<button type='submit'>递交</button>
          </form>
        </body>
        </html>u7   只需要回答出以下<b>任意一个</b>问题即可u+   你需要回答出以下<b>所有问题</b>r  zVerifying Request Forms   Please answer question: r!  r   s   Wrong answer in: s#   Please answer at least ONE questions   Param Missing or Blank: r/   r   rs   z#Unable to decode origin from value:Tr   s!   Verification Failed, please checkr   r  )Zdays   i  )r3   Zmax_ageZ__zmirror_verifyzSelfGeneratedCookie:Nr  )'r   r  r   r  r  human_ip_verification_questionsr  r   Z%human_ip_verification_identity_recordZ4human_ip_verification_answer_any_one_questions_is_okr   r   Zhuman_ip_verification_titleZ!human_ip_verification_descriptionZformr   r  r   r  Zurlsafe_b64decoderu   r   r
   r   ri   identity_verify_requiredcustom_identity_verifyZgenerate_html_redirect_pageZ!human_ip_verification_success_msgr  r	  Z
set_cookier   r   r   Z4human_ip_verification_whitelist_cookies_expires_daysr  custom_generate_access_cookierM  )Z	form_bodyZq_idZ	_questionZrec_explain_stringZrec_nameZ
input_typeZsubmitted_answerZrecord_dictZ	form_typera  r   r  _hashr,   r,   r-   ip_ban_verify_page	  s    
$
 


  

 









r  ZOPTIONSZPUTZDELETEZHEADZPATCHz/<path:input_path>c                 C   sr   zRt | d}tj D ]\}}|j|| qtj D ]\}}|jd| q8W n   tdd Y S X |S dS )uN   入口函数的壳, 只是包了一层异常处理, 实际是 main_function() )
input_pathrc  Tr  N)	main_functionr   Zextra_resp_headersrt  r   rB  Zextra_cookiesr   r   )r  r  r   r   rk  r,   r,   r-   zmirror_enter{	  s    
r  c                 C   s   t d t  tjt_t tjd< t  t }|dk	rDt d |S t	 }|dk	rV|S t
 }t rltddS t t_t }|dk	r|S t \t_t_t  t  t }trtjrttj|tjd t d |S )	u8   本程序的实际入口函数
    :rtype: Response
    z-----BeginRequest-----r  Nz"-----EndRequest(filtered out)-----s,   SSRF Prevention! Your domain is NOT ALLOWED.r  )r  z-----EndRequest-----)r   r   Zinitr   r  r   r   r  r  r  r  r  r  r  r)  r  r  request_dataZrequest_data_encodingr  r  r  r{   r  r&  r\  r  )r  r   r  r  r,   r,   r-   r  	  s6    

r  z/crossdomain.xmlc                   C   s   t dddS )NaD  <?xml version="1.0"?>
<!DOCTYPE cross-domain-policy SYSTEM "http://www.macromedia.com/xml/dtds/cross-domain-policy.dtd">
<cross-domain-policy>
<allow-access-from domain="*"/>
<site-control permitted-cross-domain-policies="all"/>
<allow-http-request-headers-from domain="*" headers="*" secure="false"/>
</cross-domain-policy>ztext/x-cross-domain-policyr   )r   r,   r,   r,   r-   r  	  s    r  z/about_zmirrorc                   C   s"   t djtjtjtjttdddS )Nzzmirror
version: {version}
Author: {author}
Github: {github_url}
Note: Love Luciaz Forever!

Mirroring: {source_site}
This site: {my_domain}
)r   r   Z
github_urlZsource_siteZ	my_domainz
text/plainr  )r   r[   r   r   
__AUTHOR__r   r   ri   r,   r,   r,   r-   about_zmirror	  s      r  custom_func)rg  zuCannot import custom_response_text_rewriter custom_func.py, `custom_text_rewriter` is now disabled(if it was enabled))r  zoCannot import custom_identity_verify from custom_func.py, `identity_verify` is now disabled (if it was enabled))r  r  zCannot import custom_generate_access_cookie and custom_generate_access_cookie from custom_func.py, `enable_custom_access_cookie_generate_and_verify` is now disabled (if it was enabled)r   )r   ZUnableToInitCronTask)r   rW  __main__z#Please use `python3 wsgi.py` to run)F)F)F)N)NNF)r   NN)r   r   FF)N)r  )F)N)N)F)NF)r  NNN)r   )r/   )r/   (  r   r5  rG   r#  r  ZschedrS  r  r  r   rN  r   r   r   r   r   Zhtmlr   r   r   r   Zurllib.parser	   r
   r   r   Zurllibr  Zflaskr   r   r   r   r   r!   r   Z$requests.packages.urllib3.exceptionsr   ZpackagesZurllib3Zdisable_warningstypingr   r   r   r   Zcchardetr   rw   rv   r   r   getcwdZZMIRROR_ROOTchdirZexternal_pkgs.ColorfulPyPrintenvironZunittest_modeZ	fastcacher   	functoolsr[  r   Zthreadlocalr   r[   r   r  r   Zconfig_defaultr   r   r   r(   r)   Z	importlibZreloadZimport_moduleZutilsZlru_dictr#   r$   r{   Zcache_systemr%   r&   r|   r   Z!ColorfulPyPrint_set_verbose_levelr  Z%developer_enable_experimental_featureri   r  rh   r   Zmy_host_name_urlencodedr   rW   rB  r   rU   r   Z_domainZhostnamerj   r   r   rk   Zmy_host_scheme_escapedr   Zmyurl_prefix_escapedr  r9  r:  re  updateZcustom_allowed_remote_headersZextract_root_domainr  Zmy_host_name_rootr  Zsteamed_mime_keywordsr~  r  r  r  r  r   r   Z$enable_stream_transfer_async_preloadre   Z$domains_whitelist_auto_add_glob_listr  r  Zisolated_domainr  r  Zis_use_proxyr  r  ZbuffrP  ZnetworkZ
ip_networkr  Zquestionr  r  r  r   Z	schedulerr   r}   r  r  rA   rC   rS   ZREGEX_MY_HOST_NAMErZ   r  r  r  ro  r  r  r\   ro   r  rs  rt  kr  r   r   I__name__apprr   rx   r   r   r   r   r   rf   r   Z$extract_from_url_may_have_extdomainsr   Zconvert_to_mirror_urlr   r   r   r   r   r	  r   r   r&  r*  r>  r   r   rE  rG  rM  r   rV  r_  rl  rh  rd  r{  rr  ri  r  r  r  r   r   r  r  r  r  r  r  r  r  r  r  r  Zafter_requestr  Zrouter  r  r  r  r  r  r   r   rI  r  rf  rg  r  r  r  r   Zcron_tasks_listZ
_task_dictglobals	ExceptionerY  ZthrZ  r   r,   r,   r,   r-   <module>   s  









          





	

'
 
	9
$
/
,
@
1
)
M


' 	

	

4M)E	*=64"=\'5@.



xK




